I finally got it working perfectly. Thank you guys!
Here is the modified code:
/*
 * Bit-bang a single byte onto a digital output pin as one DMX frame slot:
 * a high mark, a low start bit, eight data bits sent least-significant bit
 * first, and a trailing high mark.
 *
 * pin     - index into the digital_pin_to_port table; selects the output
 *           port and the bit within that port.
 * theByte - value to transmit; only the low eight bits are shifted out.
 *
 * Interrupts are disabled with cli() for the duration of the transfer and
 * unconditionally re-enabled with sei() before returning.
 *
 * The statement order and the exact delay/nop padding below determine the
 * bit timing; do not reorder or "simplify" them without re-measuring on
 * hardware.
 */
void shiftDmxOut(int pin, int theByte){
    int wasteTime = 0;
    int count = 0; // index of the data bit being sent
    int portNumber = port_to_output[digital_pin_to_port[pin].port];
    int pinNumber = digital_pin_to_port[pin].bit;

    // Disable interrupts, otherwise the timer 0 overflow interrupt that
    // tracks milliseconds will make us delay longer than we want.
    cli();

    // The first thing we do is to write the pin high.
    // It is the mark between bytes. It may also already be
    // high from before.
    _SFR_BYTE(_SFR_IO8(portNumber)) |= _BV(pinNumber);
    //delayMicroseconds(4); //changed from 10 microseconds

    // DMX starts with a start bit that must always be zero.
    _SFR_BYTE(_SFR_IO8(portNumber)) &= ~_BV(pinNumber);

    // We need a delay of 4us (the time in which one bit is transferred).
    // On the Arduino only the 1us delay is precise; everything between
    // 2 and 12 is imprecise. So the start bit is padded with three 1us
    // delays followed by a short empty loop.
    delayMicroseconds(1);
    delayMicroseconds(1);
    delayMicroseconds(1);
    // NOTE(review): this empty loop has no side effects, so an optimizing
    // compiler may drop it entirely - confirm the start-bit width on hardware.
    for (wasteTime =0; wasteTime <2; wasteTime++) {}

    for (count = 0; count < 8; count++) {
        // Drive the pin according to the current least-significant bit.
        if (theByte & 01) {
            _SFR_BYTE(_SFR_IO8(portNumber)) |= _BV(pinNumber);
        }
        else {
            _SFR_BYTE(_SFR_IO8(portNumber)) &= ~_BV(pinNumber);
        }
        delayMicroseconds(1);
        delayMicroseconds(1);
        delayMicroseconds(1);
        // To hold every bit for exactly 4 microseconds we have to waste
        // some more time here; that is what the assembly nops are for.
        __asm__(
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
            "nop\n\t"
        );
        theByte>>=1; // move the next bit into the least-significant position
    }

    // The last thing we do is to write the pin high.
    // It is the mark between bytes (this break has to be between 8 us and 1 sec).
    _SFR_BYTE(_SFR_IO8(portNumber)) |= _BV(pinNumber);
    // NOTE(review): the argument of this call was missing in the posted code
    // (the call was left unterminated). 8 is the minimum mark length named in
    // the comment above - confirm against the author's working sketch.
    delayMicroseconds(8);

    // Re-enable interrupts.
    sei();
}