Did you also replace tunedDelay with the version that uses volatile declarations?
/**
 * Busy-wait by spinning in a hand-written AVR assembly loop.
 *
 * Every pass subtracts one from the 16-bit counter and the loop exits once
 * the counter has wrapped around to 0xFFFF, so the loop body runs
 * `delay + 1` times (passing 0 still executes a single pass).
 *
 * The instruction sequence is deliberately left exactly as it was so the
 * per-pass timing that callers have been tuned against does not change.
 *
 * @param delay Number of loop passes to spin for, minus one.
 *
 * NOTE(review): the `volatile` qualifiers on `delay` and `tmp` are retained
 * from the original; presumably they work around the compiler optimising
 * the loop operands - confirm before removing them.
 */
/* static */
inline void SoftwareSerial7E1::tunedDelay(volatile uint16_t delay) {
  // Scratch register: reloaded with 0xFF on each pass so that `cpc` can
  // compare the high byte of the counter against it.
  volatile uint8_t tmp = 0;

  asm volatile(
    "1: sbiw %0, 0x01 \n\t"   // counter -= 1 (16-bit subtract)
    "ldi %1, 0xFF \n\t"       // tmp = 0xFF
    "cpi %A0, 0xFF \n\t"      // compare low byte of counter with 0xFF
    "cpc %B0, %1 \n\t"        // compare high byte with tmp, with carry
    "brne 1b \n\t"            // repeat until counter == 0xFFFF
    // "w" restricts the counter to the upper register pairs that `sbiw`
    // accepts; a plain "r" lets the allocator pick a pair the assembler
    // would reject. "a" keeps tmp in a register that `ldi` can target.
    : "+w" (delay), "+a" (tmp)
    // Matching input kept from the original; "+" already marks operand 0
    // as read-write.
    : "0" (delay)
  );
}