First, I don’t take credit for this: the algorithm is based upon Snial's brilliant “Bootjacker” hack, documented on his blog.
I’m trying to read the boot loader section (BLS) from an application, which the lock bits prevent. The normal Arduino lock bits prevent SPM writes to the BLS and bar LPM instructions executing in the application section from reading the BLS. Yet there is nothing preventing an LPM executing inside the BLS from reading the BLS. Am I wrong here?
Understanding what I did probably requires reading Snial’s blog first. The program executes an LPM instruction inside the BLS using Snial’s potentially catastrophic technique: devious stack manipulation combined with a precisely timed interrupt to cleverly redirect execution.
The code below works when the lock bits are not set. However, if the lock bits are not set, this hack isn’t required in the first place. With the lock bits set in the normal Arduino manner, the code doesn’t work.
// ATmega168 (Arduino) register locations and timing constants.
// The k* values are the I/O-space addresses used with the `out` instruction; they are
// spelled as plain numbers because the inline assembly needs integer constants.
#define kTCCR0B 0x25 // Timer0 control register B (clock select)
#define kTCNT0 0x26  // Timer0 counter
#define kTIFR0 0x15  // Timer0 interrupt flag register
// Flag bits the timer ISR writes to TIFR0 to clear pending Timer0 interrupts:
// (1<<OCF0B) | (1<<OCF0A) | (1<<TOV0), i.e. bits 2..0 on the ATmega168.
// Guarded so an existing definition elsewhere in the project wins.
#ifndef T0_TIFR0
#define T0_TIFR0 0x07
#endif
// Compare value loaded into OCR0B: timer clocks between starting Timer0 and the
// compare-match interrupt (the timed sequence is 24 cycles; 22 allows for ISR latency).
#define T0_CYCLES 22
// Byte address of the start of the boot loader section on the ATmega168.
// No trailing semicolon, so the macro can be used inside expressions.
#define BLS_START 0x3800
// Flash byte address that the next LpmCmd() call reads.
uint16_t ReadAddr;
// Parks Timer0 in a known idle state and arms its compare-match-B interrupt.
// The timer is left stopped; LpmCmd() starts it by writing the clock select
// into TCCR0B, so the compare match lands T0_CYCLES timer clocks after that write.
void SetupTimer0B(void) {
  const uint8_t allTimer0Flags = (1 << OCF0B) | (1 << OCF0A) | (1 << TOV0);

  TCCR0B = 0;              // no clock source: timer halted
  TCCR0A = 0;              // mode 0, compare outputs not driven
  TCNT0 = 0;               // counting restarts from zero
  TIFR0 = allTimer0Flags;  // discard any Timer0 interrupt already pending
  OCR0B = T0_CYCLES;       // compare match this many clocks after the timer starts
  TIMSK0 = (1 << OCIE0B);  // enable the OCR0B compare interrupt only
}
// Reads the flash byte at byte address ReadAddr by borrowing an `lpm r25,Z+`
// instruction that lives inside the bootloader.
//
// How it works: Timer0 is started, two return addresses are pushed (first the
// local label ReturnHere, then the bootloader's lpm instruction), Z is loaded with
// ReadAddr and `ret` jumps into the bootloader. The Timer0 compare interrupt is
// timed to fire right after the lpm; its ISR discards the interrupt return address
// and `reti`s to ReturnHere, where the fetched byte is picked up from r25.
//
// Requirements: SetupTimer0B() has been called, global interrupts are enabled, and
// no other interrupt may fire between starting the timer and ReturnHere -- the
// instruction sequence after the `out` is cycle-counted and must not be altered.
//
// Returns the byte left in r25 by the bootloader's lpm (0 if the lpm did not run,
// since r25 is zeroed beforehand).
uint8_t LpmCmd(void) {
  uint8_t result;
  asm volatile(
    "push r0 \n"
    "push r1 \n"
    "push r16 \n"
    "push r30 \n"
    "push r31 \n"
    "ldi r16,1 \n" // timer 0 start at fClk
    "out %1,r16 \n" // set TCCR0B so off we go. This is time 0c
    "ldi r30,pm_lo8(ReturnHere) \n" //(1c)
    "ldi r31,pm_hi8(ReturnHere) \n" //(1c)
    "push r30 \n" //(2c)
    "push r31 \n" //(2c) low byte first, then high: `ret`/`reti` pop the high byte first
    //0x3de6 or 0x1ef3 (word address) is location in bls of lpm instruction
    "ldi r30,0xf3 \n" //(1c) lo byte
    "ldi r31,0x1e \n" //(1c) hi byte
    "push r30 \n" //(2c)
    "push r31 \n" //(2c)
    "lds r30,ReadAddr \n" //(2c) lpm instruction needs a byte address in Z
    "lds r31,ReadAddr+1 \n" //(2c)
    "ldi r25,0x00 \n" //(1c)
    "ret \n" //(4c) "return" to the bootloader via the address pushed above
    // lpm r25,z+ (3c) 24c total, timer set to 22 due to ISR latency (x-2)
    "ReturnHere: \n" // the timer ISR's reti lands here
    "mov %0,r25 \n" // save byte that lpm instruction fetched
    "pop r31 \n"
    "pop r30 \n"
    "pop r16 \n"
    "pop r1 \n"
    "pop r0 \n"
    : "=r" (result)
    : "I" (kTCCR0B)
    // r25 is overwritten and never restored. r16/r30/r31 are listed so the compiler
    // cannot place `result` in a register that the pops above overwrite after the mov.
    // "memory" makes the compiler flush ReadAddr before the asm reads it with lds.
    : "r16", "r25", "r30", "r31", "memory"
  );
  return result;
}
// Timer interrupt handler used by LpmCmd(). It is timed to fire while execution is inside
// the bootloader, immediately after the borrowed lpm instruction.
// A plain reti would resume the bootloader, so the handler first pops (discards) the two-byte
// return address the interrupt pushed. The reti then consumes the next address on the stack,
// which is the "ReturnHere" location that LpmCmd() pushed before jumping to the bootloader.
// The handler is ISR_NAKED: there is no compiler prologue/epilogue, so the stack contains
// exactly what the interrupt and LpmCmd() pushed. r30 is overwritten here and not restored
// by the handler; LpmCmd() pops its own saved r30 after ReturnHere.
// NOTE(review): __vector_15 is presumably the Timer0 compare-match-B vector on the ATmega168
// (OCIE0B is the interrupt SetupTimer0B() enables) -- confirm against the device header.
ISR(__vector_15, ISR_NAKED) {
asm volatile(
"ldi r30,0 \n"
"out %0,r30 \n" //stop timer 0 (TCCR0B = 0)
"out %1,r30 \n" //reset timer 0 (TCNT0 = 0)
"ldi r30,%2 \n" //T0_TIFR0: the flag bits to write to TIFR0
"out %3,r30 \n" //clear interrupts on timer 0
"pop r30 \n" //pop ISR return, so we return to LpmCmd
"pop r30 \n" //second byte of the discarded return address; r30 is trashed, LpmCmd restores it
"reti \n" //returns to ReturnHere and re-enables interrupts
// Operands %0..%3 are immediates; T0_TIFR0 must therefore be a compile-time constant.
: : "I" (kTCCR0B), "I" (kTCNT0), "I" (T0_TIFR0), "I" (kTIFR0)
);
}
// Dumps the first 256 bytes of the boot loader section over Serial (9600 baud) as
// 32 rows of "<address>: " followed by 8 two-digit hex bytes.
//
// LpmCmd() depends on a cycle-exact Timer0 interrupt, so nothing else may interrupt
// it. Each row is therefore handled in three steps: print the row prefix and wait
// for Serial to finish transmitting, read all 8 bytes with no Serial output
// pending, then print the bytes. (On cores with interrupt-driven Serial
// transmission, queued output would otherwise raise UART interrupts inside the
// timed window.)
void setup(void) {
  const uint8_t kRows = 32;
  const uint8_t kBytesPerRow = 8;
  uint8_t row[kBytesPerRow];

  Serial.begin(9600);
  ReadAddr = BLS_START;
  SetupTimer0B();

  for (uint8_t i = 0; i < kRows; i++) {
    Serial.print(ReadAddr, HEX);
    Serial.print(": ");
    Serial.flush(); // let pending output drain before the timing-critical reads

    for (uint8_t j = 0; j < kBytesPerRow; j++) {
      row[j] = LpmCmd();  // read byte at ReadAddr
      OCR0B = T0_CYCLES;  // re-arm the compare value for the next read
      ReadAddr++;         // advance to next byte
    }

    for (uint8_t j = 0; j < kBytesPerRow; j++) {
      if (row[j] < 0x10)
        Serial.print("0"); // keep every byte two hex digits wide
      Serial.print(row[j], HEX);
    }
    Serial.println();
  }
}
// Intentionally empty: the whole dump runs once from setup().
void loop(void) {
}
Published previously here.