I made a lot of progress Friday night and early Saturday morning and have had a confusing and frustrating time since.
I actually have (or had) something working that moves this stuff into the core, in wiring.h and wiring_digital.c. I emailed it to Paul Stoffregen hoping to get him to review it, but I haven't heard back.
I struggled all day Saturday trying to get a version of digitalWriteFast that would cooperate with both the current versions of wiring.h/wiring_digital.c and my modified ones.
My Mega2560 came, and it does not work with what I posted a few days ago. It seems to me that I should have been able to take the same version of the test code that I use for the Mega1280, select Mega2560 from the Boards menu, and have it just work. But the test program reports at least some errors on almost every pin pair. AND THE CODE SIZE FOR THE 2560 IS ABOUT 20000 BYTES SMALLER! I was very puzzled.
Here is the 1280 disassembly (which works):
analogWrite(2,254);
2dc: 82 e0 ldi r24, 0x02 ; 2
2de: 6e ef ldi r22, 0xFE ; 254
2e0: 70 e0 ldi r23, 0x00 ; 0
2e2: 0e 94 14 a3 call 0x14628 ; 0x14628 <analogWrite>
pinModeFast(2,INPUT);
2e6: 6c 98 cbi 0x0d, 4 ; 13
digitalWriteFast(2,HIGH);
2e8: 80 91 90 00 lds r24, 0x0090
2ec: 8f 7d andi r24, 0xDF ; 223
2ee: 80 93 90 00 sts 0x0090, r24
2f2: 74 9a sbi 0x0e, 4 ; 14
pinModeFast(5,OUTPUT);
2f4: 6b 9a sbi 0x0d, 3 ; 13
digitalWriteFast(5,LOW);
2f6: 80 91 90 00 lds r24, 0x0090
2fa: 8f 77 andi r24, 0x7F ; 127
2fc: 80 93 90 00 sts 0x0090, r24
300: 73 98 cbi 0x0e, 3 ; 14
delay(1);
302: 61 e0 ldi r22, 0x01 ; 1
304: 70 e0 ldi r23, 0x00 ; 0
306: 80 e0 ldi r24, 0x00 ; 0
308: 90 e0 ldi r25, 0x00 ; 0
30a: 0e 94 59 a2 call 0x144b2 ; 0x144b2 <delay>
if((int) digitalReadFast(2) != LOW) error(2,5,1);
30e: 80 91 90 00 lds r24, 0x0090
312: 8f 7d andi r24, 0xDF ; 223
314: 80 93 90 00 sts 0x0090, r24
318: 64 9b sbis 0x0c, 4 ; 12
31a: 07 c0 rjmp .+14 ; 0x32a <loop+0x5e>
31c: 82 e0 ldi r24, 0x02 ; 2
31e: 90 e0 ldi r25, 0x00 ; 0
320: 65 e0 ldi r22, 0x05 ; 5
322: 70 e0 ldi r23, 0x00 ; 0
324: 41 e0 ldi r20, 0x01 ; 1
326: 50 e0 ldi r21, 0x00 ; 0
328: 9a df rcall .-204 ; 0x25e <_Z5erroriii>
analogWrite(5,254);
32a: 85 e0 ldi r24, 0x05 ; 5
32c: 6e ef ldi r22, 0xFE ; 254
32e: 70 e0 ldi r23, 0x00 ; 0
330: 0e 94 14 a3 call 0x14628 ; 0x14628 <analogWrite>
pinModeFast(2,INPUT);
334: 6c 98 cbi 0x0d, 4 ; 13
digitalWriteFast(2,HIGH);
336: 80 91 90 00 lds r24, 0x0090
33a: 8f 7d andi r24, 0xDF ; 223
33c: 80 93 90 00 sts 0x0090, r24
340: 74 9a sbi 0x0e, 4 ; 14
pinModeFast(5,OUTPUT);
342: 6b 9a sbi 0x0d, 3 ; 13
digitalWriteFast(5,LOW);
344: 80 91 90 00 lds r24, 0x0090
348: 8f 77 andi r24, 0x7F ; 127
34a: 80 93 90 00 sts 0x0090, r24
34e: 73 98 cbi 0x0e, 3 ; 14
delay(1);
350: 61 e0 ldi r22, 0x01 ; 1
352: 70 e0 ldi r23, 0x00 ; 0
354: 80 e0 ldi r24, 0x00 ; 0
356: 90 e0 ldi r25, 0x00 ; 0
358: 0e 94 59 a2 call 0x144b2 ; 0x144b2 <delay>
if((int) digitalReadFast(2) != LOW) error(2,5,1);
35c: 80 91 90 00 lds r24, 0x0090
360: 8f 7d andi r24, 0xDF ; 223
362: 80 93 90 00 sts 0x0090, r24
366: 64 9b sbis 0x0c, 4 ; 12
368: 07 c0 rjmp .+14 ; 0x378 <loop+0xac>
36a: 82 e0 ldi r24, 0x02 ; 2
36c: 90 e0 ldi r25, 0x00 ; 0
36e: 65 e0 ldi r22, 0x05 ; 5
370: 70 e0 ldi r23, 0x00 ; 0
372: 41 e0 ldi r20, 0x01 ; 1
374: 50 e0 ldi r21, 0x00 ; 0
376: 73 df rcall .-282 ; 0x25e <_Z5erroriii>
Here is the corresponding 2560 disassembly for 2 of the cases that fail:
analogWrite(2,254);
2e0: 82 e0 ldi r24, 0x02 ; 2
2e2: 6e ef ldi r22, 0xFE ; 254
2e4: 70 e0 ldi r23, 0x00 ; 0
2e6: 0e 94 c1 7b call 0xf782 ; 0xf782 <analogWrite>
pinModeFast(2,INPUT);
2ea: 52 98 cbi 0x0a, 2 ; 10
digitalWriteFast(2,HIGH);
2ec: 5a 9a sbi 0x0b, 2 ; 11
pinModeFast(5,OUTPUT);
2ee: 55 9a sbi 0x0a, 5 ; 10
digitalWriteFast(5,LOW);
2f0: 84 b5 in r24, 0x24 ; 36
2f2: 8f 7d andi r24, 0xDF ; 223
2f4: 84 bd out 0x24, r24 ; 36
2f6: 5d 98 cbi 0x0b, 5 ; 11
delay(1);
2f8: 61 e0 ldi r22, 0x01 ; 1
2fa: 70 e0 ldi r23, 0x00 ; 0
2fc: 80 e0 ldi r24, 0x00 ; 0
2fe: 90 e0 ldi r25, 0x00 ; 0
300: 0e 94 06 7b call 0xf60c ; 0xf60c <delay>
if((int) digitalReadFast(2) != LOW) error(2,5,1);
304: 4a 9b sbis 0x09, 2 ; 9
306: 07 c0 rjmp .+14 ; 0x316 <loop+0x46>
308: 82 e0 ldi r24, 0x02 ; 2
30a: 90 e0 ldi r25, 0x00 ; 0
30c: 65 e0 ldi r22, 0x05 ; 5
30e: 70 e0 ldi r23, 0x00 ; 0
310: 41 e0 ldi r20, 0x01 ; 1
312: 50 e0 ldi r21, 0x00 ; 0
314: a6 df rcall .-180 ; 0x262 <_Z5erroriii>
analogWrite(5,254);
316: 85 e0 ldi r24, 0x05 ; 5
318: 6e ef ldi r22, 0xFE ; 254
31a: 70 e0 ldi r23, 0x00 ; 0
31c: 0e 94 c1 7b call 0xf782 ; 0xf782 <analogWrite>
pinModeFast(2,INPUT);
320: 52 98 cbi 0x0a, 2 ; 10
digitalWriteFast(2,HIGH);
322: 5a 9a sbi 0x0b, 2 ; 11
pinModeFast(5,OUTPUT);
324: 55 9a sbi 0x0a, 5 ; 10
digitalWriteFast(5,LOW);
326: 84 b5 in r24, 0x24 ; 36
328: 8f 7d andi r24, 0xDF ; 223
32a: 84 bd out 0x24, r24 ; 36
32c: 5d 98 cbi 0x0b, 5 ; 11
delay(1);
32e: 61 e0 ldi r22, 0x01 ; 1
330: 70 e0 ldi r23, 0x00 ; 0
332: 80 e0 ldi r24, 0x00 ; 0
334: 90 e0 ldi r25, 0x00 ; 0
336: 0e 94 06 7b call 0xf60c ; 0xf60c <delay>
if((int) digitalReadFast(2) != LOW) error(2,5,1);
33a: 4a 9b sbis 0x09, 2 ; 9
33c: 07 c0 rjmp .+14 ; 0x34c <loop+0x7c>
33e: 82 e0 ldi r24, 0x02 ; 2
340: 90 e0 ldi r25, 0x00 ; 0
342: 65 e0 ldi r22, 0x05 ; 5
344: 70 e0 ldi r23, 0x00 ; 0
346: 41 e0 ldi r20, 0x01 ; 1
348: 50 e0 ldi r21, 0x00 ; 0
34a: 8b df rcall .-234 ; 0x262 <_Z5erroriii>
And here is the disassembly for the Uno (which has different pin/port correspondences than the 2560; this code also works):
analogWrite(2,254);
194: 82 e0 ldi r24, 0x02 ; 2
196: 6e ef ldi r22, 0xFE ; 254
198: 70 e0 ldi r23, 0x00 ; 0
19a: 0e 94 87 14 call 0x290e ; 0x290e <analogWrite>
pinModeFast(2,INPUT);
19e: 52 98 cbi 0x0a, 2 ; 10
digitalWriteFast(2,HIGH);
1a0: 5a 9a sbi 0x0b, 2 ; 11
pinModeFast(5,OUTPUT);
1a2: 55 9a sbi 0x0a, 5 ; 10
digitalWriteFast(5,LOW);
1a4: 84 b5 in r24, 0x24 ; 36
1a6: 8f 7d andi r24, 0xDF ; 223
1a8: 84 bd out 0x24, r24 ; 36
1aa: 5d 98 cbi 0x0b, 5 ; 11
delay(1);
1ac: 61 e0 ldi r22, 0x01 ; 1
1ae: 70 e0 ldi r23, 0x00 ; 0
1b0: 80 e0 ldi r24, 0x00 ; 0
1b2: 90 e0 ldi r25, 0x00 ; 0
1b4: 0e 94 f3 13 call 0x27e6 ; 0x27e6 <delay>
if((int) digitalReadFast(2) != LOW) error(2,5,1);
1b8: 4a 9b sbis 0x09, 2 ; 9
1ba: 07 c0 rjmp .+14 ; 0x1ca <loop+0x46>
1bc: 82 e0 ldi r24, 0x02 ; 2
1be: 90 e0 ldi r25, 0x00 ; 0
1c0: 65 e0 ldi r22, 0x05 ; 5
1c2: 70 e0 ldi r23, 0x00 ; 0
1c4: 41 e0 ldi r20, 0x01 ; 1
1c6: 50 e0 ldi r21, 0x00 ; 0
1c8: a6 df rcall .-180 ; 0x116 <_Z5erroriii>
analogWrite(5,254);
1ca: 85 e0 ldi r24, 0x05 ; 5
1cc: 6e ef ldi r22, 0xFE ; 254
1ce: 70 e0 ldi r23, 0x00 ; 0
1d0: 0e 94 87 14 call 0x290e ; 0x290e <analogWrite>
pinModeFast(2,INPUT);
1d4: 52 98 cbi 0x0a, 2 ; 10
digitalWriteFast(2,HIGH);
1d6: 5a 9a sbi 0x0b, 2 ; 11
pinModeFast(5,OUTPUT);
1d8: 55 9a sbi 0x0a, 5 ; 10
digitalWriteFast(5,LOW);
1da: 84 b5 in r24, 0x24 ; 36
1dc: 8f 7d andi r24, 0xDF ; 223
1de: 84 bd out 0x24, r24 ; 36
1e0: 5d 98 cbi 0x0b, 5 ; 11
delay(1);
1e2: 61 e0 ldi r22, 0x01 ; 1
1e4: 70 e0 ldi r23, 0x00 ; 0
1e6: 80 e0 ldi r24, 0x00 ; 0
1e8: 90 e0 ldi r25, 0x00 ; 0
1ea: 0e 94 f3 13 call 0x27e6 ; 0x27e6 <delay>
if((int) digitalReadFast(2) != LOW) error(2,5,1);
1ee: 4a 9b sbis 0x09, 2 ; 9
1f0: 07 c0 rjmp .+14 ; 0x200 <loop+0x7c>
1f2: 82 e0 ldi r24, 0x02 ; 2
1f4: 90 e0 ldi r25, 0x00 ; 0
1f6: 65 e0 ldi r22, 0x05 ; 5
1f8: 70 e0 ldi r23, 0x00 ; 0
1fa: 41 e0 ldi r20, 0x01 ; 1
1fc: 50 e0 ldi r21, 0x00 ; 0
1fe: 8b df rcall .-234 ; 0x116 <_Z5erroriii>
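So it's pretty clear what's going on. It's picking the pin/port for a 328 Arduino: the 2560 build emits exactly the same register accesses as the Uno build (I/O addresses 0x09/0x0a/0x0b and the timer register at 0x24), rather than the ones the working 1280 build uses (0x0c/0x0d/0x0e and the timer register at 0x0090). To be continued...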