This issue is the result of compiler optimization; you can see it in the assembler listing of your sketch.
As newbie said, this is the compiler "optimizing" your code. When you do not actually use the contents of the array, the compiler notices and does not bother storing the data read from the port (it still has to READ from the port, as required by the "volatile" definitions somewhere deep inside the CMSIS .h files):
00080148 <loop>:
80148: b508 push {r3, lr}
This function disables IRQ interrupts by setting the I-bit in the CPSR.
Can only be executed in Privileged modes.
*/
__attribute__( ( always_inline ) ) static __INLINE void __disable_irq(void)
{
__ASM volatile ("cpsid i");
8014a: b672 cpsid i
8014c: 4b18 ldr r3, [pc, #96] ; (801b0 <loop+0x68>)
8014e: 4919 ldr r1, [pc, #100] ; (801b4 <loop+0x6c>)
80150: 688a ldr r2, [r1, #8]
80152: 6bd8 ldr r0, [r3, #60] ; 0x3c
80154: 6bd8 ldr r0, [r3, #60] ; 0x3c
80156: 6bd8 ldr r0, [r3, #60] ; 0x3c
80158: 6bd8 ldr r0, [r3, #60] ; 0x3c
8015a: 6bd8 ldr r0, [r3, #60] ; 0x3c
8015c: 6bd8 ldr r0, [r3, #60] ; 0x3c
8015e: 6bd8 ldr r0, [r3, #60] ; 0x3c
80160: 6bd8 ldr r0, [r3, #60] ; 0x3c
80162: 6bd8 ldr r0, [r3, #60] ; 0x3c
80164: 6bd8 ldr r0, [r3, #60] ; 0x3c
80166: 6bd8 ldr r0, [r3, #60] ; 0x3c
80168: 6bd8 ldr r0, [r3, #60] ; 0x3c
8016a: 6bd8 ldr r0, [r3, #60] ; 0x3c
8016c: 6bd8 ldr r0, [r3, #60] ; 0x3c
8016e: 6bdb ldr r3, [r3, #60] ; 0x3c
80170: 6889 ldr r1, [r1, #8]
80172: 428a cmp r2, r1
80174: bf34 ite cc
When you print the array, the compiler can no longer optimize the stores away:
00080148 <loop>:
80148: b510 push {r4, lr}
8014a: f5ad 5dfa sub.w sp, sp, #8000 ; 0x1f40
This function disables IRQ interrupts by setting the I-bit in the CPSR.
Can only be executed in Privileged modes.
*/
__attribute__( ( always_inline ) ) static __INLINE void __disable_irq(void)
{
__ASM volatile ("cpsid i");
8014e: b672 cpsid i
80150: 4b2d ldr r3, [pc, #180] ; (80208 <loop+0xc0>)
80152: 492e ldr r1, [pc, #184] ; (8020c <loop+0xc4>)
80154: 688a ldr r2, [r1, #8]
80156: 6bd8 ldr r0, [r3, #60] ; 0x3c
80158: b2c0 uxtb r0, r0
8015a: 9000 str r0, [sp, #0]
8015c: 6bd8 ldr r0, [r3, #60] ; 0x3c
8015e: b2c0 uxtb r0, r0
80160: 9001 str r0, [sp, #4]
80162: 6bd8 ldr r0, [r3, #60] ; 0x3c
80164: b2c0 uxtb r0, r0
80166: 9002 str r0, [sp, #8]
80168: 6bd8 ldr r0, [r3, #60] ; 0x3c
8016a: b2c0 uxtb r0, r0
8016c: 9003 str r0, [sp, #12]
8016e: 6bd8 ldr r0, [r3, #60] ; 0x3c
80170: b2c0 uxtb r0, r0
80172: 9004 str r0, [sp, #16]
80174: 6bd8 ldr r0, [r3, #60] ; 0x3c
80176: b2c0 uxtb r0, r0
80178: 9005 str r0, [sp, #20]
8017a: 6bd8 ldr r0, [r3, #60] ; 0x3c
8017c: b2c0 uxtb r0, r0
8017e: 9006 str r0, [sp, #24]
80180: 6bd8 ldr r0, [r3, #60] ; 0x3c
80182: b2c0 uxtb r0, r0
(This is using the code from Post #2, so it has the extra uxtb instructions to isolate the low 8 bits.)
*a++ = PIOD->PIO_PDSR & 0B00000000000000000000000011111111;//3ticks
Three cycles for that statement is a very optimistic guess/result, given a slow peripheral bus, wait states on the flash memory (complicated by "flash acceleration"), and who knows what sort of synchronization issues. It's very difficult to predict ARM timing with any certainty.