Have I made this hardware SPI transfer as fast as possible?

Well, this is odd...

I finally figured out how to dump the assembly code myself, and here is the disassembly of my updateLEDs function:

00000624 <_Z10updateLEDsv>:
     624:	20 91 c2 01 	lds	r18, 0x01C2
     628:	30 91 c3 01 	lds	r19, 0x01C3
     62c:	40 91 c4 01 	lds	r20, 0x01C4
     630:	50 91 c5 01 	lds	r21, 0x01C5
     634:	80 91 a5 02 	lds	r24, 0x02A5
     638:	90 91 a6 02 	lds	r25, 0x02A6
     63c:	a0 91 a7 02 	lds	r26, 0x02A7
     640:	b0 91 a8 02 	lds	r27, 0x02A8
     644:	28 17       	cp	r18, r24
     646:	39 07       	cpc	r19, r25
     648:	4a 07       	cpc	r20, r26
     64a:	5b 07       	cpc	r21, r27
     64c:	08 f4       	brcc	.+2      	; 0x650 <_Z10updateLEDsv+0x2c>
     64e:	31 c0       	rjmp	.+98     	; 0x6b2 <_Z10updateLEDsv+0x8e>
     650:	f8 94       	cli
     652:	ec e9       	ldi	r30, 0x9C	; 156
     654:	f2 e0       	ldi	r31, 0x02	; 2
     656:	82 91       	ld	r24, -Z
     658:	8e bd       	out	0x2e, r24	; 46
     65a:	00 00       	nop
     65c:	00 00       	nop
     65e:	00 00       	nop
     660:	00 00       	nop
     662:	00 00       	nop
     664:	00 00       	nop
     666:	00 00       	nop
     668:	00 00       	nop
     66a:	00 00       	nop
     66c:	00 00       	nop
     66e:	82 e0       	ldi	r24, 0x02	; 2
     670:	e4 35       	cpi	r30, 0x54	; 84
     672:	f8 07       	cpc	r31, r24
     674:	81 f7       	brne	.-32     	; 0x656 <_Z10updateLEDsv+0x32>
     676:	00 00       	nop
     678:	00 00       	nop
     67a:	00 00       	nop
     67c:	00 00       	nop
     67e:	00 00       	nop
     680:	00 00       	nop
     682:	00 00       	nop
     684:	00 00       	nop
     686:	00 00       	nop
     688:	00 00       	nop
     68a:	8d b5       	in	r24, 0x2d	; 45
     68c:	8f 77       	andi	r24, 0x7F	; 127
     68e:	8d bd       	out	0x2d, r24	; 45
     690:	2a 98       	cbi	0x05, 2	; 5
     692:	2a 9a       	sbi	0x05, 2	; 5
     694:	2b 9a       	sbi	0x05, 3	; 5
     696:	2b 98       	cbi	0x05, 3	; 5
     698:	78 94       	sei
     69a:	2f 5d       	subi	r18, 0xDF	; 223
     69c:	3f 4f       	sbci	r19, 0xFF	; 255
     69e:	4f 4f       	sbci	r20, 0xFF	; 255
     6a0:	5f 4f       	sbci	r21, 0xFF	; 255
     6a2:	20 93 a5 02 	sts	0x02A5, r18
     6a6:	30 93 a6 02 	sts	0x02A6, r19
     6aa:	40 93 a7 02 	sts	0x02A7, r20
     6ae:	50 93 a8 02 	sts	0x02A8, r21
     6b2:	08 95       	ret

Strange that this shows only 10 NOPs when there should be 11!

Could it be that the whole time I thought I had 10, there were actually only 9?

Have I done something wrong here? This is how it was written in some sample code I found...

#define NOP asm volatile ("nop\n");