@DrAzzy I was aware that Adafruit Neopixel would work with the Nano Every. I had even gone to the trouble of attempting to modify it to work with the 20MHz clock frequency, that is when I discovered the clock speed was actually 16MHz.
Here is the code I had modified for 20MHz, it appears to work ok with a string of 10 leds, didn't have more to test it on, and I made no modifications to the 400khz code section, had not gotten around to looking at that part, and have no leds for testing anyway.
// ***********************************************************************
// **************** my code for atmega4809 *******************************
// 20 MHz(ish) AVR --------------------------------------------------------
#elif (F_CPU == 20000000UL)
//#warning "processing for 20MHz clock" F_CPU
#ifdef NEO_KHZ400 // 800 KHz check needed only if 400 KHz support enabled
if(is800KHz) {
#endif
// WS2811 and WS2812 have different hi/lo duty cycles; this is
// similar but NOT an exact copy of the prior 400-on-8 code.
// 25 inst. clocks per bit: HHHHHHHxxxxxxLLLLLLLLLLLL
// ST instructions: ^ ^ ^ (T=0,7,13)
volatile uint8_t next, bit;
hi = *port | pinMask;
lo = *port & ~pinMask;
next = lo;
bit = 8;
asm volatile(
"head20:" "\n\t" // Clk Pseudocode (T = 0)
"st %a[port], %[hi]" "\n\t" // 2 PORT = hi (T = 2)
"sbrc %[byte], 7" "\n\t" // 1-2 if(b & 128)
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 4)
"dec %[bit]" "\n\t" // 1 bit-- (T = 5)
"rjmp .+0" "\n\t" // 2 nop nop (T = 7)
"st %a[port], %[next]" "\n\t" // 2 PORT = next (T = 9)
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 10)
"breq nextbyte20" "\n\t" // 1-2 if(bit == 0) (from dec above)
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 12)
"nop" "\n\t" // 1 nop (T = 13)
"st %a[port], %[lo]" "\n\t" // 2 PORT = lo (T = 15)
"rjmp .+0" "\n\t" // 2 nop nop (T = 17)
"rjmp .+0" "\n\t" // 2 nop nop (T = 19)
"rjmp .+0" "\n\t" // 2 nop nop (T = 21)
"rjmp .+0" "\n\t" // 2 nop nop (T = 23)
"rjmp head20" "\n\t" // 2 -> head20 (next bit out)
"nextbyte20:" "\n\t" // (T = 12)
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 13)
"st %a[port], %[lo]" "\n\t" // 2 PORT = lo (T = 15)
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 17)
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 19)
"rjmp .+0" "\n\t" // 2 nop nop (T = 21)
"rjmp .+0" "\n\t" // 2 nop nop (T = 23)
"brne head20" "\n" // 2 if(i != 0) -> (next byte)
: [port] "+e" (port),
[byte] "+r" (b),
[bit] "+r" (bit),
[next] "+r" (next),
[count] "+w" (i)
: [ptr] "e" (ptr),
[hi] "r" (hi),
[lo] "r" (lo));
#ifdef NEO_KHZ400
} else { // 400 KHz
// The 400 KHz clock on 16 MHz MCU is the most 'relaxed' version.
// 40 inst. clocks per bit: HHHHHHHHxxxxxxxxxxxxLLLLLLLLLLLLLLLLLLLL
// ST instructions: ^ ^ ^ (T=0,8,20)
volatile uint8_t next, bit;
hi = *port | pinMask;
lo = *port & ~pinMask;
next = lo;
bit = 8;
asm volatile(
"head40:" "\n\t" // Clk Pseudocode (T = 0)
"st %a[port], %[hi]" "\n\t" // 2 PORT = hi (T = 2)
"sbrc %[byte] , 7" "\n\t" // 1-2 if(b & 128)
"mov %[next] , %[hi]" "\n\t" // 0-1 next = hi (T = 4)
"rjmp .+0" "\n\t" // 2 nop nop (T = 6)
"rjmp .+0" "\n\t" // 2 nop nop (T = 8)
"st %a[port], %[next]" "\n\t" // 2 PORT = next (T = 10)
"rjmp .+0" "\n\t" // 2 nop nop (T = 12)
"rjmp .+0" "\n\t" // 2 nop nop (T = 14)
"rjmp .+0" "\n\t" // 2 nop nop (T = 16)
"rjmp .+0" "\n\t" // 2 nop nop (T = 18)
"rjmp .+0" "\n\t" // 2 nop nop (T = 20)
"st %a[port], %[lo]" "\n\t" // 2 PORT = lo (T = 22)
"nop" "\n\t" // 1 nop (T = 23)
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 24)
"dec %[bit]" "\n\t" // 1 bit-- (T = 25)
"breq nextbyte40" "\n\t" // 1-2 if(bit == 0)
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 27)
"nop" "\n\t" // 1 nop (T = 28)
"rjmp .+0" "\n\t" // 2 nop nop (T = 30)
"rjmp .+0" "\n\t" // 2 nop nop (T = 32)
"rjmp .+0" "\n\t" // 2 nop nop (T = 34)
"rjmp .+0" "\n\t" // 2 nop nop (T = 36)
"rjmp .+0" "\n\t" // 2 nop nop (T = 38)
"rjmp head40" "\n\t" // 2 -> head40 (next bit out)
"nextbyte40:" "\n\t" // (T = 27)
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 28)
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 30)
"rjmp .+0" "\n\t" // 2 nop nop (T = 32)
"st %a[port], %[lo]" "\n\t" // 2 PORT = lo (T = 34)
"rjmp .+0" "\n\t" // 2 nop nop (T = 36)
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 38)
"brne head40" "\n" // 1-2 if(i != 0) -> (next byte)
: [port] "+e" (port),
[byte] "+r" (b),
[bit] "+r" (bit),
[next] "+r" (next),
[count] "+w" (i)
: [ptr] "e" (ptr),
[hi] "r" (hi),
[lo] "r" (lo));
}
#endif // NEO_KHZ400
// ***********************************************************************
// ***********************************************************************