Mega vs Due execution time comparison

I tried the simple case on a Grand Central and got about 17ns...
(However, I was wrong about it having a fast IOBUS. That's on the SAMD21 and RP2040, but not on the SAMD51. I think I've had that discussion before - the fast IOBUS and actual cache memory don't get along...)

// Alias: every use of SerialUSB in this sketch is replaced by the Serial
// object, so the code below can keep the SerialUSB spelling.
#define SerialUSB Serial
// One-time initialisation for the timing test.
//
// Configures pin 53 as an output, opens the serial port at 9600 baud, waits
// for it to become ready, and then prints the raw value of the NVMCTRL CTRLA
// register in hexadecimal.
//
// NOTE(review): the printed label says "flash wait states", but the whole
// CTRLA register is printed; presumably the wait-state setting is a field
// within it -- confirm against the device datasheet.
void setup() {
  pinMode(53, OUTPUT);

  SerialUSB.begin(9600);
  while (!SerialUSB) {
    // Spin until the serial object evaluates as ready.
  }
  delay(1000);  // pause one second before producing any output

  SerialUSB.print("Configured flash wait states: ");
  const auto nvmCtrlA = NVMCTRL->CTRLA.reg;
  SerialUSB.println(nvmCtrlA, HEX);
}
// Shorthand for entry 3 of the PORT peripheral's Group array, used for the
// direct register writes in myloop().
// NOTE(review): presumably groups 0..3 correspond to ports A..D, hence the
// name -- confirm against the device header.
#define PORTD (PORT->Group[3])

//__attribute__ ((section(".ramfunc")))
void myloop() {
  while (1) {
    PORTD.OUT.reg = 1 << 10;
    PORTD.OUT.reg = 1 << 11;
    delayMicroseconds(1);
    PORTD.OUT.reg = 0;
    delayMicroseconds(1);
  }
}

// Arduino main-loop entry point.
//
// Simply hands control to myloop(); because myloop() contains an unconditional
// infinite loop, this call does not return.
void loop() {
  myloop();  // runs the pulse loop forever
}