Arduino Due digitalWrite vs. direct port manipulation speed

There was a recent topic on AvrFreaks. I wrote this sketch to produce 50 toggles, i.e. 100 edges, and measured the start and finish times with a logic analyser.

The interesting result is that digitalWrite() on the Due is appalling.
The M0 and M3 are not that impressive.
The M4 gives a very fast result.

In practice, many Arduino apps know the wiring at compile time, so they can use the appropriate register writes directly.
If you have to determine the GPIO at run time, it is still well worth writing a mask to the port address rather than calling digitalWrite().

//                 F072 @ 48MHz  F103 @ 64MHz  L476 @ 80MHz  F446 @ 180MHz    SAM3X @ 84MHz  SAMD21 @ 48MHz  UNO @ 16MHz
//digitalWrite:     98.75us 47    101.6us 65     41.33us 33   18.5us 33cycle 207.2us 174      154.8us 74      356.5us  57
//PortAddress:      14.83    7     17.29  11      7.67    6    3.96   7       13.17   11       25.29  12       31.92    5
//ReadModifyWrite:  10.58    5     14.21   9      7.67    6    2.83   5       10.79    9       20.46  10       12.63    2
//WriteOnly:         4.33    2      3.29   2      1.33    1    0.58   1        2.38    2        8.96   4       12.63    2

// Selects the toggle implementation that TGL2..TGL50 are built from.
// Replace TGL_RMW with TGL_ARD, TGL_ADS or TGL_WO to benchmark another method.
// Wrapped in do { } while (0) so that "TGL;" is exactly one statement and
// stays valid directly under an if/else.
#define TGL  do { TGL_RMW; } while (0)

/*
 * Per-board pin mapping and direct register access.
 *
 *   P8 / B8, P9 / B9     port and bit number this sketch uses for Arduino
 *                        digital pins 8 (toggle signal) and 9 (start/end marker)
 *   PIN_HIGH / PIN_LOW   the access used by TGL_WO and by the marker in loop()
 *   PIN_HIGHX / PIN_LOWX read-modify-write (|= / &= ~) of the output register,
 *                        used by TGL_RMW
 *
 * The leading "#if 0" only exists so that every board can be written as an
 * "#elif" branch.
 */
#if 0
#elif defined(ARDUINO_SAM_DUE)
#define P8 PIOC
#define B8 22
#define P9 PIOC
#define B9 21
#define PIN_HIGH(port, pin)   ((port)->PIO_SODR = (1 << (pin)))
#define PIN_LOW(port, pin)    ((port)->PIO_CODR = (1 << (pin)))
#define PIN_HIGHX(port, pin)  ((port)->PIO_ODSR |= (1 << (pin)))
#define PIN_LOWX(port, pin)   ((port)->PIO_ODSR &= ~(1 << (pin)))

/*
 * SAMD, PORT_IOBUS variant. This branch used to sit behind a second, identical
 * "#elif defined(ARDUINO_ARCH_SAMD)" and could never be selected. It is now
 * opt-in: define SAMD_USE_IOBUS (before this point or with -D) to benchmark it.
 * NOTE(review): PORT_IOBUS is presumably the single-cycle alias of the PORT
 * peripheral -- confirm against the SAMD21 datasheet.
 */
#elif defined(ARDUINO_ARCH_SAMD) && defined(SAMD_USE_IOBUS)
#define P8 PORT_IOBUS->Group[0]
#define B8 6
#define P9 PORT_IOBUS->Group[0]
#define B9 7
#define PIN_HIGH(port, pin)   ((port).OUTSET.reg = (1 << (pin)))
#define PIN_LOW(port, pin)    ((port).OUTCLR.reg = (1 << (pin)))
#define PIN_HIGHX(port, pin)  ((port).OUT.reg |= (1 << (pin)))
#define PIN_LOWX(port, pin)   ((port).OUT.reg &= ~(1 << (pin)))

/*
 * SAMD, default variant (selected when SAMD_USE_IOBUS is not defined).
 * PIN_HIGH / PIN_LOW ignore their port argument and always write
 * REG_PORT_OUTSET0 / REG_PORT_OUTCLR0.
 */
#elif defined(ARDUINO_ARCH_SAMD)
#define P8 REG_PORT_OUT0
#define B8 6
#define P9 REG_PORT_OUT0
#define B9 7
#define PIN_HIGH(port, pin)   (REG_PORT_OUTSET0 = (1 << (pin)))
#define PIN_LOW(port, pin)    (REG_PORT_OUTCLR0 = (1 << (pin)))
#define PIN_HIGHX(port, pin)  ((port) |= (1 << (pin)))
#define PIN_LOWX(port, pin)   ((port) &= ~(1 << (pin)))

#elif defined(ARDUINO_ARCH_STM32)
#define P8 GPIOA
#define B8 7
#define P9 GPIOC
#define B9 7
#define PIN_HIGH(port, pin)   ((port)->BSRR = (1 << (pin)))
// Alternative PIN_LOW that goes through the upper half-word of BSRR instead of BRR.
// NOTE(review): some STM32 families may not declare a BRR member -- confirm for the target part.
//#define PIN_LOW(port, pin)    ((port)->BSRR = (1 << ((pin) + 16)))
#define PIN_LOW(port, pin)    ((port)->BRR = (1 << (pin)))
#define PIN_HIGHX(port, pin)  ((port)->ODR |= (1 << (pin)))
#define PIN_LOWX(port, pin)   ((port)->ODR &= ~(1 << (pin)))

// AVR: both macro families are the same |= / &= ~ on PORTB, so TGL_WO and
// TGL_RMW expand to identical code on this board.
#elif defined(ARDUINO_AVR_UNO)
#define P8 PORTB
#define B8 0
#define P9 PORTB
#define B9 1
#define PIN_HIGH(port, pin)   ((port) |= (1 << (pin)))
#define PIN_LOW(port, pin)    ((port) &= ~(1 << (pin)))
#define PIN_HIGHX(port, pin)  ((port) |= (1 << (pin)))
#define PIN_LOWX(port, pin)   ((port) &= ~(1 << (pin)))
#endif

/*
 * One HIGH -> LOW pulse (two edges) on digital pin 8, implemented four ways:
 *   TGL_ARD  Arduino digitalWrite()
 *   TGL_ADS  through the port pointer and masks that setup() looks up at run time
 *   TGL_RMW  compile-time port, read-modify-write (PIN_HIGHX / PIN_LOWX)
 *   TGL_WO   compile-time port, PIN_HIGH / PIN_LOW
 *
 * Every macro is wrapped in do { } while (0) so that "NAME;" is exactly one
 * statement and remains valid directly under an if/else.
 */
#define TGL_ARD do { digitalWrite(8, HIGH); digitalWrite(8, LOW); } while (0)
// d8PinClr already holds the complemented mask, so the clear step needs no "~".
#define TGL_ADS do { *d8Port |= d8PinSet; *d8Port &= d8PinClr; } while (0)
#define TGL_RMW do { PIN_HIGHX(P8, B8); PIN_LOWX(P8, B8); } while (0)
#define TGL_WO  do { PIN_HIGH(P8, B8); PIN_LOW(P8, B8); } while (0)

/*
 * Straight-line repetition of TGL (no loop): each level doubles the previous
 * one, and TGL50 = 32 + 16 + 2 pulses, i.e. 100 edges.
 */
#define TGL2  do { TGL; TGL; } while (0)
#define TGL4  do { TGL2; TGL2; } while (0)
#define TGL8  do { TGL4; TGL4; } while (0)
#define TGL16 do { TGL8; TGL8; } while (0)
#define TGL32 do { TGL16; TGL16; } while (0)
#define TGL50 do { TGL32; TGL16; TGL2; } while (0)

// Width of one port register word: 8 bits on AVR, 32 bits on every other core.
#if defined(__AVR__)
typedef uint8_t PortWord;
#else
typedef uint32_t PortWord;
#endif

// Run-time lookup results for digital pin 8, filled in by setup() and used by TGL_ADS.
volatile PortWord *d8Port;   // output register of the port that carries pin 8
PortWord d8PinSet;           // bit mask of pin 8 within that register
PortWord d8PinClr;           // complement of d8PinSet

void setup()
{
    Serial.begin(9600);
    Serial.print("toggle GPIO with OUTSET @ F_CPU = ");
    Serial.print(F_CPU / 1000000);
    Serial.println("MHz");
    pinMode(13, OUTPUT);
    pinMode(8, OUTPUT);  //toggle signal
    d8Port = portOutputRegister(digitalPinToPort(8));
    d8PinSet = digitalPinToBitMask(8);
    d8PinClr = ~d8PinSet;
    pinMode(9, OUTPUT);  //start, end signal
}

void loop()
{
    PIN_HIGH(P9, B9);  //digital#9 PC21
    TGL50;   //100 edges digital#8 PA7 
    PIN_LOW(P9, B9);
    digitalWrite(13, HIGH);
    delay(500);
    digitalWrite(13, LOW);
    delay(500);
}

David.