speed of digitalWrite and digitalRead on Arduino Due.

I've used code from the posting "Fast DigitalWrite for Arduino Due" (Arduino Forum, Arduino Due section) to enable fast reads and writes on the Due, but I'm getting really strange results. Maybe someone can tell me what I'm doing wrong.
First, I made a library:

#ifndef DigitalRW_Direct
#define DigitalRW_Direct

#include <Arduino.h>

// Set or clear output `pin` by writing its bit mask straight to the owning
// port's PIO_SODR (val true) or PIO_CODR (val false) register.
// `pin` indexes g_APinDescription directly; no range check is performed.
inline void digitalWriteDirect(int pin, boolean val){
  if (!val) {
    g_APinDescription[pin].pPort->PIO_CODR = g_APinDescription[pin].ulPin;
    return;
  }
  g_APinDescription[pin].pPort->PIO_SODR = g_APinDescription[pin].ulPin;
}

// Write the bit mask of `pin` to its port's PIO_SODR register.
// `pin` indexes g_APinDescription directly; no range check is performed.
inline void digitalWriteDirectHigh(int pin){
  const uint32_t mask = g_APinDescription[pin].ulPin;
  g_APinDescription[pin].pPort->PIO_SODR = mask;
}

// Write the bit mask of `pin` to its port's PIO_CODR register.
// `pin` indexes g_APinDescription directly; no range check is performed.
inline void digitalWriteDirectLow(int pin){
  const uint32_t mask = g_APinDescription[pin].ulPin;
  g_APinDescription[pin].pPort->PIO_CODR = mask;
}

// Read `pin` from its port's PIO_PDSR register.
// Returns 1 when the pin's bit is set in PIO_PDSR, otherwise 0.
// `pin` indexes g_APinDescription directly; no range check is performed.
inline int digitalReadDirect(int pin){
  const uint32_t mask = g_APinDescription[pin].ulPin;
  return (g_APinDescription[pin].pPort->PIO_PDSR & mask) != 0;
}

#endif

You'll see I used separate HIGH and LOW functions just to see if the `if (val)` comparison was a bottleneck.
I then used this test code:

#include <Arduino.h>
#include <DigitalRW_Direct.h>




// Iteration count for every timing loop in setup(): 100,000 (not a million).
// The unrolled write loops perform 10 HIGH/LOW pairs per iteration.
long sampleSize = 100000;
// micros() timestamp captured immediately before the timing loop being measured.
long lastTime;

// Runs the whole benchmark once: five timing loops of sampleSize iterations
// each (three write variants on pin 7, two read variants on pin 8). For every
// loop the elapsed micros() is reported through printTime().
void setup() {
  // Brief pause before opening the serial port. Author's note: without it the
  // Due printed nonsense on the port (cause not established).
  delay(50);
  Serial.begin(115200); // open the serial port at 115200 baud
  Serial.print("Sample size: ");
  Serial.println(sampleSize);
  // Receives the result of each read; it is assigned but never read back.
  int n;

  // Pin 7 is the write target, pin 8 the read source.
  pinMode(7, OUTPUT);
  pinMode(8, INPUT);

  // --- Test 1: stock digitalWrite() ----------------------------------------
  // The body is unrolled by hand: 10 HIGH/LOW pairs (20 calls) per iteration,
  // so loop overhead is a small share of the measured time.
  Serial.println("digitalWrite:");
  lastTime = micros();
  for (long i = 0; i < sampleSize; i++) {
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
    digitalWrite(7, HIGH);
    digitalWrite(7, LOW);
  }
  printTime( micros() - lastTime);

  // --- Test 2: dedicated digitalWriteDirectHigh()/digitalWriteDirectLow() ---
  // Same 10-pair unrolled shape as test 1. Note the printed label says
  // "digitalWriteFast" although the functions called are the *Direct* ones.
  Serial.println("digitalWriteFast High/Low:");
  lastTime = micros();
  for (long i = 0; i < sampleSize; i++) {
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
    digitalWriteDirectHigh(7);
    digitalWriteDirectLow(7);
  }
  printTime( micros() - lastTime);

  // --- Test 3: digitalWriteDirect(pin, val) with constant HIGH/LOW ----------
  // Same 10-pair unrolled shape; the value is passed as an argument here
  // instead of being baked into the function name.
  Serial.println("digitalWriteFast:");
  lastTime = micros();
  for (long i = 0; i < sampleSize; i++) {
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
    digitalWriteDirect(7, HIGH);
    digitalWriteDirect(7, LOW);
  }
  printTime( micros() - lastTime);
  // --- Test 4: stock digitalRead() ------------------------------------------
  // Not unrolled: exactly one read per iteration, so these figures are not
  // directly comparable per-call with the write tests above.
  Serial.println("digitalRead:");
  lastTime = micros();

  for (long i = 0; i < sampleSize; i++) {
    n = digitalRead(8);
  }
  printTime( micros() - lastTime);

  // --- Test 5: digitalReadDirect() ------------------------------------------
  // One read per iteration, same shape as test 4.
  Serial.println("digitalReadFast:");
  lastTime = micros();
  for (long i = 0; i < sampleSize; i++) {
    n = digitalReadDirect(8);
  }
  printTime( micros() - lastTime);

}


// Intentionally empty: every measurement is taken once, from setup().
void loop() {}

//-----------------------------------------------------------------------------
void printTime( long microsecs) {
  int mins, secs, msecs, usecs;

  msecs = floor(microsecs / 1000);
  usecs = microsecs % 1000;

  secs = floor(msecs / 1000);
  msecs = msecs % 1000;

  mins = floor(secs / 60);
  secs = secs % 60;
  Serial.print("time: ");
  Serial.println(microsecs);

  Serial.print(mins);
  Serial.print(" min ");
  Serial.print(secs);
  Serial.print(" sec ");
  Serial.print(msecs);
  Serial.print(" msec ");
  Serial.print(usecs);
  Serial.println(" usecs");
  Serial.println("");
  delay(20);
}

I unrolled the loops to take out the looping overhead as much as possible and to give my meter some time to get the frequency readings. I get these timings:
Sample size: 100000
digitalWrite:
time: 4810732
0 min 4 sec 810 msec 732 usecs

digitalWriteFast High/Low:
time: 648482
0 min 0 sec 648 msec 482 usecs

digitalWriteFast:
time: 77501
0 min 0 sec 77 msec 501 usecs

digitalRead:
time: 107290
0 min 0 sec 107 msec 290 usecs

digitalReadFast:
time: 26518
0 min 0 sec 26 msec 518 usecs
I find it hard to believe there is such a difference between digitalWriteFast High/Low and digitalWriteFast. What am I missing?
Cheers
Mike