Fast, simple and small pin library with a C++ class, but with 'virtual' it's big

Blink.ino has a binary size of 1,054 bytes and digitalWrite() takes about 30 clock cycles. I want to make it smaller and faster, with a more intuitive coding style if possible. As my LED is attached to pin 13 and this cannot change at run time, I might as well hard-code the port.

Here is my first approach (a long and a short blink):

// Global handle for the LED on pin 13; constructing it with OUTPUT
// defines the pin and sets it as an output.
DigitalPin13 led(OUTPUT);

// Nothing to initialise here: the pin mode was already requested when
// `led` was constructed.
void setup()
{
}
void loop() {
  led.high(); // use a method
  delay(500);
  led.low();
  delay(500);
  if (!led.digitalRead()) { // use a method
    led.high();
  }
  delay(100);
  if (led) { // use read operator
    led = 0; // use assignment operator
  }
  delay(500);
}

The above can be implemented for a single pin with this simple code, which compiles to only 692 bytes:

// Hard-wired driver for digital pin 13. The member functions are defined
// out of line and operate directly on bit 5 of port B, so the class has
// no data members.
class DigitalPin13 {
public:
  // Creates the pin object and applies `inOutMode` via pinMode().
  DigitalPin13(const uint8_t inOutMode);

  // Selects the pin direction: OUTPUT, INPUT or INPUT_PULLUP.
  void pinMode(const uint8_t inOutMode);

  // Returns 1 when the pin's bit is set, otherwise 0.
  int digitalRead();

  // Set / clear the pin's output bit.
  void high();
  void low();

  // `pin = value;` : non-zero calls high(), zero calls low().
  void operator = (const uint8_t value);

  // Lets the pin be used directly in a condition, e.g. `if (pin)`.
  operator uint8_t() { return digitalRead(); }
};

// Applies the requested mode (OUTPUT, INPUT or INPUT_PULLUP) as soon as
// the pin object is created.
DigitalPin13::DigitalPin13(const uint8_t inOutMode)
{
  this->pinMode(inOutMode);
}

inline void DigitalPin13::pinMode(const uint8_t inOutMode) {
  switch (inOutMode) {
  case OUTPUT:
    DDRB |= 1 << 5;
    break;
  case INPUT:
  case INPUT_PULLUP:
    DDRB &= ~(1 << 5);
    if (INPUT_PULLUP == inOutMode)
      high();
    break;
  }
}

inline void DigitalPin13::high(void) {
  PORTB |= 1 << 5;
}

inline void DigitalPin13::low(void) {
  PORTB &= ~(1 << 5);
}

inline int DigitalPin13::digitalRead(void) {
  return (PORTB & 1 << 5) > 0;
}

/*
void DigitalPin13::operator uint8_t() {
  return digitalRead();
}
*/

// Assignment from a number: zero calls low(), any other value calls high().
inline void DigitalPin13::operator = (const uint8_t value) {
  if (value == 0) {
    low();
    return;
  }
  high();
}

// =========================================

// The LED pin; the constructor argument requests output mode.
DigitalPin13 led(OUTPUT);

// Intentionally empty: the pin is configured by the constructor of `led`.
void setup()
{
}

void loop() {
  led.high();
  delay(500);
  led.low();
  delay(500);
  if (!led.digitalRead()) {
    led.high();
  }
  delay(100);
  if (led) {
    led = 0;
  }
  delay(500);
}

So now I want to make use of inheritance, so that e.g. pinMode() is defined only once, in a base class with virtual methods, but then it compiles to 846 bytes:

class Pin {
public:
  Pin(void){
  };
  Pin(const uint8_t inOutMode);
  virtual void pinMode(const uint8_t inOutMode);
  virtual void high(void);
  virtual void low(void);
  virtual boolean digitalRead(void);
  //uint8_t operator uint8_t();
  //operator = (const uint8_t value);
private:
  virtual void pinModeOutput(void);
  virtual void pinModeInput(void);
  void pinModeInputPullup(void);
};

// Construct a pin and apply `inOutMode` through pinMode().
// NOTE(review): virtual calls made while the Pin base is being constructed
// resolve to Pin's own functions, not to overrides in a derived class, so a
// subclass's pinModeOutput()/pinModeInput() is not reached from here.
// Confirm this constructor is really wanted.
Pin::Pin(const uint8_t inOutMode)
{
  this->pinMode(inOutMode);
}

void Pin::pinMode(const uint8_t inOutMode) {
  switch (inOutMode) {
  case OUTPUT:
    pinModeOutput();
    break;
  case INPUT:
    pinModeInput();
    break;
  case INPUT_PULLUP:
    pinModeInputPullup();
    break;
  }
}

void Pin::pinModeInputPullup(void) {
  pinModeInput();
  high();
}

/*
uint8_t Pin::operator uint8_t() {
 return digitalRead() != 0;
 }
 
 Pin::operator = (const uint8_t value) {
 if (value)
 high();
 else
 low();
 }
 */


// ======================

/*
class PwmPin: 
 public Pin {
 void analogWrite(int value);
 }
 
 void PwmPin::analogWrite(int value) {
 }
 */

class DigitalPin13: 
public Pin {
public:
  DigitalPin13() : 
  Pin() {
  };
  void pinModeOutput(void); 
  void pinModeInput(void); 
  void high();
  void low();
  boolean digitalRead(void);
};

//DigitalPin13::DigitalPin13(const uint8_t inOutMode) {
//  Pin.pinMode(inOutMode);
//}

void DigitalPin13::pinModeOutput(void) {
  DDRB |= 1 << 5;
}

void DigitalPin13::pinModeInput(void) {
  DDRB &= ~(1 << 5);
}


inline void DigitalPin13::high(void) {
  PORTB |= 1 << 5;
}

inline void DigitalPin13::low(void) {
  PORTB &= ~(1 << 5);
}

// Return true when pin 13 is at a high level, false otherwise.
//
// The level is sampled from PINB, the port's input register, so the result
// reflects the real pin state when the pin is configured as an input.
// PORTB only holds the output latch / pull-up enable bit and does not
// follow an externally driven pin.
inline boolean DigitalPin13::digitalRead(void) {
  return (PINB & (1 << 5)) != 0;
}

DigitalPin13 led;

void setup() {
  led.pinMode(OUTPUT);
}

void loop() {
  led.high();
  delay(500);
  led.low();
  delay(500);
  if (led.digitalRead()) {
    led.low();
  }
}

Should I just write all the code separately for each pin and skip the inheritance, or is there a way to use the 'virtual' class so that it becomes smaller?

Q2: And what if I make a new SPI class where the SS pin is a parameter? How can the footprint be kept small, given that all the modules/hardware used are known at compile time:

SPI display(MSBFIRST, DigitalPin13(OUTPUT));
void setup(){}
void loop() {
  display.transfer(7); // this method will call SS.high/low()
}

DigitalPin13::pinMode was most probably compiled inline. Now that it's virtual, a real function call is generated.
Also, when the function was inlined, the switch could be removed, because the mode argument is a compile-time constant.

What about the digitalWriteFast library? That is faster.

Thanks, that's more like it.