Timing and speed issues with pin change interrupt based routines - Arduino slow?

Here is an example of reading up to 8 pins in a pin-change interrupt. By getting down to basics we keep overheads low:

/*
ISR (PCINT0_vect)
 {
 // handle pin change interrupt for D8 to D13 here
 }  // end of PCINT0_vect

ISR (PCINT1_vect)
 {
 // handle pin change interrupt for A0 to A5 here
 }  // end of PCINT1_vect
*/

// Previous sample of PIND, kept between interrupts so the ISR can tell which
// pins changed. There is no initializer here, so unless it is seeded elsewhere
// it starts at 0 (all pins treated as previously low).
volatile byte oldPinD;

// Pin change interrupt for port D (D0 to D7).
// For each watched input that has gone from low to high since the previous
// interrupt, emit a short pulse on the matching port B output (D0 -> D8,
// D1 -> D9, D2 -> D10). Direct port access is used to keep the overhead low.
ISR (PCINT2_vect)
 {
 // handle pin change interrupt for D0 to D7 here

 // read all eight inputs in one go
 byte d = PIND;

 // (d ^ oldPinD) has a 1 for every pin that differs from the last sample;
 // the "& d" then keeps only those that are high now, i.e. rising edges.
 // Pins that went from high to low are therefore ignored.
 // NOTE(review): with pull-ups enabled, grounding a pin is a falling edge -
 // confirm that reacting on the rising edge only is what is wanted.
 byte changed = (d ^ oldPinD) & d;

if (changed & _BV (0))  // D0
  {
  PORTB |= _BV (0);   // pulse D8: set the bit ...
  PORTB &= ~_BV (0);  // ... then clear it again
  }

if (changed & _BV (1))  // D1
  {
  PORTB |= _BV (1);   // pulse D9: set the bit ...
  PORTB &= ~_BV (1);  // ... then clear it again
  }

if (changed & _BV (2))  // D2
  {
  PORTB |= _BV (2);   // pulse D10: set the bit ...
  PORTB &= ~_BV (2);  // ... then clear it again
  }
 
 // and so on
 
 // remember this sample as the baseline for the next interrupt
 oldPinD = d;
 }  // end of PCINT2_vect

void setup ()
  { 
  // pin change interrupt
  PCMSK2 |= 0xFF;  // all 8 pins
  PCIFR  |= _BV (PCIF2);   // clear any outstanding interrupts
  PCICR  |= _BV (PCIE2);   // enable pin change interrupts for D0 to D7
  
  // inputs
  for (byte i = 0; i <= 7; i++)
    digitalWrite (i, HIGH);  // pull-ups

  // outputs
  for (byte i = 8; i <= 11; i++)
    pinMode (i, OUTPUT);
  }

// Nothing to do in the main loop: the pin change ISR does all the work.
void loop ()
  {
  }

Results (grounding D0 and D1 at the same time):

You can see it took just over 2 µs for the first reaction, and only 375 ns for the second one (both pins were handled by the same interrupt, so the second did not pay the interrupt entry overhead again).

I think the worst case for you would be two pin changes that are close together, but not close enough to be serviced by the same interrupt.