Fastest way to right-shift an array of 4 bytes

If you need to shift an unsigned long very often (thousands of times per second), need it to be as fast as possible, and have flash to spare,
then you should know that the shift can be optimized, especially if you do an in-place shift, and even more so when the shift factor is constant.

A test sketch shows how the technique works: it first moves whole blocks of 8 bits (bytes) and then shifts the remaining few bits.
This test code is not faster for shift values smaller than 8, but it is faster for larger values.

If the shift factor is fixed, you can write a dedicated shift function, e.g. inline unsigned long shift17(unsigned long l) - see the test code below.

//
// FILE: shiftOptimize.pde
//   BY: Rob Tillaart
// DATE: 2011-jul-19
//
union t
{
  byte b[4];
  unsigned long l;
}

volatile temp;
volatile unsigned long y = 0;
unsigned long start;
unsigned long times = 1000L;

void setup()
{  
  Serial.begin(115200);
  Serial.println("shiftOptimize 0.1");

  unsigned long sum1 = test1();
  unsigned long sum2 = test2();
  Serial.print("average speedup factor: ");
  Serial.println(1.0* sum1/sum2, 2);
   
  shift17();

}

unsigned long test1()
{
  Serial.println("#===================#");
  Serial.println("# Normal shift 0-31 #");
  Serial.println("#===================#");

  unsigned long sum = 0;
  unsigned long min = 9999;
  unsigned long max = 0;
  unsigned long duration;

  for (int a=0; a <32; a++)
  {
    temp.l = 0xAA555555;
    start = micros();
    for (long  i=0; i< times; i++)
    {
      y = temp.l >> a;
    }
    duration = micros() - start;
    sum += duration;
    if (duration < min) min = duration;
    if (duration > max) max = duration;
    Serial.print(a);
    Serial.print(", ");
    Serial.println(duration);
  }
  Serial.print("average: ");
  Serial.println(sum/32);
  Serial.print("min: ");
  Serial.println(min);
  Serial.print("max: ");
  Serial.println(max);
  return sum;
}

unsigned long test2()
{ 
  Serial.println("#============================#");
  Serial.println("# Speed optimized shift 0-31 #");
  Serial.println("#============================#");

  unsigned long sum = 0;
  unsigned long min = 9999;
  unsigned long max = 0;
  unsigned long duration;
  for (int a=0; a <32; a++)
  {
    temp.l = 0xAA555555;
    int t=a;  // keep copy of a
    start = micros();
    for (long  i=0; i< times; i++)
    {
      if (a < 8);
      else if (a < 16)
      {
        temp.b[0] = temp.b[1];
        temp.b[1] = temp.b[2];
        temp.b[2] = temp.b[3];
        temp.b[3] = 0;
        a -= 8;
      }  
      else if (a < 24)
      {
        temp.b[0] = temp.b[2];
        temp.b[1] = temp.b[3];
        temp.b[2] = 0;
        temp.b[3] = 0;
        a -= 16;
      }   
      else
      {
        temp.b[0] = temp.b[3];
        temp.b[1] = 0;
        temp.b[2] = 0;
        temp.b[3] = 0;
        a -= 24;
      }
      y = temp.l >> a;
      // if (y != 0xAA555555 >> t) Serial.print('.');
    }
    duration = micros() - start;
    sum += duration;
    if (duration < min) min = duration;
    if (duration > max) max = duration;
    a = t;
    Serial.print(a);
    Serial.print(", ");
    Serial.println(duration);
  }
  Serial.print("average: ");
  Serial.println(sum/32);
  Serial.print("min: ");
  Serial.println(min);
  Serial.print("max: ");
  Serial.println(max);

  return sum;
}

unsigned long shift17()
{
  Serial.println("#==============#");
  Serial.println("# Shift17 test #");
  Serial.println("#==============#");

  unsigned long sum = 0;
  unsigned long min = 9999;
  unsigned long max = 0;
  unsigned long duration;

  temp.l = 0xAA555555;
  int a = 17;

  start = micros();
  for (long  i=0; i< times; i++)
  {
    temp.b[0] = temp.b[2];
    temp.b[1] = temp.b[3];
    temp.b[2] = 0;
    temp.b[3] = 0;
    y = temp.l >> 1;
  } 
  duration = micros() - start;
  sum += duration;
  if (duration < min) min = duration;
  if (duration > max) max = duration;

  Serial.print(a);
  Serial.print(", ");
  Serial.println(duration);

  Serial.print("average: ");
  Serial.println(sum/32);
  Serial.print("min: ");
  Serial.println(min);
  Serial.print("max: ");
  Serial.println(max);

  return sum;
}

// Intentionally empty: every measurement is started from setup().
void loop() {}