For a project I needed both / 10 and %10 several times. Although the straightforward code was fast enough I investigated if it could be even faster. Could be useful e.g. for a library or so. This resulted in the following divmod10() function that calculates both the divide by 10 and the modulo 10 "simultaneously".
Note 1: the code is written for unsigned 32 bit math. It can not be used for signed numbers. One can derive a 16 bit version by removing one line and making the types 16 bit.
Note 2: tested only on a UNO
void divmod10(uint32_t in, uint32_t &div, uint32_t &mod)
{
// q = in * 0.8;
uint32_t q = (in >> 1) + (in >> 2);
q = q + (q >> 4);
q = q + (q >> 8);
q = q + (q >> 16); // not needed for 16 bit version
// q = q / 8; ==> q = in *0.1;
q = q >> 3;
// determine error
uint32_t r = in - ((q << 3) + (q << 1)); // r = in - q*10;
div = q + (r > 9);
if (r > 9) mod = r - 10;
else mod = r;
}
The code is based upon the divu10() code from the book Hackers Delight1. My insight was that the error formula in divu10() was in fact modulo 10 but not always. Sometimes it was 10 more. I wrote a small test-sketch that is not exhaustive but gives quite some confidence. The performance of the divmod10() is very good, the output of the test-sketch determines timing in microseconds per call.
i/10 38.6920
i%10 38.6120
divmod10 14.9240
Divmod10() is more than twice the speed of /10 or %10. When both are needed the gain is approx a factor 5, or ~60 micros per calculation.
Test-sketch
//
// FILE: divmod10.ino
// AUTHOR: Rob Tillaart
// DATE: 2013-05-20
//
// PUPROSE: fast divide and modulo by 10 in one function
//
// Code based upon book hackers delight
//
unsigned long start = 0;
unsigned long stop = 0;
volatile unsigned long q;
void setup()
{
Serial.begin(115200);
Serial.println("testing divmod10()\n");
Serial.print("i/10\t");
start = micros();
for (unsigned long i = 0; i < 1000; i++)
{
q = i/10;
}
stop = micros();
Serial.println((stop - start)/1000.0, 4);
Serial.print("i%10\t");
start = micros();
for (unsigned long i = 0; i < 1000; i++)
{
q = i%10;
}
stop = micros();
Serial.println((stop - start)/1000.0, 4);
Serial.println("--------------");
Serial.print("divmod10\t");
start = micros();
uint32_t m = 0;
uint32_t d = 0;
for (unsigned long i = 0; i < 1000; i++)
{
divmod10(i, d, m);
}
stop = micros();
Serial.println((stop - start)/1000.0, 4);
Serial.println("--------------");
Serial.println("test div 0..65535");
for (unsigned int i = 0; i< 65535; i++)
{
uint32_t a = i/10;
uint32_t d;
uint32_t m;
divmod10(i, d, m);
if ( abs(a - d) > 0)
{
Serial.print(i);
Serial.print("\t");
Serial.print(a);
Serial.print("\t");
Serial.println(d);
}
}
Serial.println("test mod 0..65535");
for (unsigned int i = 0; i < 65535; i++)
{
uint32_t a = i%10;
uint32_t d;
uint32_t m;
divmod10(i, d, m);
if ( abs(a - m) > 0)
{
Serial.print(i);
Serial.print("\t");
Serial.print(a);
Serial.print("\t");
Serial.println(m);
}
}
Serial.println("test div 0..maxlong (20K random samples)");
for (unsigned int i = 0; i< 20000; i++)
{
uint32_t n = random(0xFFFFFFFF);
uint32_t a = n/10;
uint32_t d;
uint32_t m;
divmod10(n, d, m);
if ( abs(a - d) > 0)
{
Serial.print(n);
Serial.print("\t");
Serial.print(a);
Serial.print("\t");
Serial.println(d);
}
}
Serial.println("test mod 0..maxlong (20K random samples)");
for (unsigned int i = 0; i< 20000; i++)
{
uint32_t n = random(0xFFFFFFFF);
uint32_t a = n%10;
uint32_t d;
uint32_t m;
divmod10(n, d, m);
if ( abs(a - m) > 0)
{
Serial.print(n);
Serial.print("\t");
Serial.print(a);
Serial.print("\t");
Serial.println(m);
}
}
Serial.println("done");
}
void loop()
{
}
void divmod10(uint32_t in, uint32_t &div, uint32_t &mod)
{
uint32_t q = (in >> 1) + (in >> 2);
q = q + (q >> 4);
q = q + (q >> 8);
q = q + (q >> 16);
q = q >> 3;
uint32_t r = in - ((q << 3) + (q << 1)); // r = in - q*10;
div = q + (r > 9);
if (r > 9) mod = r - 10;
else mod = r;
}
As always comments, remarks are welcome.
1 - http://www.amazon.com/Hackers-Delight-Edition-Henry-Warren/dp/0321842685/ -