Last week I did a simulation where I needed a lot of random true/false (1 random bit) values and it took quite some time as for every random bit the random() function is called. It suddenly became clear that I threw away 30 random bits when I asked for 1. That smelled like inefficiency so I dived into it and I implemented three functions that generate random bits more efficient by caching the unused bits. A test-sketch shows a serious performance improvement, so I like to share these results.
Disclaimer: the sketch is only tested on an UNO so I do not know if other boards (esp ARM) show similar performance gains (but I expect there are).
The functions are:
getRandomBits(n) fetches n random bits per iteration. This is a generic function.
getRandomByte() is an faster version for 8 bits/1 byte.
flip() is a faster version for 1 bit (simulates the flipping of a coin, head tail, true false, 0, 1)
Note that these functions do not replace random(n), except when n is a power of 2.
So expect random( 8 ) to be as random as getRandomBits(3);
bool flip()
{
static uint32_t buf = 0;
static uint8_t idx = 0;
if (idx)
{
buf >>= 1;
idx--;
}
else
{
buf = random(); // refill
idx = 30;
}
return buf & 0x01;
}
// note: parameter n is not checked
uint32_t getRandomBits(uint8_t n) // n = 1..31
{
static uint32_t buf = 0;
static uint8_t idx = 0;
uint32_t t = 0;
if (n > idx)
{
n -= idx;
t = buf << n;
buf = random(); // refill 31 bits
idx = 31;
}
// for performance (10-20%)
if (t == 0) t = buf & ((1 << n) - 1);
else t |= buf & ((1 << n) - 1);
buf >>= n;
idx -= n;
return t;
}
// has room for improvement.
uint8_t getRandomByte()
{
static uint32_t buf = 0;
static uint8_t idx = 0;
if (idx > 2)
{
buf = random(); // refill 31 bits
idx = 0;
}
uint8_t t = buf & 0xFF;
buf >>= 8;
idx++;
return t;
}
//
// FILE: getRandomBits.ino
// AUTHOR: Rob Tillaart
// VERSION: 0.1.00
// PURPOSE: faster fetching of random bits
// DATE: 2015-08-10
// URL:
//
// Released to the public domain
//
uint32_t start;
uint32_t d[3];
uint16_t count = 0;
bool flip()
{
static uint32_t buf = 0;
static uint8_t idx = 0;
if (idx)
{
buf >>= 1;
idx--;
}
else
{
buf = random(); // refill
idx = 30;
}
return buf & 0x01;
}
uint32_t getRandomBits(uint8_t n) // n = 1..31
{
static uint32_t buf = 0;
static uint8_t idx = 0;
uint32_t t = 0;
if (n > idx)
{
n -= idx;
t = buf << n;
buf = random(); // refill 31 bits
idx = 31;
}
// for performance (10-20%)
if (t == 0) t = buf & ((1 << n) - 1);
else t |= buf & ((1 << n) - 1);
buf >>= n;
idx -= n;
return t;
}
// room for improvement.
uint8_t getRandomByte()
{
static uint32_t buf = 0;
static uint8_t idx = 0;
if (idx > 2)
{
buf = random(); // refill 31 bits
idx = 0;
}
uint8_t t = buf & 0xFF;
buf >>= 8;
idx++;
return t;
}
void setup()
{
Serial.begin(115200);
Serial.println("Start getRandomBits 0.1.00");
Serial.println("Timing in micros for 1000 calls\n");
Serial.println("Flipping coin - random(2)");
count = 0;
start = micros();
for (int i = 0; i < 1000; i++) count += random(2);
d[0] = micros() - start;
Serial.print("time:\t");
Serial.println(d[0]);
Serial.print("count:\t");
Serial.println(count);
Serial.println("\nFlipping coin - flip()");
count = 0;
start = micros();
for (int i = 0; i < 1000; i++) count += flip();
d[2] = micros() - start;
Serial.print("time:\t");
Serial.println(d[2]);
Serial.print("count:\t");
Serial.println(count);
Serial.print("factor:\t");
Serial.println(1.0 * d[0] / d[2], 2);
Serial.println("\ngetRandomBits() - 1..8");
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomBits(1);
d[1] = micros() - start;
Serial.println(d[1]);
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomBits(2);
d[1] = micros() - start;
Serial.println(d[1]);
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomBits(3);
d[1] = micros() - start;
Serial.println(d[1]);
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomBits(4);
d[1] = micros() - start;
Serial.println(d[1]);
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomBits(5);
d[1] = micros() - start;
Serial.println(d[1]);
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomBits(6);
d[1] = micros() - start;
Serial.println(d[1]);
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomBits(7);
d[1] = micros() - start;
Serial.println(d[1]);
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomBits(8);
d[1] = micros() - start;
Serial.println(d[1]);
Serial.println("\ngetRandomByte()");
start = micros();
for (int i = 0; i < 1000; i++) count += getRandomByte();
d[1] = micros() - start;
Serial.println(d[1]);
}
void loop()
{
}
Start getRandomBits 0.1.00
Timing in micros for 1000 calls
Flipping coin - random(2)
time: 96788
count: 522
Flipping coin - flip()
time: 5372
count: 497
factor: 18.02
getRandomBits() - 1..8
10000
12088
14600
17040
19468
21932
24348
26740
getRandomByte()
20760
prelimenary conclusions:
- These first results show that for flipping a coin the performance improvement is a factor 18, when comparing a call to random(2) and the optimized flip(). Different test-runs gave numbers between 15 and 20, so substantially faster on average. Note that when the cache is refilled the call takes more time!
- The flip() implementation is also almost twice as fast as the generic call to getRandomBits(1).
- the getRandomByte() implementation is ~20% faster than getRandomBits( 8 ), enough to keep it.
TODO:
- make a decent library for these functions.
- squeeze some more uSecs from it
- test ...
As always remarks are welcome