I have now updated the code in two ways. I use integer instead of long integer in comparisons, which is possible in a significant part of the code. It reduced the time from about 25 us to 22 us. Then I introduced an additional look-up table to narrow the search in square table, so only one or two comparisons was needed with this table. Then the calculation time was reduced to 8 to 13 us. The statistics of error is still the same:
/*
This code is intended to test calculation speed of some aritmetric operations
and perhaps more. It may be done by using a testpin to be measured by oscilloscope
*/
const uint8_t TestOutPin = 13; // Test pin for signal to oscilloscope
const uint8_t TestOutPin2 = 5; // Test pin for to test speed
const uint8_t AnalogPotPin = A0; // Analog input pin that the potentiometer is attached to
const unsigned long int PrintPeriodMs = 3000;
const int Max10BitP1 = 1023 + 1;
const int Max9Bit = 511;
const float Rad60degr = 2 * M_PI / 6;
unsigned long int printTrigMillis = 0;
int i1 = 160;
int i2 = 170;
int i3 = 175;
long int li1 = 180;
long int li2 = 190;
long int li3 = 200;
float r1=0;
float r2=0;
int printCnt = 0;
void setTestPin() { //Uses testpin 13
PORTB |= 0B00100000;
};
void clearTestPin() { //Uses testpin 13
PORTB &= 0B11011111;
};
/*
Table based square root function for integers only. Accuracy within 1 % when input values are above 2500.
Update using integer instead of long integer when possible caused reduction from 22-25 us to 18-22 us.
It is resonable to suggest, that the long int comparisons is the part that recuire time.
The highest input x possible is 3xMax9Bit^2 = 783363. It recuire 4 long comparisons and 3 times long division by 4.
However, it seems this initial correction is quite fast.
Now changed x to to integer when possible, but the binary search is still time consuming
With update using look-up table to narrow binary search the time was reduced from 18-22 us to 8-13 us.
*/
int broendegaard3_isqrt(unsigned long x) { // Calculation time is in range 8 to 13 us.
static const unsigned int sq_table[] = {
3969, //0 the table is (index+63)^2
4096,4225,4356,4489,4624,4761,4900,5041,5184,5329, //1 to 10
5476,5625,5776,5929,6084,6241,6400,6561,6724,6889, //11-20
7056,7225,7396,7569,7744,7921,8100,8281,8464,8649, //21-30
8836,9025,9216,9409,9604,9801,10000,10201,10404,10609, //31-40
10816,11025,11236,11449,11664,11881,12100,12321,12544,12769,//41-50
12996,13225,13456,13689,13924,14161,14400,14641,14884,15129,//51-60
15376,15625,15876,16129,16384}; //61-65
static const byte lookUp_m[] = {
0, //0
1,2,4,6,8,10,12,13,15,17, //1-10
18,19,21,23,24,26,27,28,30,31, //11-20
33,34,35,36,38,39,40,41,43,44, //21-30
45,46,47,49,50,51,52,53,54,55, //31-40
56,57,58,59,60,61,62,63 }; //41-48
byte i=0;
byte j=0;
byte m=0;
byte n=64;
byte p=32;
unsigned int sq = 1;
int sqrr = 1;
if (x > 16129) { // if x above table range then divide x by 4 until it is in range.
// this initial correction is actually quite fast and require about 2 us.
do {
x >>= 2;
i++;}
while (x > 16129);
sq = x;}
else {
sq = x;
if (sq <= 1) { //return when x= 0 or x = 1.
return sq; }
else {
while (sq < 3969) { // if x below table range then increase x by factor 4 until it is in range
sq <<= 2;
j++; };
};
};
p= (sq >> 8) - 15; // uses loop up to get closer start of m, n and p in binary search with square values
m= lookUp_m[p];
if (p<=28) {
n=m+3;} //somewhat rough estimate. Sometimes n=m+2 could be OK
else {
if (p==33) { //single case of n=m+3 in this range
n= m+3;}
else
n= m+2;
};
p=m+1;
while (m+1 != n) { //binary search in table. This search require 14 us time.
if (sq < sq_table[p]) { //with update by look-up table it is about 3-6 us.
n = p;
p = (p+m) >> 1; }
else {
m = p;
p = (p+n) >> 1; };
};
if ((sq - sq_table[p]) > (sq_table[p+1] - sq)) { // correct to closest value in table
p++;};
if (j > 0) {
sqrr = (p+63) >> j; } //divide by two each time x was multiplied by 4 at start of function
else {
sqrr = (p+63) << i; }; //multiply by two each time x was divided by 4 at start of function
return sqrr;
};
void setup() {
Serial.begin(9600);
pinMode(TestOutPin, OUTPUT);
pinMode(TestOutPin2, OUTPUT);
digitalWrite(TestOutPin,LOW);
pinMode(AnalogPotPin, INPUT);
printTrigMillis = millis() + PrintPeriodMs;
}
void loop() {
// put your main code here, to run repeatedly:
r1 = random(58, Max9Bit); //phase-phase amplitude of BLDC EMF encoder
r2 = 2*M_PI*random(Max9Bit)/Max9Bit; //phase angle of V1 of BLDC EMF encoder
i1 = r1*sin(r2); //simulated voltage seen on ADC from V1 of EMF encoder
i2 = r1*sin(r2 + Rad60degr); //simulated voltage seen on ADC from V2 of EMF encoder
li3 = (long) i1*(i1-i2) + (long) i2*i2; // Do work with about 6 us calculation time
setTestPin();
i3 = broendegaard3_isqrt( li3 );
clearTestPin();
delay(1);
if ((millis() > printTrigMillis) && (printCnt<1000)) {
printTrigMillis += PrintPeriodMs;
printCnt++;
// print the results to the Serial Monitor:
//Serial.print("r1 = "); Serial.print(r1);
//Serial.print("\t r2 = "); Serial.print(r2);
r1=sqrt(li3);
//Serial.print("\t i1 = "); Serial.print(i1);
//Serial.print("\t i2 = "); Serial.print(i2);
//Serial.print("\t li3 = "); Serial.print(li3);
//Serial.print("\t i3 = "); Serial.print(i3);
//Serial.print("\t err% = ");
Serial.print((i3-r1)*100/r1);
Serial.println();
};
}
