Ciao, sto facendo un po' di esperimenti con la radice quadrata:
// One-time initialisation: opens the serial port used for all benchmark output.
void setup(){
  const unsigned long baudRate = 57600;
  Serial.begin(baudRate);
}
// Benchmark driver, executed repeatedly.
//
// Each pass first prints the accuracy report produced by start(), then times
// four 6000-iteration loops with micros() and prints, for each one, the
// running accumulator followed by the elapsed time in microseconds:
//   1. plain accumulation (baseline cost of the loop itself)
//   2. sqrt()
//   3. Q_rsqrt()  (one Newton iteration)
//   4. Q_rsqrt2() (two Newton iterations)
//
// `ris` is intentionally NOT reset between the loops: it keeps accumulating,
// and it is printed after every loop so that the loop results are observable.
void loop(){
  Serial.println("Starting long calculation:");
  start();

  int a;
  unsigned long t;
  float ris=0;

  // 1) Baseline: integer-to-float accumulation only.
  t = micros();
  for (a=0;a<6000;a++){
    ris+=a;
  }
  t=micros()-t;
  Serial.println(ris);
  Serial.print("Time for:");
  Serial.println(t);

  // 2) Library square root.
  t = micros();
  for (a=0;a<6000;a++){
    ris+=sqrt(a);
  }
  // This subtraction was missing: without it the absolute micros() timestamp
  // taken before the loop was printed instead of the elapsed time.
  t=micros()-t;
  Serial.println(ris);
  Serial.print("Time sqrt:");
  Serial.println(t);

  // 3) Fast inverse square root, one Newton iteration.
  t = micros();
  for (a=0;a<6000;a++){
    ris+=Q_rsqrt(a);
  }
  t=micros()-t;
  Serial.println(ris);
  Serial.print("Time Q_rsqrt:");
  Serial.println(t);

  // 4) Fast inverse square root, two Newton iterations.
  t = micros();
  for (a=0;a<6000;a++){
    ris+=Q_rsqrt2(a);
  }
  t=micros()-t;
  Serial.println(ris);
  Serial.print("Time Q_rsqrt2:");
  Serial.println(t);

  // Pause before the next benchmark pass.
  delay(1000);
}
// Accuracy report for the two fast inverse-square-root variants.
//
// Sweeps x from 0 up to (but excluding) 1000 in float steps of 0.01 and, for
// each x, compares sqrt(x) against 1.0/Q_rsqrt(x) and 1.0/Q_rsqrt2(x).
// For each variant it tracks the largest absolute difference and the sum of
// the signed differences, then prints the maximum and the mean over Serial
// with 7 decimals ("nodiff" is printed when the maximum is exactly zero).
void start(){
  double worst1 = 0;   // largest |difference| seen for Q_rsqrt
  double total1 = 0;   // sum of signed differences for Q_rsqrt
  double worst2 = 0;   // largest |difference| seen for Q_rsqrt2
  double total2 = 0;   // sum of signed differences for Q_rsqrt2
  long samples = 0;    // number of x values evaluated

  for (float x = 0; x < 1000; x += 0.01){
    samples++;

    // Variant with a single Newton iteration.
    const double err1 = (float)( sqrt(x) ) - 1.0/Q_rsqrt(x);
    total1 += err1;
    if (abs(err1) > worst1){
      worst1 = abs(err1);
    }

    // Variant with two Newton iterations.
    const double err2 = (float)( sqrt(x) ) - 1.0/Q_rsqrt2(x);
    total2 += err2;
    if (abs(err2) > worst2){
      worst2 = abs(err2);
    }
  }

  Serial.print("maxdiff is:");
  if (worst1 == 0){
    Serial.println("nodiff");
  } else {
    Serial.println(worst1, 7);
  }
  Serial.print("middiff is:");
  Serial.println(total1/samples, 7);

  Serial.print("maxdiff2 is:");
  if (worst2 == 0){
    Serial.println("nodiff");
  } else {
    Serial.println(worst2, 7);
  }
  Serial.print("middiff2 is:");
  Serial.println(total2/samples, 7);
}
// Fast approximate inverse square root: returns roughly 1/sqrt(number),
// refined with ONE Newton-Raphson iteration.
//
// number: value to process; the result is only meaningful for positive,
//         finite inputs (no special handling of 0, negatives, NaN or inf).
// returns: an approximation of 1/sqrt(number).
//
// The float's bit pattern is copied into a 32-bit integer with memcpy rather
// than through a pointer cast. The pointer-cast form breaks the aliasing
// rules and, where `long` is wider than `float`, reads past the float object.
// memcpy/int32_t are provided by the headers the Arduino core already pulls
// into every sketch (<string.h>, <stdint.h>).
float Q_rsqrt( float number ){
  const float threehalfs = 1.5F;
  const float x2 = number * 0.5F;
  float y = number;

  int32_t i;
  memcpy(&i, &y, sizeof i);            // reinterpret the float's bits as an integer
  i = 0x5f3759df - ( i >> 1 );         // magic-constant initial guess for 1/sqrt
  memcpy(&y, &i, sizeof y);            // reinterpret the integer's bits as a float

  y = y * ( threehalfs - ( x2 * y * y ) ); // 1st (and only) Newton iteration
  return y;
}
// Fast approximate inverse square root: returns roughly 1/sqrt(number),
// refined with TWO Newton-Raphson iterations (more accurate, slower than the
// single-iteration variant).
//
// number: value to process; the result is only meaningful for positive,
//         finite inputs (no special handling of 0, negatives, NaN or inf).
// returns: an approximation of 1/sqrt(number).
//
// The float's bit pattern is copied into a 32-bit integer with memcpy rather
// than through a pointer cast. The pointer-cast form breaks the aliasing
// rules and, where `long` is wider than `float`, reads past the float object.
// memcpy/int32_t are provided by the headers the Arduino core already pulls
// into every sketch (<string.h>, <stdint.h>).
float Q_rsqrt2( float number ){
  const float threehalfs = 1.5F;
  const float x2 = number * 0.5F;
  float y = number;

  int32_t i;
  memcpy(&i, &y, sizeof i);            // reinterpret the float's bits as an integer
  i = 0x5f3759df - ( i >> 1 );         // magic-constant initial guess for 1/sqrt
  memcpy(&y, &i, sizeof y);            // reinterpret the integer's bits as a float

  y = y * ( threehalfs - ( x2 * y * y ) ); // 1st Newton iteration
  y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd Newton iteration (the extra accuracy of this variant)
  return y;
}
risultati:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
Time for:72752
Time sqrt:61048312
Time Q_rsqrt:317052
Time Q_rsqrt2:537008
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
Time for:72752
Time sqrt:124226644
Time Q_rsqrt:317052
Time Q_rsqrt2:537012
dopo varie iterazioni, sqrt si assesta intorno a 566477768
cosa si evince?
- l'uso di SQRT ha tempi molto variabili, forse c'è una componente random?
- l'uso di Q_rsqrt è dal 200% al 1800% più veloce, però con una pesante perdita di precisione
- l'uso di Q_rsqrt2 è dal 100% al 1000% più veloce, con perdita minima della precisione
l'algoritmo non è mio, ma quello usato in Quake3. Notare che Q_rsqrt e Q_rsqrt2 NON sono la radice quadrata, ma l'inversa, ovvero 1/sqrt(), molto utile se usata per normalizzare dei vettori
se vi chiedete perché stampo Serial.println(ris); è perché altrimenti il compilatore si accorge che i for non servono a nulla ai fini del programma e bellamente non li esegue nemmeno (4 millisecondi fissi per tempo for, sqrt, etc..)
output(lungo):
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:187369712
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537016
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:218951880
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537016
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:250534052
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537016
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:282116220
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537012
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:313698396
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537012
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:345280564
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537016
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:376862736
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537008
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:408444904
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537008
Starting long calculation:
maxdiff is:0.0542393
middiff is:-0.0204092
maxdiff2 is:0.0001450
middiff2 is:-0.0000408
17996896.00
Time for:72752
309799.53
Time sqrt:440027084
0.00
Time Q_rsqrt:317060
0.00
Time Q_rsqrt2:537008
Starting long calculation: