um-FPU 3.1 vs Arduino SW FPU Library

I have a chunk of inverse kinematics code (for a robot) that I’ve been trying to optimize. I’ve posted it below as implemented with the software floating point library, as well as with the um-FPU 3.1.

I had hoped that the hardware FPU would speed it up, but either something is wrong with my implementation, or it is dramatically slower than the arduino fpu emulation.

The chunk of code takes approximately 600us on the arduino, and 1400us on the FPU using the user-defined functions on the FPU. I’ve verified that they are returning the same correct data. Ideally I’d want the execution time to be more like 100us. Any advice?

Arduino Code

 int delta_calcAngleYZ(float x0, float y0, float z0, float &theta) {

     float y1 = -0.5 * 0.57735 * f; // f/2 * tg 30
     y0 -= 0.5 * 0.57735 * e;    // shift center to edge
     // z = a + b*y
     float a = (x0*x0 + y0*y0 + z0*z0 +rf*rf - re*re - y1*y1)/(2*z0);
     float b = (y1-y0)/z0;
     // discriminant
     float d = -(a+b*y1)*(a+b*y1)+rf*(b*b*rf+rf); 
     if (d < 0) return -1; // non-existing point
     float yj = (y1 - a*b - sqrt(d))/(b*b + 1); // choosing outer point
     float zj = a + b*yj;
     theta = 180.0*atan(-zj/(y1 - yj))/pi + ((yj>y1)?180.0:0.0);
     return 0;
 }

Code using the FPU

 int delta_calcAngleYZ(float x0, float y0, float z0, float &theta) {

// The registers are defined and constants are loaded earlier, the FPU custom function operates on the registers and replicates exactly the math sequence in the arduino implementation.
        
     Fpu.write(SELECTA,fpu_x0); // load the input vars
     Fpu.write(FWRITEA);
     Fpu.writeFloat(x0);

     Fpu.write(SELECTA,fpu_y0);
     Fpu.write(FWRITEA);
     Fpu.writeFloat(y0);
     
     Fpu.write(SELECTA,fpu_z0);
     Fpu.write(FWRITEA);
     Fpu.writeFloat(z0);

          
Fpu.write(FCALL,1);// calculate d with function 1 stored on um-fpu
Fpu.write(FSTATUS,fpu_d);    // get the status of register 15 (status of d)

if ( Fpu.readStatus() && STATUS_SIGN == 1 ) return -1;

Fpu.write(FCMP2,fpu_y1,fpu_yj); // compare yj and y1 in the fpu

// if yj > y1, define theta this way
if ( Fpu.readStatus() && STATUS_SIGN == 1 ) {

Fpu.write(SELECTA,fpu_theta);
  Fpu.write(FCALL,2); // calculate version 1 of theta
} else {
  Fpu.write(FCALL,3); // calculate version 2 of theta
}
      Fpu.wait();
      Fpu.write(FREADA);
      theta = Fpu.readFloat();
     return 0;
 }

Hi,

While, I can't help you with your delay time, maybe looking here could help:

I was also wondering if you have tested your Delta code?

I am having trouble with the inverse kinematic part giving incorrect results.

Does it work for you?

Cheers, James

And here is the link :wink:

The IK code seems to be working for me. I’ll post it below for you to see. I’m also thinking of taking a different approach than before. Whereas I originally wanted to calculate the IK sequence to decide for each step of my stepper motors, I will now look into taking multiple steps for each IK calculation.

// robot geometry
 // (look at pics above for explanation)
 const float e = 4.8614;     // end effector
 const float f = 14.0296;     // base
 const float re = 12.5;
 const float rf = 5.9125;
 
 // trigonometric constants
 const float sqrt3 = sqrt(3.0);
 const float pi = 3.141592653;    // PI
 const float sin120 = sqrt3/2.0;   
 const float cos120 = -0.5;        
 const float tan60 = sqrt3;
 const float sin30 = 0.5;
 const float tan30 = 1/sqrt3;
const float y1 = -0.5 * 0.57735 * f; // f/2 * tg 30
const float g = 0.5 * 0.57735 * e;
const float stuff = rf*rf - re*re - y1*y1;


 int doo;
 float x0;
 float y0;
 float z0;
 float theta1;
 float theta2;
 float theta3;

// inverse kinematics
 // helper functions, calculates angle theta1 (for YZ-pane)
 int delta_calcAngleYZ(float x0, float y0, float z0, float &theta) {
     fpuMicros = micros();
     //float y1 = -0.5 * 0.57735 * f; // f/2 * tg 30
     y0 -= g;    // shift center to edge
     // z = a + b*y



     float a = (x0*x0 + y0*y0 + z0*z0 + stuff)/(2*z0);
     float b = (y1-y0)/z0;
     //
     // discriminant
     float d = -(a+b*y1)*(a+b*y1)+rf*(b*b*rf+rf); 
     if (d < 0) return -1; // non-existing point
     float yj = (y1 - a*b - sqrt(d))/(b*b + 1); // choosing outer point
     float zj = a + b*yj;
     theta = 180.0*atan(-zj/(y1 - yj))/pi + ((yj>y1)?180.0:0.0);
//Serial.println(micros()-fpuMicros);     
     return 0;

 }
 
 // inverse kinematics: (x0, y0, z0) -> (theta1, theta2, theta3)
 // returned status: 0=OK, -1=non-existing position
 int delta_calcInverse(float x0, float y0, float z0, float &theta1, float &theta2, float &theta3) {
         
     theta1 = theta2 = theta3 = 0;
     int status = delta_calcAngleYZ(x0, y0, z0, theta1);
     if (status == 0) status = delta_calcAngleYZ(x0*cos120 + y0*sin120, y0*cos120-x0*sin120, z0, theta2);  // rotate coords to +120 deg
     if (status == 0) status = delta_calcAngleYZ(x0*cos120 - y0*sin120, y0*cos120+x0*sin120, z0, theta3);  // rotate coords to -120 deg
     return status;
 }