Not passing array correctly to function

Hi, I am thinking of using the algorithm provided here as the basis for determining the line of best fit:

Here is the code adapted for the Arduino Mega:

#include <Wire.h>
#include <math.h>
//using namespace std;

// One data sample: a pair of coordinates.
// Kept as a plain aggregate so it can be initialised as {x, y}.
struct Point {
   double x;   // abscissa
   double y;   // ordinate
};

// Sample data handed to leastSqrRegression().
//
// As declared this is a 6 x 2 array of Point, i.e. 12 Point objects in total.
// Each braced pair below initialises one row (a Point[2]): through brace
// elision the two numbers become x and y of the row's FIRST Point, and the
// row's second Point, having no initialiser of its own, is zero-initialised.
// In memory the rows follow one another, so the layout is
//   {1,9.3} {0,0} {2,11.5} {0,0} {3,12.25} {0,0} ...
//
// NOTE(review): the caller passes a count of 6, so a one-dimensional
// "Point Collection[6]" looks like the intent -- confirm, and change the
// call site together with the declaration.
Point Collection [6][2] = {
    {1,9.3},
    {2,11.5},
    {3,12.25},
    {4,13.1},
    {5,14.2},
    {6,16.3}
};

// Runs once at start-up: opens the serial link and prints the report banner.
void setup()
{
    // Open the serial port at 115200 baud for the Serial Monitor.
    Serial.begin(115200);

    // Banner: title, a ruler line, then one blank line.
    Serial.println("Linear Regression and Method of Least Squares");
    Serial.println("====================================================================================");
    Serial.println();
}

void loop() {
  leastSqrRegression(*Collection, 6);
  while (1);
}

void leastSqrRegression(struct Point* xyCollection, int dataSize)
{
   if (xyCollection == NULL || dataSize == 0)
   {
      Serial.print("Empty data set!");
      return;
   }

   double SUMx = 0;     //sum of x values
   double SUMy = 0;     //sum of y values
   double SUMxy = 0;    //sum of x * y
   double SUMxx = 0;    //sum of x^2
   double SUMres = 0;   //sum of squared residue
   double res = 0;      //residue squared
   double slope = 0;    //slope of regression line
   double y_intercept = 0; //y intercept of regression line
   double SUM_Yres = 0; //sum of squared of the discrepancies
   double AVGy = 0;     //mean of y
   double AVGx = 0;     //mean of x
   double Yres = 0;     //squared of the discrepancies
   double Rsqr = 0;     //coefficient of determination

   //calculate various sums 
   for (int i = 0; i < dataSize; i++)
   {
     Serial.print(i); Serial.print(" : "); Serial.print((xyCollection + i)->x); Serial.print("\t"); Serial.println((xyCollection + i)->y);
      //sum of x
      SUMx = SUMx + (xyCollection + i)->x;
      //sum of y
      SUMy = SUMy + (xyCollection + i)->y;
      //sum of squared x*y
      SUMxy = SUMxy + (xyCollection + i)->x * (xyCollection + i)->y;
      //sum of squared x
      SUMxx = SUMxx + (xyCollection + i)->x * (xyCollection + i)->x;
   }

   //calculate the means of x and y
   AVGy = SUMy / dataSize;
   AVGx = SUMx / dataSize;

   //slope or a1
   slope = (dataSize * SUMxy - SUMx * SUMy) / (dataSize * SUMxx - SUMx*SUMx);

   //y itercept or a0
   y_intercept = AVGy - slope * AVGx;
   Serial.println(); Serial.println("----------------------------------------------------------------------");
   Serial.print("x mean(AVGx) = "); Serial.print(AVGx); Serial.print("\t");
   Serial.print("y mean(AVGy) = "); Serial.println(AVGy);

   Serial.println ("The linear equation that best fits the given data:");
   Serial.print ("   y = "); Serial.print(slope); Serial.print(" * x + "); Serial.println(y_intercept);
   Serial.println ("----------------------------------------------------------------------");
   Serial.println ("   Original (x,y)   (y_i - y_avg)^2     (y_i - a_o - a_1*x_i)^2");
   Serial.println ("----------------------------------------------------------------------");

   //calculate squared residues, their sum etc.
   for (int i = 0; i < dataSize; i++) 
   {
      //current (y_i - a0 - a1 * x_i)^2
      Yres = pow(((xyCollection + i)->y - y_intercept - (slope * (xyCollection + i)->x)), 2);

      //sum of (y_i - a0 - a1 * x_i)^2
      SUM_Yres += Yres;

      //current residue squared (y_i - AVGy)^2
      res = pow((xyCollection + i)->y - AVGy, 2);

      //sum of squared residues
      SUMres += res;
      
      Serial.print("\t");
      Serial.print((xyCollection + i)->x); Serial.print("\t");
      Serial.print((xyCollection + i)->y); Serial.print("\tres: ");
      Serial.print(res); Serial.print("\tYres: ");
      Serial.println(Yres);
   }

   //calculate r^2 coefficient of determination
   Rsqr = (SUMres - SUM_Yres) / SUMres;
   Serial.println();
   Serial.println("------------------------------------------------------------");
   Serial.print("Sum of (y_i - y_avg)^2 =             "); Serial.println(SUMres,3);
   Serial.print("Sum of (y_i - a_o - a_1*x_i)^2 =     "); Serial.println(SUM_Yres,3);
   Serial.print("Standard deviation(St) =             "); Serial.println( sqrt(SUMres / (dataSize - 1)),3);
   Serial.print("Standard error of the estimate(Sr) = "); Serial.println(sqrt(SUM_Yres / (dataSize-2)),3);
   Serial.print("Coefficent of determination(r^2) =   "); Serial.println((SUMres - SUM_Yres)/SUMres,3);
   Serial.print("Correlation coefficient(r) =         "); Serial.println(sqrt(Rsqr),3);
}

The code is almost working. Equally it is not working at all as expected.

Here is the sample output:

Linear Regression and Method of Least Squares

0 : 1.00 9.30
1 : 0.00 0.00
2 : 2.00 11.50
3 : 0.00 0.00
4 : 3.00 12.25
5 : 0.00 0.00


x mean(AVGx) = 1.00 y mean(AVGy) = 5.51
The linear equation that best fits the given data:
y = 4.50 * x + 1.01

Original (x,y) (y_i - y_avg)^2 (y_i - a_o - a_1*x_i)^2

1.00 9.30 res: 14.38 Yres: 14.38
0.00 0.00 res: 30.34 Yres: 1.02
2.00 11.50 res: 35.90 Yres: 2.23
0.00 0.00 res: 30.34 Yres: 1.02
3.00 12.25 res: 45.45 Yres: 5.10
0.00 0.00 res: 30.34 Yres: 1.02


Sum of (y_i - y_avg)^2 = 186.752
Sum of (y_i - a_o - a_1*x_i)^2 = 24.752
Standard deviation(St) = 6.111
Standard error of the estimate(Sr) = 2.488
Coefficent of determination(r^2) = 0.867
Correlation coefficient(r) = 0.931

As you can see I have added a few additional print statements to highlight what is happening in the first loop. It seems as if the array is not being passed correctly. The first data pair is {1,9.3} which is passed OK; the second data pair appears as (0,0); the third data pair is in fact the second pair {2,11.5} and so on. The correct solution should be f(x) = 1.28 * x + 8.3143.

I have been racking my brains on this for hours and finally given in. It is not likely the loop is incrementing incorrectly so either the pointer to the array is not being passed correctly or the loop is not accessing the pointers correctly and therefore failing to point to the correct data values. Can anyone help me spot where I have introduced an error (and why)?

Thanks,
Ric

You should pass Collection on its own. The array name (reference) is already a pointer to the array.

  leastSqrRegression(*Collection, 6);

This is a big red flag. Re-write the function so you can just pass it Collection.

void loop() {
  leastSqrRegression(*Collection, 6);
  while (1);
}

If you only want to call the function once, why are you calling it in loop()? Call it in setup() instead, and leave loop() empty.

marco_c:
You should pass Collection on its own. The array name (reference) is already a pointer to the array.

This is what I did originally but then the following error was given:

29: error: cannot convert 'Point (*)[2]' to 'Point*' for argument '1' to 'void leastSqrRegression(Point*, int)'

  leastSqrRegression(Collection, 6);

Ric

this...

struct Point
{
   double x;
   double y;
};

and this...

Point Collection [6][2] = {
    {1,9.3},
    {2,11.5},
    {3,12.25},
    {4,13.1},
    {5,14.2},
    {6,16.3}
};

What you're doing there is creating a two dimensional array of points, so 12 points total. I can't be totally sure, but it doesn't look like that is your intent. From your usage of Collection, it looks like you're really trying to just create a 1 dimensional array of 6 points and assign the x and y values using array initialization notation.

If the latter is what you're trying to accomplish, just drop the second dimension, use...

Point Collection [6] = {

From your usage of Collection, it looks like you're really trying to just create a 1 dimensional array of 6 points

The data in the array is for 6 points, each comprising an ordered pair, x,y. My understanding of the struct Point is that each instance of Point comprises two values, double x and double y. So yes, the array was set up (incorrectly (I might say, stupidly)) to provide just 6 data points.

Now that the array is correctly sized I can pass just "Collection" as I had it originally.

Really I am very grateful. Well spotted! Truly, well spotted!

Thanks,
Ric