A hard question about fft

Here is a very simple and slow example of a DFT:

// sample data array
float a[8] = {0, 0, 0, 0, 10, 10, 10, 10}; //square wave signal, period = 8 x sample time

void setup() {
 Serial.begin(9600);

 //sample time = 10 ms, signal frequency in this case = 1/period = 12.5 Hz.
 //DFT calculated for 0 to 40 Hz in steps of 0.5 Hz, assuming signal repeats 4 times, no window
 // expect peaks at 0Hz (DC offset), 12.5 Hz, 37.5 Hz, ...

 dft (a, 8, 10, 0, 40, 0.5, 4, 0);

}

void loop() {
}

// from https://www.instructables.com/Arduino-Frequency-Transform-DFT/
// some errors corrected SJR 10/2021
/*
 8 TERMS THAT NEED TO BE SPECIFIED
 1: an array of which dft need to be taken
 2:size of the array
 3:time interval between 2 reading in array in milliSECONDS
 4:lower value of frequency range in Hz
 5:upper value of frequency range in Hz
 6:size of steps for frequency range
 7:repetitions of signal array (minimum 1) higher number better accuracy but increased solution time
 8:  0 for no window, 1 for flat-top window, 2 for Hann window, 3 for Hamming window
     (if you do not have any idea about selecting window keep default 3)
 example:
     dft(a,110,0.5,0,30,0.5,10,3);
     here a is an array of size 110 element to be checked for 0 Hz to 30 Hz with 0.5 step (0,0.5,1,1.5, ... ,29,29.5,30)
     10 repetition and hamming window
     by- ABHILASH PATEL
*/

float dft(float a[], int arraysize, float interval, float f0, float fn, float stepsize, int reps, int window)
{
 float mag, sumi, sumr, ti, tr;
 int j, k;
 float twopi = 2.0 * PI;
 Serial.print("data array repetitions = ");
 Serial.println(reps);

 // apply windowing function
 if (window == 1) //flat-top window
 {
   for (int i = 0; i < arraysize; i++)
   {
     float b = PI * i / (arraysize);
     a[i] = a[i] * ( 1 - (1.93 * cos(2 * b)) + (1.29 * cos(4 * b)) - (0.388 * cos(6 * b)) + (0.028 * cos(8 * b)));
     // Serial.println(a[i]);
   }
 }

 if (window == 2) //hann window
 {
   for (int i = 0; i < arraysize; i++)
   {
     float b = twopi * i / (arraysize);
     a[i] = a[i] * 0.5 * (1 - cos(b));
     //Serial.println(a[i]);
   }
 }

 if (window == 3) //hamming window
 {
   for (int i = 0; i < arraysize; i++)
   {
     float b = twopi * i / (arraysize);
     a[i] = a[i] * (0.54 - 0.46 * cos(b));
   //  Serial.println(a[i]);
   }
 }

 // DFT calculation

 for (float f = f0; f <= fn; f = f + stepsize)
 {

   sumi = sumr = 0.0;
   k = 0;
   for (int i = 0; i < (arraysize * reps); i++)
   {
     j = i - k;
     if (j >= arraysize) {
       k = k + arraysize;  //signal repeat index offset
     }
     ti = a[j] * (sin(twopi * f * i * interval * 0.001)); //0.001 => time in seconds
     tr = a[j] * (cos(twopi * f * i * interval * 0.001));
     sumi = sumi + ti;
     sumr = sumr + tr;
   }
   mag = sqrt(sumi * sumi + sumr * sumr) / (arraysize * reps);
   Serial.print(f);
   Serial.print("\t");
   Serial.println(mag);
 }

}