Try this:

Code:

#define ARM_MATH_CM3

#include <arm_math.h>

/* Five sample values used as the input block for arm_rms_f32(). */
float32_t data[5] = {1, 2, 3, 4, 5};

/* Pointer to the first sample; passed as pSrc to arm_rms_f32(). */
float32_t *d = data;

/* Backing storage for the RMS result so that pResult never points at
** address 0 when it is handed to a routine that writes through it. */
static float32_t rmsValue = 0.0f;

/* Destination pointer passed as pResult to arm_rms_f32(). */
float32_t *pResult = &rmsValue;

/* Arduino start-up hook: opens the serial port that arm_rms_f32() prints to. */
void setup()
{
  /* Baud rate of the serial link, in bits per second. */
  const unsigned long baudRate = 9600;

  Serial.begin(baudRate);
}

/* Arduino main hook: runs the RMS computation over the 5-sample block once,
** then parks forever so the value is printed a single time. */
void loop()
{
  arm_rms_f32(d, 5, pResult);

  /* Never return to the Arduino core, otherwise loop() would be re-entered
  ** and the result printed again. */
  for (;;)
  {
  }
}

/**
 * @brief Root mean square of a block of floating-point samples.
 *
 * Computes sqrt((A[0]*A[0] + A[1]*A[1] + ... + A[blockSize-1]*A[blockSize-1]) / blockSize),
 * stores it in the destination and prints it with Serial.println().
 *
 * @param pSrc       points to the first input sample; blockSize samples are read.
 * @param blockSize  number of samples in the block. For an empty block the
 *                   result is reported as 0 instead of evaluating 0/0.
 * @param pResult    destination for the RMS value. May be null, in which case
 *                   the value is only printed.
 */
void arm_rms_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  float32_t sum = 0.0f;   /* Accumulator for the sum of squares */
  float32_t in;           /* Temporary variable to store input value */
  uint32_t blkCnt;        /* Loop counter */
  float32_t rms = 0.0f;   /* Final RMS value; stays 0 for an empty block */

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: number of complete groups of 4 samples */
  blkCnt = blockSize >> 2u;

  /* First part of the processing with loop unrolling. Accumulate 4 samples
  ** per iteration; the loop after the #endif handles the remaining 0 to 3. */
  while (blkCnt > 0u)
  {
    in = *pSrc++;
    sum += in * in;

    in = *pSrc++;
    sum += in * in;

    in = *pSrc++;
    sum += in * in;

    in = *pSrc++;
    sum += in * in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, the remaining samples are
  ** accumulated below. No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while (blkCnt > 0u)
  {
    /* Add the square of the next sample to the running sum */
    in = *pSrc++;
    sum += in * in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Compute Rms. Dividing by a zero blockSize would yield NaN, so the
  ** division is skipped for an empty block. */
  if (blockSize > 0u)
  {
    rms = static_cast<float32_t>(sqrt(sum / static_cast<float32_t>(blockSize)));
  }

  /* Store the result in the destination when the caller supplied one */
  if (pResult)
  {
    *pResult = rms;
  }

  /* Print the result */
  Serial.println(rms);
}

#include <arm_math.h>

/* Five sample values used as the input block for arm_rms_f32(). */
float32_t data[5] = {1, 2, 3, 4, 5};

/* Pointer to the first sample; passed as pSrc to arm_rms_f32(). */
float32_t *d = data;

/* Backing storage for the RMS result so that pResult never points at
** address 0 when it is handed to a routine that writes through it. */
static float32_t rmsValue = 0.0f;

/* Destination pointer passed as pResult to arm_rms_f32(). */
float32_t *pResult = &rmsValue;

/* Arduino start-up hook: opens the serial port that arm_rms_f32() prints to. */
void setup()
{
  /* Baud rate of the serial link, in bits per second. */
  const unsigned long baudRate = 9600;

  Serial.begin(baudRate);
}

/* Arduino main hook: runs the RMS computation over the 5-sample block once,
** then parks forever so the value is printed a single time. */
void loop()
{
  arm_rms_f32(d, 5, pResult);

  /* Never return to the Arduino core, otherwise loop() would be re-entered
  ** and the result printed again. */
  for (;;)
  {
  }
}

/**
 * @brief Root mean square of a block of floating-point samples.
 *
 * Computes sqrt((A[0]*A[0] + A[1]*A[1] + ... + A[blockSize-1]*A[blockSize-1]) / blockSize),
 * stores it in the destination and prints it with Serial.println().
 *
 * @param pSrc       points to the first input sample; blockSize samples are read.
 * @param blockSize  number of samples in the block. For an empty block the
 *                   result is reported as 0 instead of evaluating 0/0.
 * @param pResult    destination for the RMS value. May be null, in which case
 *                   the value is only printed.
 */
void arm_rms_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  float32_t sum = 0.0f;   /* Accumulator for the sum of squares */
  float32_t in;           /* Temporary variable to store input value */
  uint32_t blkCnt;        /* Loop counter */
  float32_t rms = 0.0f;   /* Final RMS value; stays 0 for an empty block */

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: number of complete groups of 4 samples */
  blkCnt = blockSize >> 2u;

  /* First part of the processing with loop unrolling. Accumulate 4 samples
  ** per iteration; the loop after the #endif handles the remaining 0 to 3. */
  while (blkCnt > 0u)
  {
    in = *pSrc++;
    sum += in * in;

    in = *pSrc++;
    sum += in * in;

    in = *pSrc++;
    sum += in * in;

    in = *pSrc++;
    sum += in * in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, the remaining samples are
  ** accumulated below. No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while (blkCnt > 0u)
  {
    /* Add the square of the next sample to the running sum */
    in = *pSrc++;
    sum += in * in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Compute Rms. Dividing by a zero blockSize would yield NaN, so the
  ** division is skipped for an empty block. */
  if (blockSize > 0u)
  {
    rms = static_cast<float32_t>(sqrt(sum / static_cast<float32_t>(blockSize)));
  }

  /* Store the result in the destination when the caller supplied one */
  if (pResult)
  {
    *pResult = rms;
  }

  /* Print the result */
  Serial.println(rms);
}

Output:

Quote

3.32

Regards!