Recording and converting PDM audio data into a .wav file using the Arduino Giga Display microphone

Hello,

I just wanted to ask this forum for some guidance about the possibility of using the built-in microphone on the Arduino Giga Display shield for recording and saving audio data as a .wav file to an SD card. The end goal of this project is to then send this audio file to OpenAI's Whisper to use as a cloud-based speech-to-text service. So far I have run into problems while researching. It looks like the microphone on the Giga Display shield records audio as PDM data, whereas .wav files contain PCM audio, so I would have to somehow convert between the two. In addition, I looked into maybe using the built-in 3.5 mm jack on the Giga to plug in a microphone, but I am not sure if this is possible because I cannot find anything (other than AI-generated responses) that says the Giga has I2S, which could directly turn the audio captured by the mic into PCM audio. I would like clarification on this if anyone knows better or has experience with this.

Sincerely,
Louis

A .wav file is typically just a header followed by 16 bit analog data sampled at 44.1 kHz.

It is far from being the only format suitable for transmitting sound to some online service, but you can read more about the .wav file format here.

I typically dump raw 16 bit analog sound samples to SD card in binary format, then convert the data to .wav format using something like this on a PC:

/* make_wav.c
 * base code written by Kevin Karplus
 * Reads raw int16_t audio data file and creates .wav formatted output
 */
 //  https://karplus4arduino.wordpress.com/2011/10/08/making-wav-files-from-c-programs/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "make_wav.h"

#define S_RATE  (16000)

//int16_t buffer[BUF_SIZE];

// Function prototypes
// Returns the size of the open file in bytes and leaves it positioned at the start.
int getFileSize(FILE* inFile);
// Name of the raw input file; holds at most 49 characters plus the terminating NUL.
// main() later cuts this string at the extension to build the output name.
char filename[50]={0};

/*
 * Convert a file of raw 16-bit signed samples into a mono .wav file.
 *
 * The input file name is taken from the first command-line argument when one
 * is given; otherwise the user is prompted for it.  Every sample is amplified
 * by a fixed gain (saturating at the int16_t limits), simple statistics are
 * printed, and the result is written next to the input with its extension
 * replaced by ".wav", at S_RATE samples per second.
 *
 * Samples are read in host byte order.
 * NOTE(review): this assumes the recorder and this PC share the same
 * endianness (typically little-endian) - confirm for your hardware.
 *
 * Returns 0 on success and 1 on any failure.
 */
int main(int argc, char *argv[])
{
    enum { GAIN = 5 };          /* fixed amplification applied to every sample */
    int status = 1;
    int16_t *buffer = NULL;

    if (argc > 1)
    {
        int len = snprintf(filename, sizeof filename, "%s", argv[1]);
        if (len < 0 || (size_t)len >= sizeof filename)
        {
            fprintf(stderr, "Input file name is too long (limit %zu characters)\n",
                    sizeof filename - 1);
            return 1;
        }
    }
    else
    {
        printf("\nInput file? ");
        /* field width must stay one less than sizeof filename (50) */
        if (scanf("%49s", filename) != 1)
        {
            fprintf(stderr, "No input file name given\n");
            return 1;
        }
    }

    FILE *rawFile = fopen(filename, "rb");
    if (!rawFile)
    {
        fprintf(stderr, "Unable to open %s\n", filename);
        return 1;
    }
    int filelength = getFileSize(rawFile);
    printf("File contains %d bytes\n", filelength);
    if (filelength < 2)
    {
        fprintf(stderr, "%s does not contain a complete 16-bit sample\n", filename);
        goto cleanup;
    }

    /* hold the entire audio snippet on the heap: recordings are far too
       large for a stack array */
    size_t capacity = (size_t)filelength / sizeof *buffer;
    buffer = malloc(capacity * sizeof *buffer);
    if (!buffer)
    {
        fprintf(stderr, "Out of memory for %zu samples\n", capacity);
        goto cleanup;
    }

    size_t count = fread(buffer, sizeof *buffer, capacity, rawFile);
    if (count == 0)
    {
        fprintf(stderr, "Unable to read samples from %s\n", filename);
        goto cleanup;
    }
    if (count < capacity)
    {
        fprintf(stderr, "Warning: read only %zu of %zu samples\n", count, capacity);
    }

    double sum = 0.0;           /* double keeps the mean accurate for long files */
    long xmax = INT16_MIN;
    long xmin = INT16_MAX;
    for (size_t i = 0; i < count; i++)
    {
        long t = buffer[i];
        sum += (double)t;
        if (xmax < t) xmax = t;
        if (xmin > t) xmin = t;

        /* amplify, saturating instead of overflowing the 16-bit range */
        long scaled = t * GAIN;
        if (scaled > INT16_MAX) scaled = INT16_MAX;
        else if (scaled < INT16_MIN) scaled = INT16_MIN;
        buffer[i] = (int16_t)scaled;
    }
    printf ("xbar = %8.1f, xmax = %8.1f, xmin = %8.1f\n",
            sum / (double)count, (double)xmax, (double)xmin);

    /* output file: same path with the final extension replaced by ".wav".
       Only a dot inside the last path component counts as an extension, so
       names such as "./audio.dat" or "../rec/audio" are handled correctly. */
    char outfile[sizeof filename + 4];
    char *base = filename;
    char *sep = strrchr(filename, '/');
    if (sep) base = sep + 1;
    sep = strrchr(base, '\\');
    if (sep) base = sep + 1;
    char *ext = strrchr(base, '.');
    if (ext && ext != base) *ext = '\0';
    snprintf(outfile, sizeof(outfile), "%s.wav", filename);

    write_wav(outfile, (uint32_t)count, buffer, S_RATE);
    printf("%s written\n", outfile);
    status = 0;

cleanup:
    free(buffer);
    fclose(rawFile);
    return status;
}
// Report how many bytes the open stream holds.
// The stream is moved to its end to read the offset there, then put back at
// the beginning so the caller can read the file from the start.
int getFileSize(FILE* inFile)
{
    fseek(inFile, 0L, SEEK_END);
    const int totalBytes = (int)ftell(inFile);
    fseek(inFile, 0L, SEEK_SET);

    return totalBytes;
}

make_wav.h

/* make_wav.h
 * Fri Jun 18 17:06:02 PDT 2010 Kevin Karplus
 https://karplus4arduino.wordpress.com/2011/10/08/making-wav-files-from-c-programs/
 */

#ifndef MAKE_WAV_H
#define MAKE_WAV_H
#include <inttypes.h>

void write_wav(char * filename, uint32_t num_samples, int16_t * data, uint32_t s_rate);
    /* Open (creating or truncating) the file named filename in binary mode,
        write the num_samples signed 16-bit values in data as a
        monaural PCM WAV file at the sampling rate s_rate (in Hz),
        and close the file.
        An s_rate of 0 selects 44100 Hz.
    */

#endif

make_wav.c

/* make_wav.c
 https://karplus4arduino.wordpress.com/2011/10/08/making-wav-files-from-c-programs/
 * Creates a WAV file from an array of ints.
 * Output is monophonic, signed 16-bit samples
 * copyright
 * Fri Jun 18 16:36:23 PDT 2010 Kevin Karplus
 * Creative Commons license Attribution-NonCommercial
 *  http://creativecommons.org/licenses/by-nc/3.0/
 */
// fixed by using write binary "wb" sjr

#include <stdio.h>
#include "make_wav.h"
#include <assert.h>
#include <inttypes.h>

/* Emit the low num_bytes bytes of word to wav_file, least-significant byte
 * first.  Asking for more than four bytes pads the output with zero bytes.
 */
void write_little_endian(uint32_t word, uint32_t num_bytes, FILE *wav_file)
{
    for (uint32_t remaining = num_bytes; remaining > 0; remaining--)
    {
        const uint8_t low_byte = (uint8_t)(word & 0xff);

        fputc(low_byte, wav_file);
        word >>= 8;     /* expose the next more-significant byte */
    }
}

/* information about the WAV file format from
    http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
 */

/*
 * Write num_samples signed 16-bit samples from data to the file named
 * filename as a monaural, 16-bit PCM WAV file.
 *
 *   filename     path of the file to create (an existing file is overwritten)
 *   num_samples  number of entries in data
 *   data         the samples, one per frame (mono)
 *   s_rate       sampling rate in Hz; 0 selects the default of 44100
 *
 * All header fields and samples are emitted little-endian regardless of the
 * host byte order.  Failures (bad arguments, more samples than the 32-bit
 * RIFF size fields can describe, open/write/close errors) are reported on
 * stderr; the function returns without writing when it cannot start.
 */
void write_wav(char * filename, uint32_t num_samples, int16_t * data, uint32_t s_rate)
{
    const uint16_t num_channels = 1;        /* monaural */
    const uint16_t bytes_per_sample = 2;
    const uint32_t frame_bytes = (uint32_t)num_channels * bytes_per_sample;
    const uint32_t sample_rate = (s_rate == 0) ? 44100 : s_rate;
    const uint32_t byte_rate = sample_rate * frame_bytes;
    uint32_t i;    /* counter for samples */

    if (filename == NULL || (data == NULL && num_samples > 0))
    {
        fprintf(stderr, "write_wav: missing file name or sample data\n");
        return;
    }

    /* The RIFF chunk size is 36 header bytes plus the data size and must fit
       in 32 bits; refuse rather than write a header with wrapped sizes. */
    if (num_samples > (UINT32_MAX - 36) / frame_bytes)
    {
        fprintf(stderr, "write_wav: %" PRIu32 " samples do not fit in a WAV file\n",
                num_samples);
        return;
    }
    const uint32_t data_bytes = num_samples * frame_bytes;

    printf("writing wav file, samples %" PRIu32 "\n", num_samples);
    printf("bytes per sample %u, channels %u, sample rate %" PRIu32 "\n",
           (unsigned)bytes_per_sample, (unsigned)num_channels, sample_rate);

    FILE *wav_file = fopen(filename, "wb"); //binary
    if (wav_file == NULL)
    {
        /* explicit check: an assert() would disappear in NDEBUG builds */
        perror(filename);
        return;
    }

    /* write RIFF header */
    fwrite("RIFF", 1, 4, wav_file);
    write_little_endian(36 + data_bytes, 4, wav_file);
    fwrite("WAVE", 1, 4, wav_file);

    /* write fmt  subchunk */
    fwrite("fmt ", 1, 4, wav_file);
    write_little_endian(16, 4, wav_file);   /* SubChunk1Size is 16 */
    write_little_endian(1, 2, wav_file);    /* PCM is format 1 */
    write_little_endian(num_channels, 2, wav_file);
    write_little_endian(sample_rate, 4, wav_file);
    write_little_endian(byte_rate, 4, wav_file);
    write_little_endian(frame_bytes, 2, wav_file);  /* block align */
    write_little_endian(8u * bytes_per_sample, 2, wav_file);  /* bits/sample */

    /* write data subchunk */
    fwrite("data", 1, 4, wav_file);
    write_little_endian(data_bytes, 4, wav_file);
    printf("data chunk size %" PRIu32 "\n", data_bytes);
    for (i = 0; i < num_samples; i++)
    {
        write_little_endian((uint16_t)(data[i]), bytes_per_sample, wav_file);
    }

    /* surface I/O problems (e.g. a full disk) instead of ignoring them */
    if (ferror(wav_file))
    {
        fprintf(stderr, "write_wav: error while writing %s\n", filename);
    }
    if (fclose(wav_file) != 0)
    {
        perror(filename);
    }
}

It's PCM digital data, of course. There's no analog data in a computer. :wink:

I don't know that much about PDM or how to convert but the Audacity website has a little tutorial about how regular PCM works: Every sample (i.e. 44,100 samples per second for CD audio) represents the wave amplitude (or height) at one instant in time.

I also don't know about the Giga but I wouldn't be surprised if the PDM is converted to analog (which I believe is easy) and then you'd be able to sample it with a regular ADC to get regular PCM.

That phrase, and the keyword "sampled", defines the stored data as digital. That could hardly be made clearer.

Other sample rates are accommodated in the format, and 16 kHz is popular with MCUs and is fine for quality voice recording.

.wav files are PCM also.
You just need to add the header to the file to be recognized as audio.

I have never done it inside the Arduino IDE (in MATLAB it is just one line of code; in Python it might be similar).

Thank you for the response. So do you think the PDM data from the Giga Display microphone can be converted to .wav format? And would it be possible to do this conversion directly on the Giga instead of on a PC?

For the PDM audio, I'm heavily referencing this documentation:
https://docs.arduino.cc/tutorials/giga-display-shield/microphone-tutorial/

But because I am very new to coding and know very little about how audio is stored as digital/analog data on computers, I'm pretty lost. Right now it sounds to me like I can record the PDM audio using that documentation example onto an SD card (which I don't even know how to do and will have to figure out), then convert the data to .wav using the script you sent.

So I read that Audacity article about PCM, but I don't really know how to apply the information there. Could you clarify what you mean by converting the PDM to analog? I know PDM represents an analog wave using the density of ones and zeros, but if I am understanding you correctly, you are saying I have to convert this to analog (I'm not sure what you mean by that; do you mean an actual analog wave?) and then sample it with a regular ADC. The Giga has ADC channels, but I am not quite sure how to use these either. Essentially, though, you are saying I could convert the PDM bitstream into an actual analog wave and then maybe use an ADC channel on the Giga to convert this to PCM? Or maybe I'm completely misinterpreting.

Post #2 explained how to DO that, in outline.

Your incoming analog audio is converted to array of 16 bit digital samples:

// Buffer to read samples into, each sample is 16-bits
short sampleBuffer[512];

Dump the 1024 byte (512 16-bit) sample array to an SD card and add the headers with the C code posted in post #2.

Of course you can make the Giga program more complicated and write the .wav header, but since you usually don't know the amount of audio data to be collected in advance, you have to rewind the file and update the header info with the file size before finally closing it.

The following code is an example of dumping raw samples onto SD (actually, QSPI flash) and works on the Adafruit Clue.

Since the Clue processor is relatively slow, I had to buffer up 1024 samples to avoid glitches while writing the data to the file. File size is fixed in this example to 250 blocks.

//working 4/6/2024
// upped gain to 42 (default was 20, too quiet)
/*
  This example reads audio data from the on-board PDM microphone
  and saves to a QSPI flash file audio.dat
  // 2Mb flash = 2097152 bytes, 4096 512 byte blocks
*/

#include <Adafruit_Arcada.h>

Adafruit_Arcada arcada;
#include <PDM.h>
#define SAMPLES 1024

// buffer for audio samples, each sample is 16-bits
// setting a larger buffer avoids dropping audio samples
// writes to flash are slow!

int16_t sampleBuffer[SAMPLES];

// number of samples read
volatile int samplesRead;

File file;

// One-time initialisation: bring up serial, the Arcada board support and its
// QSPI filesystem, open the output file, then start the PDM microphone.
// Any failure parks the sketch in an endless loop.
void setup() {
  Serial.begin(115200);
  // block until the serial port reports ready, so no messages are lost
  while (!Serial) yield();

  // configure the data receive callback
  PDM.onReceive(onPDMdata);


  // halt forever if the Arcada library fails to initialise
  if (!arcada.arcadaBegin()) {
    while (1);
  }
  //Arcada_FilesystemType
  // select the QSPI flash filesystem; the result of this call is not checked
  arcada.filesysBegin(ARCADA_FILESYS_QSPI);

  // raw samples are written to this file by loop()
  file = arcada.open("/audio.dat", O_CREAT | O_WRITE);
  if (!file) {
    Serial.println("\r output file open failure");
    while (1) yield();
  }
  // 2048 bytes = SAMPLES (1024) 16-bit samples, i.e. the size of sampleBuffer
  PDM.setBufferSize(2048);  //bytes!
  // initialize PDM with:
  // - one channel (mono mode)
  // - a 16 kHz sample rate
  if (!PDM.begin(1, 16000)) {
    Serial.println("Failed to start PDM!");
    while (1) yield();
  }
  // optionally set the gain, defaults to 20
  PDM.setGain(42);
  Serial.println("recording");
}
// Number of SAMPLES-sized blocks still to be written; loop() decrements it
// once per block and stops recording when it reaches 0.
// 250 blocks of 1024 samples at 16 kHz is 16 seconds of audio.
int nframes = 250;

// Main loop: each time the PDM callback has delivered a block, append the
// whole sample buffer to the output file.  Recording ends after nframes
// blocks, or as soon as a write comes up short (filesystem full).
void loop() {
  // nothing new from the microphone yet
  if (!samplesRead) {
    return;
  }

  const int blockBytes = 2 * SAMPLES;  // 16-bit samples, 2 bytes each
  const int written = file.write((char *)sampleBuffer, blockBytes);
  //    Serial.println(nframes);
  nframes--;

  const bool lastBlock = (nframes == 0);
  const bool shortWrite = (written < blockBytes);
  if (!lastBlock && !shortWrite) {
    // keep recording: re-arm for the next callback
    samplesRead = 0;
    return;
  }

  // done, or out of space on filesys: finish up and stay here forever
  file.close();
  Serial.println("stopped");
  arcada.filesysListFiles();
  Serial.flush();
  while (1) {
    yield();
  }
}

// PDM receive callback: copy whatever the microphone driver has ready into
// sampleBuffer and publish the sample count for loop() to pick up.
void onPDMdata() {
  // how many bytes the driver is holding for us
  const int pendingBytes = PDM.available();

  // pull them into the sample buffer
  PDM.read(sampleBuffer, pendingBytes);

  // each sample is 16 bits, so two bytes make one sample
  samplesRead = pendingBytes / 2;
}

I’ve worked with PDM mics on development boards before, and yeah, the conversion process can be confusing if you're new to audio formats. What helped me was just recording the PDM data as-is and then converting it externally. It’s more flexible when you're still figuring stuff out, you don’t have to mess with headers or estimate file lengths in advance on the board itself.

This topic was automatically closed 180 days after the last reply. New replies are no longer allowed.