Sound module for Arduino Uno is not working properly

Hi guys, I'm working on a project using this sound module connected to an Arduino Uno:
Link

However it is not working properly, even when I wired it correctly according to this website:
Website

I checked it a lot to ensure I wired it correctly so I don't think that's the issue.

Here's the original code I ran that didn't work:

// ---- Pins ----
#define SOUND_INPUT A0     // analog pin the sound level is sampled from
#define SOUND_TRIGGER 7    // digital pin reserved for presence detection; not read in this sketch

// ---- Timing, in milliseconds ----
const byte SLICE_DURATION = 150;             // gap between two consecutive samples
const unsigned int RECORD_DURATION = 7000;   // length of a registration recording
const unsigned int LOGIN_DURATION = 15000;   // length of a login listening window

// ---- Sample counts derived from the timing above ----
const byte RECORD_SLICES = RECORD_DURATION / SLICE_DURATION; // 7000 / 150 = 46
const int LOGIN_SLICES = LOGIN_DURATION / SLICE_DURATION;    // 15000 / 150 = 100

// ---- Matching and capacity limits ----
const int MATCH_TOLERANCE = 100;  // largest per-sample difference still treated as equal
const byte MAX_USERS = 3;
const byte ID_LENGTH = 15;        // size of an ID buffer, terminator included

// ---- Registered users: one sample row and one ID string per user ----
int userPatterns[MAX_USERS][RECORD_SLICES];
char userIDs[MAX_USERS][ID_LENGTH];
byte userCount = 0;               // number of rows currently in use

// One-time initialisation: open the serial link and announce readiness.
void setup() {
  Serial.begin(9600);

  // Hold here until the serial port reports that it is ready.
  while (!Serial) {
  }

  Serial.println("Voice Recognition Simulation Ready.");
}

// Main menu: prompt for a one-character choice and dispatch on it.
void loop() {
  Serial.println("Type '1' to register or '2' to login:");

  // Block until at least one byte has arrived.
  while (Serial.available() == 0) {
  }
  const char choice = Serial.read();

  // Discard anything else that is already sitting in the receive buffer.
  while (Serial.available() != 0) {
    Serial.read();
  }

  switch (choice) {
    case '1':
      registerUser();
      break;
    case '2':
      loginUser();
      break;
    default:
      Serial.println("Invalid option. Try again.");
      break;
  }
}

// Records one sound-level pattern for a new user and stores it under an ID
// typed on the serial monitor.
//
// Takes RECORD_SLICES readings from SOUND_INPUT, one every SLICE_DURATION ms,
// into the next free row of userPatterns, then reads the ID into the matching
// row of userIDs and increments userCount. Returns early with a message when
// MAX_USERS patterns are already stored.
void registerUser() {
  if (userCount >= MAX_USERS) {
    Serial.println("User limit reached.");
    return;
  }

  Serial.println("Prepare to speak. Recording in 3 seconds...");
  delay(3000);
  Serial.println("Recording now for 7 seconds...");

  for (int i = 0; i < RECORD_SLICES; i++) {
    const int level = analogRead(SOUND_INPUT);
    userPatterns[userCount][i] = level;
    // Echo the value that was actually sampled. Printing SOUND_INPUT itself
    // only shows the pin constant (the same number on every line), which hides
    // what the module is really reporting.
    Serial.println(level);
    delay(SLICE_DURATION);
  }

  Serial.println("Recording complete.");
  Serial.println("Enter a user ID to associate (max 14 characters):");

  // Drop stale bytes, then wait for the first character of the ID.
  while (Serial.available()) Serial.read();
  while (!Serial.available());

  readLine(userIDs[userCount], ID_LENGTH);
  Serial.print("User registered with ID: ");
  Serial.println(userIDs[userCount]);
  userCount++;
}

// Listens for a login attempt and checks it against a stored user pattern.
//
// Takes LOGIN_SLICES readings from SOUND_INPUT, one every SLICE_DURATION ms,
// reports how long the capture took, then asks for a user ID. If the ID is
// registered, every RECORD_SLICES-long window of the capture is compared with
// that user's stored pattern; a window matches when every sample differs from
// the stored one by at most MATCH_TOLERANCE. Prints the outcome on Serial.
void loginUser() {
  Serial.println("Prepare to speak. Listening in 3 seconds...");
  delay(3000);
  Serial.println("Listening for 15 seconds...");

  int loginPattern[LOGIN_SLICES];
  unsigned long startTime = millis();

  for (int i = 0; i < LOGIN_SLICES; i++) {
    const int level = analogRead(SOUND_INPUT);
    loginPattern[i] = level;
    // Echo the value that was actually sampled. Printing SOUND_INPUT itself
    // only shows the pin constant (the same number on every line), which hides
    // what the module is really reporting.
    Serial.println(level);
    delay(SLICE_DURATION);
  }

  unsigned long endTime = millis();
  float timeSpentSeconds = (endTime - startTime) / 1000.0;
  Serial.print("Actual listening time: ");
  Serial.print(timeSpentSeconds, 2);
  Serial.println(" seconds");

  Serial.println("Listening complete.");
  Serial.println("Enter user ID to attempt login:");

  // Drop stale bytes, then wait for the first character of the ID.
  while (Serial.available()) Serial.read();
  while (!Serial.available());

  char inputID[ID_LENGTH];
  readLine(inputID, ID_LENGTH);

  Serial.print("Attempting login with ID: ");
  Serial.println(inputID);

  // Look up the row that belongs to the typed ID.
  int userIndex = -1;
  for (int i = 0; i < userCount; i++) {
    if (strcmp(userIDs[i], inputID) == 0) {
      userIndex = i;
      break;
    }
  }

  if (userIndex == -1) {
    Serial.println("User ID not found.");
    return;
  }

  // Slide a RECORD_SLICES-wide window across the capture and stop at the
  // first offset where every sample is within tolerance.
  bool match = false;
  for (int i = 0; i <= LOGIN_SLICES - RECORD_SLICES; i++) {
    bool windowMatch = true;
    for (int j = 0; j < RECORD_SLICES; j++) {
      int diff = abs(loginPattern[i + j] - userPatterns[userIndex][j]);
      if (diff > MATCH_TOLERANCE) {
        windowMatch = false;
        break;
      }
    }
    if (windowMatch) {
      match = true;
      break;
    }
  }

  if (match) {
    Serial.println("Login successful!");
  } else {
    Serial.println("Login failed: Pattern mismatch.");
  }
}

// Reads a full line of serial input into a char array
void readLine(char* buffer, byte length) {
  byte i = 0;
  while (true) {
    if (Serial.available()) {
      char c = Serial.read();
      if (c == '\n' || c == '\r') {
        buffer[i] = '\0';
        break;
      }
      if (i < length - 1) {
        buffer[i++] = c;
      }
    }
  }
}

Here's the result I got in the serial monitor after I ran it:

"Voice Recognition Simulation Ready.
Type '1' to register or '2' to login:
(I typed 1)
Prepare to speak. Recording in 3 seconds...
Recording now for 7 seconds...

14

14

14

14

14

14

14

14...

Recording complete.

Enter a user ID to associate (max 14 characters):
(I typed ID #1)

User registered with ID: ID #1

Type '1' to register or '2' to login:
(I typed 2)

Prepare to speak. Listening in 3 seconds...

Listening for 15 seconds...
14

14

14

14

14

14

14

14

14

14

14

Actual listening time: 15.02 seconds

Listening complete.

Enter user ID to attempt login:
(I typed ID #1)

Attempting login with ID: ID #1

Login successful!

Type '1' to register or '2' to login: "

As you can see, the repeated 14s are an issue. I've narrowed a significant part of the problem down to the sound module rather than the code, because when I ran this simple sketch:

// Analog pin this sketch reads the microphone value from.
const int microphonePin = A0;


// Opens the serial port at 9600 baud; nothing else is initialised here.
void setup() {
  Serial.begin(9600);
}

// Prints one raw analog reading from the microphone pin, then waits a second.
void loop() {
  Serial.println(analogRead(microphonePin));
  delay(1000);
}

These are the readings I got in the serial monitor:

"13
12
13
13
12
12
12
13
12
12
12
13
12"
In that time frame, I screamed, slammed the table and did all sorts of things but couldn't get a huge difference in the readings. When I unplugged it however I got these readings:
"224
338
377
414
448
476"

Which was a huge jump from before.

For context, my goal with the sound module is to distinguish between different human voices by assigning each user ID a list of numerical values that represent the voltage fluctuations caused by their voice saying the same word from the same distance. The system listens for 15 seconds to give me some buffer time, and within that interval, it checks if any 7-second segment matches the 7-second voice pattern associated with the ID I'm trying to log in with. If a match is found, it's considered a successful identification.

From my research, I realize this method is quite unreliable due to the lack of precision and accuracy—this module is essentially just a basic sound detector. Still, it's a starting point that helps me ease into the process without jumping straight into complex solutions. Eventually, I plan to move on to more advanced hardware. That said, if anyone has suggestions for alternative modules that are better suited for voice recognition, I’d be glad to hear them—I'd much rather invest time in learning a more capable tool than spend it troubleshooting something that's ultimately limited and just a dead end.

I know that Arduino has some, and I am sure Seeed Studio will as well. I'm not sure about Espressif, but there is a good chance, since AI is all the rage. Just click the hardware tab near the top of this Arduino site. Also, asking Google or an AI which boards have voice recognition will likely give you a decent list.

Sales sites are useless. Datasheets are requested.

Please extract the wiring and post it here.

Google on "Arduino + voice recognition" would surely bring up useful links.

I did some research into them, but the other voice recognition modules I found were targeted towards recognizing specific commands and not the actual voice itself. Things like this: Link. Perhaps I'm missing something?

My apologies, here's the datasheet: Datasheet

Wiring used:


I didn't use the LED, and I connected to digital pin 7 and not 1, but otherwise the same thing.

When I did some research, the best I could find were modules that could only be used for recognizing voice commands and not recognizing the voice behind it, like this one mentioned earlier: Link
And I couldn't find any other modules that offered a different feature other than recognizing voice commands.

Sorry, I will not be able to help.

1 Like

When anything is connected to pins 0 or 1 on an Arduino Uno R3 you will have a problem loading code. Do not do it.

Basically that "sound module" is just a microphone with a bit of gain. You have two options with it.

  1. The output will change from high to low when the input to the microphone exceeds the voltage set by the ten turn screw pot.
  2. The output will be an analogue output of the sound signal.

This module is totally useless for what you want to do, because when it is wired as in configuration 2) it is not sensitive enough for your purpose.

Also to recognise a voice is a lot more difficult than you think it is. It takes using an AI algorithm to perform analysis on the speech waveform.

You will not find a module to do this.

These might be useful links to read Data for AI sound recognition

Rather old now but your best bet would be:-
get-started-with-machine-learning-on-arduino

You need an Arduino Nano 33 BLE or Arduino Nano 33 sense board for this.

Note that the number of voices you want to recognise is limited, and get slower the more voices you have to check.

When I tried it a few years ago I was not very impressed.

1 Like

Where did you find that code?

Do you have a good reason to believe it will do anything useful?

In my wiring, I didn't connect to digital pin 0 or 1; I connected to 7. I viewed both the links you shared; the second only focused on gesture and speech recognition through commands. The gesture recognition is interesting but not relevant at the moment. However, the first one, specifically the use of the NXP Kinetis KL82 to recognize and differentiate environmental sounds, caught my attention. Do you think it could differentiate human voices if trained correctly, in the same way it can differentiate environmental sounds? On another note, I found this AI framework that differentiates speakers, but it is quite complex and needs to be integrated with a Linux system or something of the sort: Install NeMo Framework — NVIDIA NeMo Framework User Guide

However, I would prefer to use the NXP Kinetis KL82 microcontroller rather than the AI framework, since it is a microcontroller, which I have much more experience with than installing complex frameworks on Linux systems.

I wrote it myself. I hoped it could differentiate human voices by converting the analog signal to digital and then comparing the stored voltage fluctuations with the ones captured at sign-in, but it didn't work in practice. I haven't gone as far as testing the theory yet, since the readings I'm getting even with proper wiring and code are wacky, but I found that even if it were working, the sound module is incapable of differentiating complex sound waves like human voices, so it's useless to continue down this rabbit hole.

When I read the linked data sheet, I see the analog voltage is the VOLUME LEVEL of the sound. The digital output is based on the same volume level, but switched 0 to 1 and back based on the POT setting.

The unit DOES NOT do anything with the sound frequency which is what your project needs.

That sort of approach actually does work, if done at very fast sample rate and making the right sort of comparison (the simplest is time shifted cross-correlation).

But it is not very accurate: unforgiving of slight changes in speech rate, intonation and pronunciation. And of course it is completely speaker-dependent.

The thing is that if you record one sound twice, the numbers you get back are totally different each time. This is because there is no synchronisation between the two recordings, so they both start gathering A/D readings at different points on the input waveform. Therefore you need to do "something" with the raw recordings to work out the differences and similarities between the two.

Whatever you do, you will need to train your system, irrespective of the way it is actually implemented. I would have thought that some sort of neural network might be the way to do this. Neural network

I first saw the technique used in the early 1970s as an undergraduate attending a lecture given in an after-hours session by visiting academics. It had only 16 neurons and used TTL logic to implement it. Even so, it was quite successful. Since then, chips containing hundreds of thousands of neurons have been made, and there used to be lots of books about them, each more than four inches thick, in the academic book shops.

Anyway try looking up "voice prints" for the sort of thing you need to use. It is possible but, as you have found, it is way way more complex than you first thought.