#include <EEPROM.h>
#include <DNSServer.h>
#include <HTTPClient.h>
#include <HTTPUpdate.h>
#include <NTPClient.h>
#include <WiFi.h>
#include <WiFiClientSecure.h>
#include <WiFiUdp.h>
#include <WebServer.h>
#include <ESPmDNS.h>
#include <FS.h>
#include <LittleFS.h>
#include <ArduinoJson.h>
#include <TimeLib.h>
#include <vector>
#include <algorithm>
#include <ctime>
#include "Audio.h"
#include "esp_heap_caps.h"
#include "driver/temperature_sensor.h"
#include <driver/i2s.h>
// ===================================================================
// ==========================AUDIO OUTPUT============================
// ===================================================================
// I2S output pins (named for a MAX98357A amplifier); passed to
// audio.setPinout() in setup() as BCLK, LRC, DOUT.
#define MAX98357A_I2S_DOUT 37
#define MAX98357A_I2S_BCLK 36
#define MAX98357A_I2S_LRC 35
// Playback object; setup() uses it for the text-to-speech test phrase.
Audio audio;
// Number of bytes (64 KiB) that setup() requests from PSRAM.
const size_t audioBufferSize = 64 * 1024;
// Receives the PSRAM allocation made in setup(); remains nullptr when that
// allocation fails.
uint8_t* audioBufferPSRAM = nullptr;
// ===================================================================
// ==========================MIC======================================
// ===================================================================
// Microphone I2S pins, routed in i2sInit(): word select, serial data in,
// and bit clock.
#define I2S_WS 15
#define I2S_SD 13
#define I2S_SCK 2
// I2S peripheral used for recording.
// NOTE(review): the Audio object may default to the same port — confirm.
#define I2S_PORT I2S_NUM_0
// Capture format handed to the I2S driver configuration in i2sInit().
#define I2S_SAMPLE_RATE (16000)
#define I2S_SAMPLE_BITS (16)
// Number of bytes requested from i2s_read() per call in i2s_adc().
#define I2S_READ_LEN (16 * 1024)
// Recording length; multiplied with the per-second byte count below, so the
// unit is seconds.
#define RECORD_TIME (5)
#define I2S_CHANNEL_NUM (1)
// Total PCM payload in bytes for one recording
// (channels * sample rate * bytes per sample * seconds = 160000).
#define FLASH_RECORD_SIZE (I2S_CHANNEL_NUM * I2S_SAMPLE_RATE * I2S_SAMPLE_BITS / 8 * RECORD_TIME)
// Handle of the recording file; opened in initFile(), written and closed in
// i2s_adc().
File file;
// LittleFS path of the recording.
const char filename[] = "/audio.wav";
// Size in bytes of the header produced by wavHeader().
const int headerSize = 44;
// Global transcription text, initially empty. uploadaudioFile() declares a
// local variable with the same name, so nothing visible here assigns this one.
const char* transcription = "";
// ===================================================================
void initFile() {
LittleFS.remove(filename);
file = LittleFS.open(filename, "w");
if (!file) {
Serial.println("File is not available!");
return;
}
uint8_t header[headerSize];
wavHeader(header, FLASH_RECORD_SIZE);
file.write(header, headerSize);
scanFiles();
}
void i2sInit() {
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = I2S_SAMPLE_RATE,
.bits_per_sample = i2s_bits_per_sample_t(I2S_SAMPLE_BITS),
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 8,
.dma_buf_len = 1024,
.use_apll = false,
.tx_desc_auto_clear = true,
.fixed_mclk = 0
};
i2s_driver_install(I2S_PORT, &i2s_config, 0, NULL);
const i2s_pin_config_t pin_config = {
.bck_io_num = I2S_SCK,
.ws_io_num = I2S_WS,
.data_out_num = -1,
.data_in_num = I2S_SD
};
i2s_set_pin(I2S_PORT, &pin_config);
}
/**
 * Rescales raw little-endian 16-bit input words into 16-bit output samples.
 *
 * For every input pair the low 12 bits are taken (low byte plus the low
 * nibble of the high byte), multiplied by 256 and divided by 2048, and the
 * result is stored as the high byte of the output sample; the output low
 * byte is always 0.
 *
 * NOTE(review): 12-bit values of 2048 and above exceed 255 after scaling and
 * wrap when narrowed to one byte — kept as in the original, confirm intent.
 *
 * @param d_buff destination buffer, must hold at least `len` rounded down to
 *               an even number of bytes
 * @param s_buff source buffer with at least `len` bytes
 * @param len    number of source bytes; a trailing odd byte is ignored so the
 *               function never reads or writes past `len`
 */
void i2s_adc_data_scale(uint8_t *d_buff, uint8_t *s_buff, uint32_t len) {
  // Only complete 2-byte words are processed.
  const uint32_t usable = len & ~static_cast<uint32_t>(1);

  for (uint32_t i = 0; i < usable; i += 2) {
    const uint32_t raw =
        (static_cast<uint32_t>(s_buff[i + 1] & 0x0F) << 8) | s_buff[i];
    d_buff[i] = 0;
    d_buff[i + 1] = static_cast<uint8_t>(raw * 256 / 2048);
  }
}
void i2s_adc(void *arg) {
int i2s_read_len = I2S_READ_LEN;
int flash_wr_size = 0;
size_t bytes_read;
char *i2s_read_buff = (char *)calloc(i2s_read_len, sizeof(char));
uint8_t *flash_write_buff = (uint8_t *)calloc(i2s_read_len, sizeof(char));
i2s_read(I2S_PORT, (void *)i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
i2s_read(I2S_PORT, (void *)i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
Serial.println(" *** Recording Start *** ");
initFile();
while (flash_wr_size < FLASH_RECORD_SIZE) {
i2s_read(I2S_PORT, (void *)i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
i2s_adc_data_scale(flash_write_buff, (uint8_t *)i2s_read_buff, i2s_read_len);
file.write((const uint8_t *)flash_write_buff, i2s_read_len);
flash_wr_size += i2s_read_len;
ets_printf("Sound recording %u%%\n", flash_wr_size * 100 / FLASH_RECORD_SIZE);
ets_printf("Never Used Stack Size: %u\n", uxTaskGetStackHighWaterMark(NULL));
}
file.close();
free(i2s_read_buff);
i2s_read_buff = NULL;
free(flash_write_buff);
flash_write_buff = NULL;
scanFiles();
if (WiFi.status() == WL_CONNECTED) {
uploadaudioFile();
}
vTaskDelete(NULL);
}
void wavHeader(uint8_t *header, int wavSize) {
header[0] = 'R';
header[1] = 'I';
header[2] = 'F';
header[3] = 'F';
unsigned int fileSize = wavSize + headerSize - 8;
header[4] = (uint8_t)(fileSize & 0xFF);
header[5] = (uint8_t)((fileSize >> 8) & 0xFF);
header[6] = (uint8_t)((fileSize >> 16) & 0xFF);
header[7] = (uint8_t)((fileSize >> 24) & 0xFF);
header[8] = 'W';
header[9] = 'A';
header[10] = 'V';
header[11] = 'E';
header[12] = 'f';
header[13] = 'm';
header[14] = 't';
header[15] = ' ';
header[16] = 0x10;
header[17] = 0x00;
header[18] = 0x00;
header[19] = 0x00;
header[20] = 0x01;
header[21] = 0x00;
header[22] = 0x01;
header[23] = 0x00;
header[24] = 0x80;
header[25] = 0x3E;
header[26] = 0x00;
header[27] = 0x00;
header[28] = 0x00;
header[29] = 0x7D;
header[30] = 0x01;
header[31] = 0x00;
header[32] = 0x02;
header[33] = 0x00;
header[34] = 0x10;
header[35] = 0x00;
header[36] = 'd';
header[37] = 'a';
header[38] = 't';
header[39] = 'a';
header[40] = (uint8_t)(wavSize & 0xFF);
header[41] = (uint8_t)((wavSize >> 8) & 0xFF);
header[42] = (uint8_t)((wavSize >> 16) & 0xFF);
header[43] = (uint8_t)((wavSize >> 24) & 0xFF);
}
void uploadaudioFile() {
Serial.println("Uploading audio file...");
WiFiClient client;
HTTPClient http;
http.begin(client, stt_url);
http.addHeader("X-API-KEY", encrypted_api_key);
http.addHeader("Content-Type", "audio/wav");
File uploadaudioFile = LittleFS.open(filename, "r");
if (!uploadaudioFile) {
Serial.println("Failed to open file for reading");
return;
}
int httpResponseCode = http.sendRequest("POST", &uploadaudioFile, uploadaudioFile.size());
if (httpResponseCode > 0) {
Serial.printf("File uploaded successfully, response code: %d\n", httpResponseCode);
String response = http.getString();
Serial.println("Response from server: ");
Serial.println(response);
DynamicJsonDocument doc(1024);
DeserializationError error = deserializeJson(doc, response);
if (!error) {
const char* transcription = doc["transcription"];
Serial.print("Transcription: ");
Serial.println(transcription);
} else {
Serial.println("Failed to parse JSON response");
}
} else {
Serial.printf("Error uploading file: %s\n", http.errorToString(httpResponseCode).c_str());
}
uploadaudioFile.close();
http.end();
}
// Blocks while the Audio object reports that it is still running; each pass
// services audio.loop() and then calls yield().
void waitForSpeechEnd() {
  for (; audio.isRunning(); yield()) {
    audio.loop();
  }
}
void setup() {
Serial.begin(115200);
if (psramFound()) {
Serial.println("PSRAM found and ready to use");
} else {
Serial.println("No PSRAM found");
}
audioBufferPSRAM = (uint8_t*)heap_caps_malloc(audioBufferSize, MALLOC_CAP_SPIRAM);
if (audioBufferPSRAM == nullptr) {
Serial.println("Error: Failed to allocate audio buffer in PSRAM");
} else {
Serial.println("Audio buffer allocated in PSRAM");
}
i2sInit();
xTaskCreate(i2s_adc, "i2s_adc", 4096, NULL, 2, NULL);
delay(500);
audio.setPinout(MAX98357A_I2S_BCLK, MAX98357A_I2S_LRC, MAX98357A_I2S_DOUT);
audio.setVolume(100);
audio.connecttospeech("TEST TEST", "en");
waitForSpeechEnd();
}