Esp32 error 400 upload spiffs wav file to web server using http post

I am trying to transcribe a WAV file recorded by an ESP32 connected to an INMP441: the ESP32 uploads the file to a Python Flask server, the server transcribes it with a local Whisper model, and the text is returned to the ESP32 to be shown on the serial printer. My problem is that I am unable to upload the WAV file; I get HTTP error 400. I tested the Whisper server with a curl command and a saved WAV file, and that works correctly.

#include <stdlib.h>
#include <string.h>

#include <driver/i2s.h>
#include <SPIFFS.h>
#include <WiFi.h>
#include <HTTPClient.h>

// GPIO numbers for the I2S input; i2sInit() passes them to the driver as the
// word-select, serial-data-in and bit-clock pins respectively.
#define I2S_WS 10
#define I2S_SD 11
#define I2S_SCK 12
#define I2S_PORT I2S_NUM_0
// Capture format handed to the I2S driver.
#define I2S_SAMPLE_RATE   (16000)
#define I2S_SAMPLE_BITS   (16)
// Number of bytes requested from the I2S driver on every read.
#define I2S_READ_LEN      (16 * 1024)
#define RECORD_TIME       (5) //Seconds
#define I2S_CHANNEL_NUM   (1)
// Size in bytes of the audio data of one recording:
// channels * samples per second * bytes per sample * seconds.
#define FLASH_RECORD_SIZE (I2S_CHANNEL_NUM * I2S_SAMPLE_RATE * I2S_SAMPLE_BITS / 8 * RECORD_TIME)

// Handle to the recording: opened for writing in SPIFFSInit(), filled and closed
// by i2s_adc(), and reopened for reading in uploadFile().
File file;
const char filename[] = "/recording.wav";
// Length in bytes of the WAV header that precedes the audio data.
const int headerSize = 44;
// Set to true by the wifiConnect task once WiFi reports WL_CONNECTED; i2s_adc()
// checks it after recording to decide whether to upload.
bool isWIFIConnected;

// Arduino entry point: brings up the serial port, the file system and the I2S
// driver, then hands the actual work to two FreeRTOS tasks.
void setup() {
  // Stack depths requested from xTaskCreate for the two tasks.
  const uint32_t recorderStackDepth = 1024 * 3;
  const uint32_t wifiStackDepth = 4096;

  Serial.begin(115200);
  SPIFFSInit();
  i2sInit();

  // The recorder is created first (priority 1); the WiFi task follows half a
  // second later at priority 0.
  xTaskCreate(i2s_adc, "i2s_adc", recorderStackDepth, NULL, 1, NULL);
  delay(500);
  xTaskCreate(wifiConnect, "wifi_Connect", wifiStackDepth, NULL, 0, NULL);
}

// Intentionally empty: recording, WiFi connection and upload all run in the
// tasks created by setup().
void loop() {
  
}

// Mounts SPIFFS, recreates the recording file and writes its WAV header.
//
// Halts forever if the file system cannot be mounted. If the recording file
// cannot be created or the header cannot be written, the problem is reported
// on the serial port and execution continues.
void SPIFFSInit(){
  if(!SPIFFS.begin(true)){
    Serial.println("SPIFFS initialisation failed!");
    while(1) yield();
  }

  //SPIFFS.format();
  // Remove any previous recording so the new file starts empty.
  SPIFFS.remove(filename);
  file = SPIFFS.open(filename, FILE_WRITE);
  if(!file){
    Serial.println("File is not available!");
  }else{
    // The header announces FLASH_RECORD_SIZE bytes of audio data.
    byte header[headerSize];
    wavHeader(header, FLASH_RECORD_SIZE);

    if(file.write(header, headerSize) != static_cast<size_t>(headerSize)){
      Serial.println("Failed to write WAV header!");
    }
  }
  listSPIFFS();
}

void i2sInit(){
  i2s_config_t i2s_config = {
    .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
    .sample_rate = I2S_SAMPLE_RATE,
    .bits_per_sample = i2s_bits_per_sample_t(I2S_SAMPLE_BITS),
    .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
    .communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
    .intr_alloc_flags = 0,
    .dma_buf_count = 64,
    .dma_buf_len = 1024,
    .use_apll = 1
  };

  i2s_driver_install(I2S_PORT, &i2s_config, 0, NULL);

  const i2s_pin_config_t pin_config = {
    .bck_io_num = I2S_SCK,
    .ws_io_num = I2S_WS,
    .data_out_num = -1,
    .data_in_num = I2S_SD
  };

  i2s_set_pin(I2S_PORT, &pin_config);
}


/**
 * Rescales raw little-endian samples into the byte pairs stored in the file.
 *
 * Each pair of input bytes is read as a 12-bit value (the upper four bits of
 * the second byte are discarded). For every pair two bytes are written: a zero
 * byte followed by the value multiplied by 256/2048, truncated to 8 bits.
 *
 * @param d_buff destination buffer; receives one byte per input byte consumed
 * @param s_buff source buffer holding the raw samples
 * @param len    number of bytes in s_buff; a trailing unpaired byte is ignored
 *               so that neither buffer is accessed past `len` bytes
 */
void i2s_adc_data_scale(uint8_t * d_buff, uint8_t* s_buff, uint32_t len)
{
    uint32_t out = 0;
    // `in + 1 < len` guarantees that both bytes of the pair are inside the buffer.
    for (uint32_t in = 0; in + 1 < len; in += 2) {
        const uint32_t sample =
            (static_cast<uint32_t>(s_buff[in + 1] & 0xf) << 8) | s_buff[in];
        d_buff[out++] = 0;
        d_buff[out++] = static_cast<uint8_t>(sample * 256 / 2048);
    }
}

void i2s_adc(void *arg)
{
    
    int i2s_read_len = I2S_READ_LEN;
    int flash_wr_size = 0;
    size_t bytes_read;

    char* i2s_read_buff = (char*) calloc(i2s_read_len, sizeof(char));
    uint8_t* flash_write_buff = (uint8_t*) calloc(i2s_read_len, sizeof(char));

    i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
    i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
    
    Serial.println(" *** Recording Start *** ");
    while (flash_wr_size < FLASH_RECORD_SIZE) {
        //read data from I2S bus, in this case, from ADC.
        i2s_read(I2S_PORT, (void*) i2s_read_buff, i2s_read_len, &bytes_read, portMAX_DELAY);
        //example_disp_buf((uint8_t*) i2s_read_buff, 64);
        //save original data from I2S(ADC) into flash.
        i2s_adc_data_scale(flash_write_buff, (uint8_t*)i2s_read_buff, i2s_read_len);
        file.write((const byte*) flash_write_buff, i2s_read_len);
        flash_wr_size += i2s_read_len;
        ets_printf("Sound recording %u%%\n", flash_wr_size * 100 / FLASH_RECORD_SIZE);
        ets_printf("Never Used Stack Size: %u\n", uxTaskGetStackHighWaterMark(NULL));
    }
    file.close();

    free(i2s_read_buff);
    i2s_read_buff = NULL;
    free(flash_write_buff);
    flash_write_buff = NULL;
    
    listSPIFFS();

    if(isWIFIConnected){
      uploadFile();
    }
    
    vTaskDelete(NULL);
}

/**
 * Debug helper: prints `length` bytes of `buf` as two-digit hex values, eight
 * per line, framed by a "======" line before and after.
 */
void example_disp_buf(uint8_t* buf, int length)
{
    printf("======\n");
    int shown = 0;
    while (shown < length) {
        printf("%02x ", buf[shown]);
        ++shown;
        // Break the line after every eighth value.
        if (shown % 8 == 0) {
            printf("\n");
        }
    }
    printf("======\n");
}

/**
 * Fills `header` with a 44-byte canonical WAV (RIFF) header for 16 kHz, mono,
 * 16-bit PCM audio.
 *
 * The byte-rate field is derived from the format (sample rate * block align,
 * i.e. 32000) rather than hard-coded.
 *
 * @param header  destination; must have room for headerSize (44) bytes
 * @param wavSize number of audio data bytes that follow the header
 */
void wavHeader(byte* header, int wavSize){
  // Audio format described by the header; it has to match the capture settings.
  const unsigned int sampleRate = 16000;
  const unsigned int channels = 1;
  const unsigned int bitsPerSample = 16;
  const unsigned int blockAlign = channels * bitsPerSample / 8;  // bytes per sample frame
  const unsigned int byteRate = sampleRate * blockAlign;         // bytes per second
  const unsigned int fileSize = wavSize + headerSize - 8;        // RIFF chunk size
  const unsigned int dataSize = wavSize;

  byte* out = header;
  // Appends a four-character chunk tag.
  auto putTag = [&out](const char* tag) {
    for (int i = 0; i < 4; i++) {
      *out++ = (byte)tag[i];
    }
  };
  // Appends the lowest `count` bytes of `value`, least-significant byte first.
  auto putLE = [&out](unsigned int value, int count) {
    for (int i = 0; i < count; i++) {
      *out++ = (byte)(value & 0xFF);
      value >>= 8;
    }
  };

  putTag("RIFF");
  putLE(fileSize, 4);
  putTag("WAVE");
  putTag("fmt ");
  putLE(16, 4);             // size of the fmt chunk
  putLE(1, 2);              // audio format 1 = PCM
  putLE(channels, 2);
  putLE(sampleRate, 4);
  putLE(byteRate, 4);
  putLE(blockAlign, 2);
  putLE(bitsPerSample, 2);
  putTag("data");
  putLE(dataSize, 4);
}


void listSPIFFS(void) {
  Serial.println(F("\r\nListing SPIFFS files:"));
  static const char line[] PROGMEM =  "=================================================";

  Serial.println(FPSTR(line));
  Serial.println(F("  File name                              Size"));
  Serial.println(FPSTR(line));

  fs::File root = SPIFFS.open("/");
  if (!root) {
    Serial.println(F("Failed to open directory"));
    return;
  }
  if (!root.isDirectory()) {
    Serial.println(F("Not a directory"));
    return;
  }

  fs::File file = root.openNextFile();
  while (file) {

    if (file.isDirectory()) {
      Serial.print("DIR : ");
      String fileName = file.name();
      Serial.print(fileName);
    } else {
      String fileName = file.name();
      Serial.print("  " + fileName);
      // File path can be 31 characters maximum in SPIFFS
      int spaces = 33 - fileName.length(); // Tabulate nicely
      if (spaces < 1) spaces = 1;
      while (spaces--) Serial.print(" ");
      String fileSize = (String) file.size();
      spaces = 10 - fileSize.length(); // Tabulate nicely
      if (spaces < 1) spaces = 1;
      while (spaces--) Serial.print(" ");
      Serial.println(fileSize + " bytes");
    }

    file = root.openNextFile();
  }

  Serial.println(FPSTR(line));
  Serial.println();
  delay(1000);
}

/**
 * FreeRTOS task: connects to the WiFi network and sets isWIFIConnected once
 * the connection is up. The task then idles forever.
 *
 * @param pvParameters unused task parameter
 */
void wifiConnect(void *pvParameters){
  isWIFIConnected = false;
  // String literals are constant, so they are held through const pointers.
  const char* ssid = "*****";
  const char* password = "*****";

  WiFi.begin(ssid, password);
  WiFi.setTxPower(WIFI_POWER_8_5dBm);
  // Poll the connection state, printing a dot per attempt.
  while(WiFi.status() != WL_CONNECTED){
    vTaskDelay(500);
    Serial.print(".");
  }
  isWIFIConnected = true;
  // Nothing left to do; keep the task alive without using CPU time.
  while(true){
    vTaskDelay(1000);
  }
}

void uploadFile(){
  file = SPIFFS.open(filename, FILE_READ);
  if(!file){
    Serial.println("FILE IS NOT AVAILABLE!");
    return;
  }
  Serial.println("===> Upload FILE to python flask Server");
  Serial.println(file.name());
  Serial.println(file.size());

  HTTPClient client;
  client.begin("http://192.168.1.112:5000/upload");
  client.addHeader("Content-Type", "multipart/form-data; boundary=----WebKitFormBoundary7MA4YWxkTrZu0gW");
  String boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW";
  String body = "--" + boundary + "\r\n";
  body += "Content-Disposition: form-data; name=\"file\"; filename=\"recording.wav\"\r\n";
  body += "Content-Type: audio/wav\r\n\r\n";

  int fileSize = file.size();
  uint8_t *fileBuffer = new uint8_t[fileSize];
  file.read(fileBuffer, fileSize);
  file.close();

  // Send the file data
  int httpResponseCode = client.sendRequest("POST", body + String((char*)fileBuffer, fileSize) + "\r\n--" + boundary + "--");
    
  Serial.print("httpResponseCode : ");
  Serial.println(httpResponseCode);
  

  if(httpResponseCode == 200){
    String response = client.getString();
    Serial.println("==================== Transcription ====================");
    Serial.println(response);
    Serial.println("====================      End      ====================");
  }else{
    Serial.println("Error");
  }
  delete[] fileBuffer;
  
  client.end();
}

this is the python code

from flask import Flask, abort, request
from flask_cors import CORS
from tempfile import NamedTemporaryFile
import whisper
import torch
import logging


logging.basicConfig(level=logging.DEBUG)

# Run the model on the NVIDIA GPU when one is available, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Whisper model once at start-up, on the selected device.
model = whisper.load_model("base", device=DEVICE)

app = Flask(__name__)

# Allow cross-origin requests to every route.
CORS(app)

@app.route("/")
def hello():
    """Return a fixed status string so a client can check that the service is up."""
    return "Whisper service active!!"


@app.route('/upload', methods=['POST'])
def handler():
    """Transcribe every file uploaded in a multipart/form-data POST.

    Returns a JSON object ``{'results': [{'filename': ..., 'transcript': ...}]}``
    with one entry per uploaded file part. Responds with 400 (Bad Request) and
    an explanatory message when the request contains no file part.
    """
    import os

    if not request.files:
        # If the user didn't submit any files, return a 400 (Bad Request) error
        # that says what was expected.
        abort(400, description="No file in the request; send multipart/form-data with a file part.")

    # For each file, let's store the results in a list of dictionaries.
    results = []

    # Loop over every file that the user submitted.
    for filename, handle in request.files.items():
        # Create a temporary file whose location is available in `temp.name`.
        # delete=False keeps it on disk after it is closed, so that it can be
        # reopened by name; it is removed explicitly below.
        temp = NamedTemporaryFile(delete=False)
        try:
            # Write the user's uploaded file to the temporary file.
            handle.save(temp)
            # Close it so that all data is flushed to disk before the file is
            # read back through its path.
            temp.close()
            # Let's get the transcript of the temporary file.
            result = model.transcribe(temp.name, language="spanish", fp16=False)
        finally:
            temp.close()
            os.remove(temp.name)
        # Now we can store the result object for this file.
        results.append({
            'filename': filename,
            'transcript': result['text'],
        })

    # This will be automatically converted to JSON.
    return {'results': results}
    

# Start the development server only when this file is executed as a script, so
# that importing the module does not block on the server.
if __name__ == "__main__":
    app.run(host='192.168.1.112', debug=True)

the arduino IDE debug

  File name                              Size
=================================================
  recording.wav                        163884 bytes
=================================================

===> Upload FILE to python flask Server
recording.wav
163884
[  9396][D][HTTPClient.cpp:293] beginInternal(): protocol: http, host: 192.168.1.112 port: 5000 url: /upload
[  9558][D][HTTPClient.cpp:574] sendRequest(): request type: 'POST' redirCount: 0

[  9781][D][HTTPClient.cpp:1112] connect():  connected to 192.168.1.112:5000
[ 11065][D][HTTPClient.cpp:1257] handleHeaderResponse(): code: 400
[ 11071][D][HTTPClient.cpp:1260] handleHeaderResponse(): size: 167
[ 11077][D][HTTPClient.cpp:618] sendRequest(): sendRequest code=400

httpResponseCode : 400
Error
[ 11084][D][HTTPClient.cpp:373] disconnect(): still data in buffer (167), clean up.

[ 11094][D][HTTPClient.cpp:380] disconnect(): tcp stop

Thanks for posting all the code, and output as code. Before getting to your stated issue.... it's excessive to start a separate task to connect to WiFi. It could just as easily be called from setup, since it just runs briefly and then the task ends with an infinite loop.

The wavHeader code is long and tediously lists 44 byte offsets. Computers are really good at counting. There's also no need to mask with 0xFF if you're assigning to byte -- any "extra" bits get lopped off anyway. You do need a cast to avoid a narrowing warning, but something like the following is more readable and less error-prone:

constexpr int headerSize = 44;

// Expands to the four bytes of a 32-bit value, least-significant byte first.
// The argument is parenthesised so that an expression can be passed safely.
#define U32_BYTES_LE(x) static_cast<byte>(x), \
  static_cast<byte>((x) >> 8), static_cast<byte>((x) >> 16), static_cast<byte>((x) >> 24)

// Writes a 44-byte WAV header for 16 kHz, mono, 16-bit PCM audio to `dest`.
// `wavSize` is the number of audio data bytes that follow the header.
void wavHeader(byte *dest, int wavSize) {
  const unsigned chunkSize = wavSize + headerSize - 8;
  const byte header[] = {
    'R', 'I', 'F', 'F',
    U32_BYTES_LE(chunkSize),
    'W', 'A', 'V', 'E',
    'f', 'm', 't', ' ',
    U32_BYTES_LE(0x10),      // size of the fmt chunk
    0x01, 0x00,              // audio format 1 = PCM
    0x01, 0x00,              // one channel
    U32_BYTES_LE(16000),     // sample rate
    U32_BYTES_LE(32000),     // byte rate = sample rate * block align
    0x02, 0x00,              // block align: bytes per sample frame
    0x10, 0x00,              // bits per sample
    'd', 'a', 't', 'a',
    U32_BYTES_LE(wavSize),
  };
  static_assert(sizeof(header) == headerSize, "WAV header must be exactly headerSize bytes");
  memcpy(dest, header, sizeof(header));
}

The static_assert catches mistakes like forgetting the space at the end of the "fmt " tag

This topic's title mentions web sockets, but I don't see any of those. You might as well print the response body when the response is not 200 -- the error message can be helpful. If you have a random boundary assigned to a variable, you should use that variable when possible; the order of these two statements can be flipped to be:

  String boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW";
  client.addHeader("Content-Type", "multipart/form-data; boundary=" + boundary);

Does your ESP32 board have PSRAM? Is it Enabled (near the bottom of the board-specific options in the bottom half of the Tools menu)? If not, the maximum String size is 64KB. If you exceed the max, the alloc/concat has no effect and the object is marked invalid. Your .wav is 100KB bigger.

You could try a .wav that is under the limit, which would have to be short and perhaps low-res, to see if all the other code works.

It would be more efficient overall to implement a Stream that can do the HTTP multipart stuff; that wraps the File, which is also a subclass of Stream. HTTPClient::sendRequest has an overload that takes a Stream.

Has the server code been tested with something like curl, which can do the upload?

curl -v -F file=@recording.wav 'http://192.168.1.112:5000/upload'

Hi kenB, thanks for the response. The curl command works fine. I have an ESP32-S3-N16R8, PSRAM is enabled in the Arduino IDE, and I flipped the boundary string as you suggested.
I'm a relative newbie with the ESP32; could you enlighten me about how to do a more efficient multipart stream?
I tried to send the audio file directly as follows, but I get the same HTTP 400 error:

// Direct upload attempt: the open File is passed as a Stream and becomes the
// raw request body, labelled audio/wav (no multipart wrapper).
HTTPClient client;
  client.begin("http://192.168.1.112:5000/upload");
  client.addHeader("Content-Type", "audio/wav");
  // Stream overload: takes a pointer to the stream plus the number of bytes to send.
  int httpResponseCode = client.sendRequest("POST", &file, file.size());
  Serial.print("httpResponseCode : ");
  Serial.println(httpResponseCode);

I recorded just 2 seconds; the file shrank to 65 KB, but nothing changed at all.

With this modification to the code I am able to copy the file to the server, but it is corrupted: the file is 33 KB and contains the WAV header, but I can't play it.

// Multipart upload attempt that builds the complete request body in one String.
HTTPClient client;
  client.begin("http://192.168.1.112:5000/upload");
  String boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW";
  client.addHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
  // Part header for the single file part.
  String body = "--" + boundary + "\r\n";
  body += "Content-Disposition: form-data; name=\"file\"; filename=\"recording.wav\"\r\n";
  body += "Content-Type: audio/wav\r\n\r\n";
  // NOTE(review): readString() pulls the binary WAV data into a text String;
  // presumably not every byte value survives that - verify the resulting length.
  body += file.readString();
  // Closing boundary.
  body += "\r\n--" + boundary + "--\r\n";
  // Calculate the content length
  int contentLength = body.length();
  // Set the Content-Length header
  client.addHeader("Content-Length", String(contentLength));
  int httpResponseCode = client.sendRequest("POST", body );
    
  Serial.print("httpResponseCode : ");
  Serial.println(httpResponseCode);

At any point, you can test the maximum possible String length -- considering both the hard-coded limit and available unfragmented heap -- with a single loop like this

  // Probe for the largest String that can currently be reserved: try ever
  // larger sizes (in steps of 10000 bytes) until reserve() fails.
  for (size_t z = 6000; ; z += 10000) {
    Serial.print(z);
    // A fresh String each round, so every attempt is a new reservation.
    String s;
    if (s.reserve(z)) {
      Serial.println(" is OK");
      delay(10);
    } else {
      Serial.println(" is too big");
      delay(2500);  // if you need to pause to see the result
      break;
    }
  }

It's the same status code, 400 Bad Request, but the response body may have details about what is wrong.

In this case, the Python server code is expecting request.files; a direct upload that is not multipart will do abort(400), which won't say anything useful. (You could add a message to that.) But maybe it will when trying multipart, and there's an error with it. So definitely do the

    String response = client.getString();
    Serial.println(response);

regardless of the httpResponseCode. You could alter the server to handle both direct or multipart. The direct upload of a File as a Stream is -- as you have discovered -- much simpler on the client/Arduino side. But you should also be able to make the multipart work as well.

You mean 33KB out of 65KB? Is any data that is present actually correct, but some of it is missing (which parts); or is it all garbled with the wrong bytes? Does the header have the correct lengths after the "RIFF" and "data"? What is the Content-Length that is sent? Is that the complete file length plus the several dozen other bytes that are added?

It will take a little time to put together a multipart stream.

I was recording 5 seconds (160884 bytes); the file created on the server has only 32812 bytes. Some of the saved data is correct.
First lines of the uploaded file:

RIFF$q WAVEfmt      €>   }ÿ  data q   ÿ                  û ý þ þ þ þ ý ý  þ þ þ ý ý ÿ ÿ         þ þ ý ý   ú ü ü û û û û   ü ü ü ü ü ú ú  ù ú ú ø ø ó ó  ñ ÷ ÷ ÷ ÷ ù ù 	 þ ü ü ü ü ÿ ÿ  ý ù ù õ õ ö ö 
 ü ú ú ÷ ÷ ý ý  ÿ ý ý ü ü ü ü   û ù ù ô ô ö ö  û ø ø ÷ ÷ ú ú  ø ÷ ÷ ù ù ø ø þ ô ö ö ø ø ù ù ý ù ÷ ÷ ø ø ù ù ù ø ù ù ù ù ÷ ÷ ÷ õ ö ö ó ó ÷ ÷ õ ÷ ô ô õ õ ö ö õ ÷ ÷ ÷ ö ö ú ú ô þ     ý ý ü ü ñ û û û       í        í 

The content length is calculated from the total body and sent with addHeader.

An aside about examining binary data

When dealing with binary data, use something like hexdump, which should be installed on Linux or Mac

$ hexdump -C test_8u_16.wav | head -n 4
00000000  52 49 46 46 84 64 00 00  57 41 56 45 66 6d 74 20  |RIFF.d..WAVEfmt |
00000010  12 00 00 00 01 00 01 00  80 3e 00 00 80 3e 00 00  |.........>...>..|
00000020  01 00 08 00 00 00 64 61  74 61 cd 3f 00 00 80 80  |......data.?....|
00000030  80 80 80 80 80 7f 7e 80  7e 80 7e 7e 7e 7f 7d 7f  |......~.~.~~~.}.|

I found that file in the ESP8266Audio library I happen to have installed. Looking at the (little-endian) size after "RIFF", 0x6484 is 8 less than the size of the file, 25740 bytes, as expected. The size of the "fmt " chunk happens to be 0x12 bytes instead of your 0x10; the "data" starts in the expected place accordingly, that many bytes after the four-byte size.

The file fragment you posted has some binary-to-text conversion: for one thing looks like all the 00 are presented as spaces (hex 20)

00000000  52 49 46 46 24 71 02 20  57 41 56 45 66 6d 74 20  |RIFF$q. WAVEfmt |
00000010  10 20 20 20 01 20 01 20  e2 82 ac 3e 20 20 20 7d  |.   . . ...>   }|
00000020  01 c3 bf 02 20 10 20 64  61 74 61 20 71 02 20 20  |.... . data q.  |

And looking at the text here in the forum, there is the € sign at offset 24, which has 0x80 in the wavHeader function. But that's the appropriate character only on Windows; in any case, the UTF-8 encoding for it is that "e2 82 ac", as shown just above. So brute-force reversing this -- unfortunately changing any legitimate 20 to 00 (like at the end of "fmt ")

$ pbpaste | tr ' ' '\000' | iconv -f utf-8 -t windows-1252 | hexdump -C
00000000  52 49 46 46 24 71 02 00  57 41 56 45 66 6d 74 00  |RIFF$q..WAVEfmt.|
00000010  10 00 00 00 01 00 01 00  80 3e 00 00 00 7d 01 ff  |.........>...}..|
00000020  02 00 10 00 64 61 74 61  00 71 02 00 00 03 00 ff  |....data.q......|

For whatever reason, after "7d 01", at offset 31 should be "00" but is instead 0xFF. I don't know enough about the .wav format to say whether that is significant.

Anyway, the particulars of how the binary data got mangled don't matter if you can avoid it in the first place :slight_smile: The "RIFF" size is 0x027124, or 160036. That's a lot closer to 160884, but still off by eight hundred bytes, more than all the extra stuff that is added. But not nearly as bad as 32812.

Back to the main problem

To troubleshoot this, verify the size/length at each step; e.g. did concatenating the body actually work? And you don't actually have to set the Content-Length manually when sending a String, HTTPClient will do it for you.

So for example

  int contentLength = body.length();
  Serial.println(contentLength);  // did the concat work?
  int httpResponseCode = client.sendRequest("POST", body );

Unfortunately, that doesn't tell you about the next step: how many bytes were actually sent. If you use a Stream, HTTPClient will print a debug message with that info. ESP32 actually has a StreamString that combines the two (HTTPClient uses it). You'll need

#include <StreamString.h>

and then change the body initialization slightly

  StreamString body;
  body += "--" + boundary + "\r\n";

Unlike with a String, for a Stream, the sendRequest takes a pointer; and because it's a stream, you need to pass the length, otherwise it won't set the Content-Length

  int contentLength = body.length();
  Serial.println(contentLength);  // did the concat work?
  int httpResponseCode = client.sendRequest("POST", &body, contentLength);

With Debug messages enabled, the output will be something like

[  2134][D][HTTPClient.cpp:812] sendRequest(): Stream payload written: 161000
[  3063][D][HTTPClient.cpp:1257] handleHeaderResponse(): code: 200

(I made up that 161000.) Does it match the contentLength, or the 33KB?

I will try your suggestions and get back.