esp32_cam: access and process image

Hey, I am not a total beginner with Arduino programming, but neither am I an expert. I am trying to use tensorflow-lite on an esp32_cam to classify images. I defined the following sub-tasks that I need to solve:

  • Take photo
  • Reduce size of photo to (e.g.) 28x28 pixels grayscale
  • run inference with trained model

For now I am stuck between points 1 and 2 and cannot get it solved. What I have done so far: I save an image into a buffer using esp_camera_fb_get().
Afterwards I put the values from the buffer inside a 2D array. However, when I print out some of these values, they never become 0 or 255, even though I covered the whole lens or put a bright light source close to it.
I have four questions:

  • How do I correctly record an image?
  • How can I convert it into a 2D array?
  • How can I downsample the size from (e.g.) 160x120 to 28x28?
  • How can I correctly Serial.print() each pixel value so that I can copy the values, plot them on my computer (e.g. with python matplotlib) and check that everything works all right?
 #define  CAMERA_MODEL_AI_THINKER
 #include <esp_camera.h>
 #include "camera_pins.h"

 #define FRAME_SIZE FRAMESIZE_QQVGA
 #define WIDTH 160
 #define HEIGHT 120    

 // Global pixel buffer sized for one frame, indexed [row][column]
 // (HEIGHT rows of WIDTH entries each), zero-initialised.
 uint16_t img_array [HEIGHT][WIDTH] = { 0 };


 // Forward declarations of the helpers defined further down in this sketch.
 bool setup_camera(framesize_t);
 void frame_to_array(camera_fb_t * frame);
 void print_image_shape(camera_fb_t * frame);
 bool capture_image();

 void setup() {
     Serial.begin(115200);
     Serial.println(setup_camera(FRAME_SIZE) ? "OK" : "ERR INIT");
 }

 void loop() {
     if (!capture_image()) {
         Serial.println("Failed capture");
         delay(2000);

         return;
     }

     //print_features();
     delay(3000);
 }



 /**
  * Configure and start the camera driver for grayscale capture.
  *
  * @param frameSize  Resolution requested from the sensor.
  * @return true when esp_camera_init() returned ESP_OK and the frame size
  *         could be applied to the sensor, false otherwise.
  */
 bool setup_camera(framesize_t frameSize) {
     // Value-initialise the struct so that any field not assigned below is
     // zero instead of whatever happened to be on the stack.
     camera_config_t config = {};

     config.ledc_channel = LEDC_CHANNEL_0;
     config.ledc_timer = LEDC_TIMER_0;
     config.pin_d0 = Y2_GPIO_NUM;
     config.pin_d1 = Y3_GPIO_NUM;
     config.pin_d2 = Y4_GPIO_NUM;
     config.pin_d3 = Y5_GPIO_NUM;
     config.pin_d4 = Y6_GPIO_NUM;
     config.pin_d5 = Y7_GPIO_NUM;
     config.pin_d6 = Y8_GPIO_NUM;
     config.pin_d7 = Y9_GPIO_NUM;
     config.pin_xclk = XCLK_GPIO_NUM;
     config.pin_pclk = PCLK_GPIO_NUM;
     config.pin_vsync = VSYNC_GPIO_NUM;
     config.pin_href = HREF_GPIO_NUM;
     config.pin_sscb_sda = SIOD_GPIO_NUM;
     config.pin_sscb_scl = SIOC_GPIO_NUM;
     config.pin_pwdn = PWDN_GPIO_NUM;
     config.pin_reset = RESET_GPIO_NUM;
     config.xclk_freq_hz = 20000000;
     config.pixel_format = PIXFORMAT_GRAYSCALE;
     config.frame_size = frameSize;
     config.jpeg_quality = 12;
     config.fb_count = 1;

     if (esp_camera_init(&config) != ESP_OK) {
         // Stop here: touching the sensor handle after a failed init is what
         // produces the LoadProhibited panic / reboot loop.
         return false;
     }

     sensor_t *sensor = esp_camera_sensor_get();
     if (sensor == NULL) {
         return false;
     }
     sensor->set_framesize(sensor, frameSize);

     return true;
 }



 bool capture_image() {

     camera_fb_t * frame = NULL;
     frame = esp_camera_fb_get();

     print_image_shape(frame);

     frame_to_array(frame);

     esp_camera_fb_return(frame);

     if (!frame)
         return false;

     return true;
 }


 // Print the frame's width, height and buffer length (in bytes) on a single
 // tab-separated line of the serial console.
 void print_image_shape(camera_fb_t * frame){
     const auto frame_width  = frame->width;
     const auto frame_height = frame->height;
     const auto byte_count   = frame->len;

     Serial.print("Width: ");
     Serial.print(frame_width);

     Serial.print("\tHeigth: ");
     Serial.print(frame_height);

     Serial.print("\tLength: ");
     Serial.println(byte_count);
 }

 void frame_to_array(camera_fb_t * frame){

     int len = frame->len;
     char imgBuffer[frame->len];
     int counter = 0;

     uint16_t img_array [HEIGHT][WIDTH] = { 0 };

     int h_counter = 0;
     int w_counter = 0;

     // write values from buffer into 2D Array
     for (int h=0; h < HEIGHT; h++){
         //Serial.println(h);
         for (int w=0; w < WIDTH; w++){
             //Serial.println(w);
             int position = h*(len/HEIGHT)+w;

             //Serial.println(position);
             img_array[h][w] = {frame->buf[position]};

             //Serial.print(img_array[h][w]);
             //Serial.print(",");
             //delay(2);
         }
     }


     //Serial.println("Current frame:");

     Serial.println("=====================");

 }

camera_pins.h:

// Per-board GPIO assignments for the camera interface.
// Exactly one CAMERA_MODEL_* macro must be defined before this header is
// included; compilation stops with an #error otherwise.
// Every branch defines the same set of names: PWDN, RESET, XCLK, SIOD, SIOC,
// the eight data lines Y2..Y9, VSYNC, HREF and PCLK.
// NOTE(review): a value of -1 presumably means "pin not connected" - confirm
// against the camera driver documentation.

// --- CAMERA_MODEL_WROVER_KIT ---
#if defined(CAMERA_MODEL_WROVER_KIT)
#define PWDN_GPIO_NUM    -1
#define RESET_GPIO_NUM   -1
#define XCLK_GPIO_NUM    21
#define SIOD_GPIO_NUM    26
#define SIOC_GPIO_NUM    27

#define Y9_GPIO_NUM      35
#define Y8_GPIO_NUM      34
#define Y7_GPIO_NUM      39
#define Y6_GPIO_NUM      36
#define Y5_GPIO_NUM      19
#define Y4_GPIO_NUM      18
#define Y3_GPIO_NUM       5
#define Y2_GPIO_NUM       4
#define VSYNC_GPIO_NUM   25
#define HREF_GPIO_NUM    23
#define PCLK_GPIO_NUM    22

// --- CAMERA_MODEL_ESP_EYE ---
#elif defined(CAMERA_MODEL_ESP_EYE)
#define PWDN_GPIO_NUM    -1
#define RESET_GPIO_NUM   -1
#define XCLK_GPIO_NUM    4
#define SIOD_GPIO_NUM    18
#define SIOC_GPIO_NUM    23

#define Y9_GPIO_NUM      36
#define Y8_GPIO_NUM      37
#define Y7_GPIO_NUM      38
#define Y6_GPIO_NUM      39
#define Y5_GPIO_NUM      35
#define Y4_GPIO_NUM      14
#define Y3_GPIO_NUM      13
#define Y2_GPIO_NUM      34
#define VSYNC_GPIO_NUM   5
#define HREF_GPIO_NUM    27
#define PCLK_GPIO_NUM    25

// --- CAMERA_MODEL_M5STACK_PSRAM ---
#elif defined(CAMERA_MODEL_M5STACK_PSRAM)
#define PWDN_GPIO_NUM     -1
#define RESET_GPIO_NUM    15
#define XCLK_GPIO_NUM     27
#define SIOD_GPIO_NUM     25
#define SIOC_GPIO_NUM     23

#define Y9_GPIO_NUM       19
#define Y8_GPIO_NUM       36
#define Y7_GPIO_NUM       18
#define Y6_GPIO_NUM       39
#define Y5_GPIO_NUM        5
#define Y4_GPIO_NUM       34
#define Y3_GPIO_NUM       35
#define Y2_GPIO_NUM       32
#define VSYNC_GPIO_NUM    22
#define HREF_GPIO_NUM     26
#define PCLK_GPIO_NUM     21

// --- CAMERA_MODEL_M5STACK_WIDE (differs from M5STACK_PSRAM in SIOD and VSYNC) ---
#elif defined(CAMERA_MODEL_M5STACK_WIDE)
#define PWDN_GPIO_NUM     -1
#define RESET_GPIO_NUM    15
#define XCLK_GPIO_NUM     27
#define SIOD_GPIO_NUM     22
#define SIOC_GPIO_NUM     23

#define Y9_GPIO_NUM       19
#define Y8_GPIO_NUM       36
#define Y7_GPIO_NUM       18
#define Y6_GPIO_NUM       39
#define Y5_GPIO_NUM        5
#define Y4_GPIO_NUM       34
#define Y3_GPIO_NUM       35
#define Y2_GPIO_NUM       32
#define VSYNC_GPIO_NUM    25
#define HREF_GPIO_NUM     26
#define PCLK_GPIO_NUM     21

// --- CAMERA_MODEL_AI_THINKER (the model selected by the sketch above) ---
#elif defined(CAMERA_MODEL_AI_THINKER)
#define PWDN_GPIO_NUM     32
#define RESET_GPIO_NUM    -1
#define XCLK_GPIO_NUM      0
#define SIOD_GPIO_NUM     26
#define SIOC_GPIO_NUM     27

#define Y9_GPIO_NUM       35
#define Y8_GPIO_NUM       34
#define Y7_GPIO_NUM       39
#define Y6_GPIO_NUM       36
#define Y5_GPIO_NUM       21
#define Y4_GPIO_NUM       19
#define Y3_GPIO_NUM       18
#define Y2_GPIO_NUM        5
#define VSYNC_GPIO_NUM    25
#define HREF_GPIO_NUM     23
#define PCLK_GPIO_NUM     22

// No known model selected: fail the build instead of using undefined pins.
#else
#error "Camera model not selected"
#endif
1 Like

How do I correctly record an image?

look at the " Take and Save Photo Sketch" code.

that takes a picture

 camera_fb_t * fb = NULL;
  
  // Take Picture with Camera
  fb = esp_camera_fb_get();  
  if(!fb) {
    Serial.println("Camera capture failed");
    return;
  }

then you have the payload (image) in fb->buf which is fb->len byte long.

Once you are done playing with the buffer you need to terminate with

  esp_camera_fb_return(fb);

How can I convert it into a 2D array?

The data you get is a JPEG format ==> read about how data is represented (see Syntax and structure)

if you look at the bytes in the payload, you'll find 0xFFD8 which is the start of a JPEG file (SOI Marker = Start of image) and you'll have to read to 0xFFD9 which is the end of a JPEG file (EOI Marker = End of image)

You need a JPEG Decoder to get a pixel bitmap (possibly see that library - I've never used it though )

How can I downsample the size from (e.g.) 160x120 to 28x28?

There are tons of approaches to this in image theory. Look for "Source Code for Image Resampling in C/C++" or "Source Code Image Resize" and you'll find some source code available. A crude and fast approach is to move a "window" (say 5x5 pixels) over the image and generate 1 pixel by computing some sort of average "color". Another one is to just keep 1 pixel out of every 5 and lose the rest of the information... That would lead to a 32x24 bitmap. Note that a 28x28 target does not respect the aspect ratio of the source image, so you'll get distortion, which will make your life more complicated.

how can I correctly Serial.print() each pixel value to copy the values and plot them on my computer (e.g. with python matplotlib) and check if everything works alright?

almost just as you say. go through the buffer and Serial.write() the bytes (or the buffer) and have some code on the other end listening to the stream of data and rebuild an image.

thanks a lot for your comments!

I was wondering whether I need to decode JPEG at all, as I chose pixel_format = PIXFORMAT_GRAYSCALE and there is a separate format PIXFORMAT_JPEG.
When printing out

// Print the first five bytes of the frame buffer, one decimal value per line.
for (int i = 0; i < 5; i++) {
     Serial.println(fb->buf[i]);
}

I get the output:

29

30

32

28

37

So it does not seem to be encoded. As I mentioned in the original post, I have tried to print a couple of values and they never became 0 or 255 whenever I put a finger or a light source on the camera. Is there anything I need to set up for sensitivity or so?

I missed the PIXFORMAT_GRAYSCALE. Indeed, that should not deliver any JPEG.

if auto-white balance is activated it might impact what you get back?
May be try with PIXFORMAT_RAW ?

I tried this before and it compiles without errors, but it results in a loop of reboots, saying that this format is not supported:

rst:0xc (SW_CPU_RESET),boot:0x13 (SPI_FAST_FLASH_BOOT)
configsip: 0, SPIWP:0xee
clk_drv:0x00,q_drv:0x00,d_drv:0x00,cs0_drv:0x00,hd_drv:0x00,wp_drv:0x00
mode:DIO, clock div:2
load:0x3fff0018,len:4
load:0x3fff001c,len:1044
load:0x40078000,len:8896
load:0x40080400,len:5828
entry 0x400806ac
[E][camera.c:1140] camera_init(): Requested format is not supported
E (1557) gpio: gpio_isr_handler_remove(396): GPIO isr service is not installed, call gpio_install_isr_service() first
[E][camera.c:1270] esp_camera_init(): Camera init failed with error 0x106
Guru Meditation Error: Core  1 panic'ed (LoadProhibited). Exception was unhandled.
Core 1 register dump:
PC      : 0x400d14a2  PS      : 0x00060330  A0      : 0x800d14d3  A1      : 0x3ffb1f00  
A2      : 0x00000000  A3      : 0x00000000  A4      : 0x00000106  A5      : 0x00000001  
A6      : 0x00000001  A7      : 0x00000000  A8      : 0x800d14a0  A9      : 0x3ffb1ee0  
A10     : 0x00000000  A11     : 0x00000000  A12     : 0x3ffbebe8  A13     : 0x00000003  
A14     : 0x00000001  A15     : 0x00000000  SAR     : 0x00000004  EXCCAUSE: 0x0000001c  
EXCVADDR: 0x0000003c  LBEG    : 0x400014fd  LEND    : 0x4000150d  LCOUNT  : 0xfffffffb  

Backtrace: 0x400d14a2:0x3ffb1f00 0x400d14d0:0x3ffb1f80 0x400d28e3:0x3ffb1fb0 0x4008ce31:0x3ffb1fd0

Rebooting...

I just tried this piece of code on my ESP32_CAM

#include "esp_camera.h"
#include "soc/soc.h"           // Disable brownout problems
#include "soc/rtc_cntl_reg.h"  // Disable brownout problems
#include "driver/rtc_io.h"
#include "img_converters.h" // see https://github.com/espressif/esp32-camera/blob/master/conversions/include/img_converters.h

// Pin definition for CAMERA_MODEL_AI_THINKER
// (same values as the CAMERA_MODEL_AI_THINKER branch of camera_pins.h,
// inlined here so this sketch does not need that header).
// Control, clock and SCCB (SIOD/SIOC) lines:
#define PWDN_GPIO_NUM     32
#define RESET_GPIO_NUM    -1
#define XCLK_GPIO_NUM      0
#define SIOD_GPIO_NUM     26
#define SIOC_GPIO_NUM     27

// Data lines Y9..Y2 and the sync / pixel-clock lines:
#define Y9_GPIO_NUM       35
#define Y8_GPIO_NUM       34
#define Y7_GPIO_NUM       39
#define Y6_GPIO_NUM       36
#define Y5_GPIO_NUM       21
#define Y4_GPIO_NUM       19
#define Y3_GPIO_NUM       18
#define Y2_GPIO_NUM        5
#define VSYNC_GPIO_NUM    25
#define HREF_GPIO_NUM     23
#define PCLK_GPIO_NUM     22


// our call back to dump whatever we got in binary format
size_t jpgCallBack(void * arg, size_t index, const void* data, size_t len)
{
  uint8_t* basePtr = (uint8_t*) data;
  for (size_t i = 0; i < len; i++) {
    Serial.write(basePtr[i]);
  }
  return 0;
}

void setup() {
  WRITE_PERI_REG(RTC_CNTL_BROWN_OUT_REG, 0); //disable brownout detector

  Serial.begin(115200);

  camera_config_t config;
  config.ledc_channel = LEDC_CHANNEL_0;
  config.ledc_timer = LEDC_TIMER_0;
  config.pin_d0 = Y2_GPIO_NUM;
  config.pin_d1 = Y3_GPIO_NUM;
  config.pin_d2 = Y4_GPIO_NUM;
  config.pin_d3 = Y5_GPIO_NUM;
  config.pin_d4 = Y6_GPIO_NUM;
  config.pin_d5 = Y7_GPIO_NUM;
  config.pin_d6 = Y8_GPIO_NUM;
  config.pin_d7 = Y9_GPIO_NUM;
  config.pin_xclk = XCLK_GPIO_NUM;
  config.pin_pclk = PCLK_GPIO_NUM;
  config.pin_vsync = VSYNC_GPIO_NUM;
  config.pin_href = HREF_GPIO_NUM;
  config.pin_sscb_sda = SIOD_GPIO_NUM;
  config.pin_sscb_scl = SIOC_GPIO_NUM;
  config.pin_pwdn = PWDN_GPIO_NUM;
  config.pin_reset = RESET_GPIO_NUM;
  config.xclk_freq_hz = 20000000;
  config.pixel_format = PIXFORMAT_GRAYSCALE;

  if (psramFound()) {
    config.frame_size = FRAMESIZE_QQVGA; // FRAMESIZE_ + QVGA|CIF|VGA|SVGA|XGA|SXGA|UXGA
    config.fb_count = 2;
  } else {
    Serial.println(F("ps RAM not found"));
    return;
  }

  // Init Camera
  esp_err_t err = esp_camera_init(&config);
  if (err != ESP_OK) {
    Serial.printf("Camera init failed with error 0x%x", err);
    return;
  }

  camera_fb_t * fb = NULL;

  // Take Picture with Camera
  fb = esp_camera_fb_get();

  if (!fb) {
    Serial.println("Camera capture failed");
    return;
  }

  // DUMP THE PIXELS AS ASCII TO SERIAL
  Serial.printf("\n\nWidth = %u, Height=%u\n", fb->width, fb->height);
  for (size_t i = 0; i < fb->len; i++) {
    if (i % 16 == 0) Serial.printf("\n%06u\t", i);
    if (fb->buf[i] < 0x10) Serial.write('0');
    Serial.print(fb->buf[i], HEX);
  }

  Serial.println(F("\n\n---------------------\nPREPARE TO CAPTURE TO FILE\n"));
  delay(6000);

  frame2jpg_cb(fb, 10, jpgCallBack, NULL); // see https://github.com/espressif/esp32-camera/blob/b02992a5b88571bdad0660ee652a620aaabc994a/conversions/include/img_converters.h#L54

  esp_camera_fb_return(fb);
}

// Intentionally empty: this sketch does all of its work once, in setup().
void loop()
{
}

When you boot, it sets up the ESP, takes a picture (grayscale 160x120) and prints all the pixels to the Serial output at 115200 baud. (You get 19200 bytes, which is correct for 160x120 grayscale pixels on 8 bits.)

then it prints "PREPARE TO CAPTURE TO FILE" and waits for 6 seconds

and then I call a function to generate a jpg file from that framebuffer (see frame2jpg_cb() )

This way I'm able to see the pixel data and also build a file I can see on my computer.

So what I do is open a Serial software that can handle binary data (I use CoolTerm on my Mac) and when I see the message "PREPARE TO CAPTURE TO FILE", I activate the file capture feature of CoolTerm to get the stream of bytes into a file. Once completed I stop the capture, rename the file to pic.jpg.

Here is a sample of what I got, a poor quality grayscale image of my pencil pot and scissors

pic.jpg

So it appears that the capture is working fine.

I then put my finger in front of the lens hoping to get a full black image and started the process again.

The image generated is a noisy, mostly grey/dark image but not pitch black

pic2.jpg

so the data points are thus not 0x00 or 0xFF

[color=purple]
[color=blue]000000[/color]   2425272A2C2C25262A232B2C232C2B26
[color=blue]000016[/color]   2B25292120271B2529262A272721201F
[color=blue]000032[/color]   2E24262429252328232D252428222723
[color=blue]000048[/color]   282A2D2B23292A2725272A2D22282927
[color=blue]000064[/color]   292B282A2B2D27272E282D2B1C282E24
[color=blue]000080[/color]   24272A2C262323262826262923281D23
[color=blue]000096[/color]   2820232223252B2224202923281D271E
[color=blue]000112[/color]   232421211C231F2322251F201E1A1B1C
[color=blue]000128[/color]   1E1D1E1E1821221D1A21201E191C1C1B
...
[/color]

I then added some code to disable the white balance

  // disable white balance and white balance gain
  sensor_t * sensor = esp_camera_sensor_get();
  sensor->set_whitebal(sensor, 0);       // 0 = disable , 1 = enable
  sensor->set_awb_gain(sensor, 0);       // 0 = disable , 1 = enable

just before that line

 camera_fb_t * fb = NULL;

and ran again with my finger in front of the sensor. This time it's darker..

pic3.jpg

So I'd say try to play with the sensors parameters to see how it influences what's captured

hope this helps

pic.jpg

pic2.jpg

pic3.jpg

3 Likes

Awesome, this was very helpful already! I got your script working so far and will try to implement the necessary changes into my application. Checking the sensors whitebalance and awb_gain was a great hint and I will investigate this a bit.

This is how i would print an image from grayscale data in python:

# Plot a 160x120 8-bit grayscale frame recorded from the serial output.
# Assumes numpy is available as `np` and matplotlib.pyplot as `plt`.
with open("img_test.txt") as f:
    str_img_test = f.read()

# Keep only non-empty tokens, so the parse works whether or not the recording
# ends with a trailing comma and/or newline.
pixel_tokens = [tok for tok in str_img_test.split(",") if tok.strip()]
img_test = np.array([int(tok) for tok in pixel_tokens], dtype=np.uint8)

# reshape() takes (rows, columns) = (height, width). The values were printed
# in buffer order, which appears to be row by row, hence (120, 160).
img_test = img_test.reshape(120, 160)

plt.figure()
# Use a gray colormap with a fixed 0..255 range; the default colormap would
# show the data in false colour and rescale the contrast per image.
plt.imshow(img_test, cmap="gray", vmin=0, vmax=255)
plt.axis('off')
plt.show()

Where img_test.txt is the serial output recording like

255,220,180,56,78

with a total length of 160*120.
A bit strange was, that I had to reshape to (120, 160) to get the correct image, but at least it now worked with

// Dump every byte of the frame buffer as decimal text, each value followed
// by a comma (the last one included).
// size_t index: fb->len is unsigned, so this avoids a signed/unsigned compare.
for (size_t i = 0; i < fb->len; i++){
    Serial.print(fb->buf[i]);
    Serial.print(",");
}

Thanks again for your help!

It all depends how the bytes are arranged in the frame buffer, row first I suppose