ESP8266 code crashes, memory overwritten - stackoverflow?

This keeps me up at night for the last few days. I try to control these LED matrices with an ESP8266 microcontroller.

First, the minimum code I could come up with that shows my issue (also attached as a zip file - also including headers):

WiFiDisplay.ino

#include "RunningText.h"

RunningText rt;

const long interval = 100;
unsigned long previousMillis = 0;

void setup() {
    Serial.begin(115200);
    Serial.println(" \r\nINIT \r\n");

    String text = "Hi, this is an exceptionally long text that I want to display on my LED panels";
    char* textInput = (char*)malloc((text.length() + 1) * sizeof(char));
    text.toCharArray(textInput, text.length() + 1);

    rt.setText(textInput, strlen(textInput));
}

void loop() {
    unsigned long currentMillis = millis();
    if (currentMillis - previousMillis >= interval) {
        previousMillis = currentMillis;
        rt.updatePosition();
    }
}

RunningText.cpp

#include "RunningText.h"
#include "MatrixConstants.h"
#include <Arduino.h>

#define MAX_IN_USE 8

int buf[MAX_IN_USE * 4] = { 0 };

/*
* Overrrides the current buffer content
*
* Parameter arr: 1d array containing the new buffer content
*/
void setWholeBuffer(int* arr) {
    for (int x = 0; x < (MAX_IN_USE * 8); x++) {
        Serial.print("|");
        Serial.print(buf[x]);
        Serial.print(":");
        buf[x] = arr[x];
        Serial.print(buf[x]);
    }
}

/*
* Puts a symbol in the running buffer so it can be displayed later.
*
* Parameter x: how far to the right the symbol should be placed in pixels
* Parameter arr: array containing the symbol, see MatrixConstants.h file for examples
* Parameter len: width of the symbol in pixels
*/
void RunningText::symbolInRunningBuffer(int x, int* arr, int len) {
    for (int i = 0; i < len; i++) {
        runningBuffer[x + i] = *(arr + i);
    }
}

/*
* Sets the text that should be displayed.
*
* Parameter text: the text that should be displayed
* Parameter len: the number of characters
*/
void RunningText::setText(char *text, int len) {
    bufferWidth = (len * 6) + (MAX_IN_USE * 16);
    runningBuffer = (int*)malloc(bufferWidth * sizeof(int));

    int pos = 0;
    symbolInRunningBuffer(pos, empty, MAX_IN_USE * 8);
    pos += (MAX_IN_USE * 8);

    for (int i = 0; i < len; i++) {
        symbolInRunningBuffer(pos, letters[text[i] - 32], 5);
        pos += 5;
        runningBuffer[pos] = 0;
        pos++;
    }

    symbolInRunningBuffer(pos, empty, MAX_IN_USE * 8);

    position = 0;
}


/*
* Shifts the whole text one pixel to the left.
* Stops when the whole text has been displayed
*/
void RunningText::updatePosition() {
    if (position < (bufferWidth - (MAX_IN_USE * 8))) {
        Serial.println(position);
        setWholeBuffer(runningBuffer + position);
        position++;
    }
}

MatrixConstants.cpp is also attached (only contains constants)

Example output from the serial console:

INIT 

0
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|18755581:0|18750000:0|1075843120:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:01
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:1272
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:83
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:84
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:85
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:1276
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127|127:07
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127|127:0|0:08
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127|127:0|0:0|0:689
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|1073651700:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127|127:0|0:0|0:68|68:125
 ets Jan  8 2013,rst cause:4, boot mode:(3,6)

wdt reset
load 0x4010f000, len 1384, room 16 
tail 8
chksum 0x2d
csum 0x2d
v4ceabea9
~ld
 
INIT 

0
|0:0|0:0|...

and so on and so on....

My general problem is that I update an array a couple of times (10 times in this example; setWholeBuffer() in RunningText.cpp) but suddenly the program completely stops responding until the hardware watchdog resets and everything starts over again (rst cause:4, boot mode:(3,6)).

As you can see in the last output line of setWholeBuffer, the value of buf[x.] is 1073651700 at one point which leads me to believe that there is some kind of a memory problem. The number 1073651700 is always the same after a reset.

I am developing in Visual Studio 2017 using visualmicro. When I compile and upload the code with the arduino IDE, I get only one output line from setWholeBuffer().

Settings for the programmer:

I would be really happy if you could help me find the issue because I don't think I can fix it on my own anymore, I have spent too many nights on this :confused:

WiFiDisplay.zip (3.24 KB)

I've just had a quick look at some of the code without following thoroughly. But I found this:

#define MAX_IN_USE 8

int buf[MAX_IN_USE * 4] = { 0 };    // int buf[32]  

/*
* Overrrides the current buffer content
*
* Parameter arr: 1d array containing the new buffer content
*/
void setWholeBuffer(int* arr) {
    for (int x = 0; x < (MAX_IN_USE * 8); x++) {  =>  64 iterations  
        Serial.print("|");
        Serial.print(buf[x]);
        Serial.print(":");
        buf[x] = arr[x];       // overflow here int buf[32] to buf[63] ??
        Serial.print(buf[x]);
    }
}

Also, you are aware that an int on an ESP8266 is 4 bytes ?

On the ESP8266 the int variable is 32 bits long and not 16 bits. Better to use something like uint16_t instead of int so your sure of the variable size.

The setWholeBuffer function might be causing the WDT reset because it's not yielding control for the core to perform housekeeping. Try putting a delay(1) or yield() within the for-next loop.

A section of the ESP 8266 Beginners Guide (Sharing CPU time with the RF part) explains in more detail what Riva mentioned.

Thanks for the int size hint. I am aware of that, I made this mistake when shrinking the code to provide a minimum example.

The software watchdog is not the problem, disabling it alltogether with ESP.wdtDisable(); does not make a difference. I tried to include yield or delay into the for loop but that does not make a difference.

My controller gets reset by the hardware watchdog a couple of seconds after the program stops doing anything apparently.

The general problem is that the crash does not occur somewhere in my code but in the waiting part in the main loop. If I add some debug output like this:

void loop() {
    unsigned long currentMillis = millis();
    if (currentMillis - previousMillis >= interval) {
        Serial.println("Start of if");
        previousMillis = currentMillis;
        rt.updatePosition();
        Serial.println("End of if");
    }
}

I get the following output:

INIT 

Start of if
0
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|18755832:0|18750000:0|1075843120:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0End of if
Start of if
1
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127End of if
Start of if
2
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8End of if
Start of if
3
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8End of if
Start of if
4
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8End of if
Start of if
5
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127End of if
Start of if
6
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127|127:0End of if
Start of if
7
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127|127:0|0:0End of if
Start of if
8
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127|127:0|0:0|0:68End of if
Start of if
9
|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|1073649380:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:0|0:127|127:8|8:8|8:8|8:127|127:0|0:0|0:68|68:125End of if


// This is the part where for about 3-4 seconds no data is sent over serial at all, so the main loop is not being executed anymore. After this time, the hardware watchdog resets the microcontroller and it starts all over again.


 ets Jan  8 2013,rst cause:4, boot mode:(3,6)

wdt reset
load 0x4010f000, len 1384, room 16 
tail 8
chksum 0x2d
csum 0x2d
v4ceabea9
~ld

As you can see, the "End of if" text is still sent right before the program completely stops responding.

Any you have an explanation for this taken from post #1 ?

buf[x] = arr[x];       // overflow here int buf[32] to buf[63] ??

Oh wow, sorry, I totally overlooked that.
That actually was my issue, I can't believe that I was so blind, maybe I should try rubber duck debugging more often...
Thank you a lot, I am so happy that this finally works!