Arduino Becomes Unstable after long use? Need some help.

Hello, I’m having trouble with my code. It becomes very unstable after 1-2 weeks of continuously running. The sketch connects to a server via mqtt/tinygsm and every 3 hours sends “tm” that triggers the server to start sending ids that it then broadcasts via a lora module in order to receive measurements that sends back to the server. When it receives a measurement, it sends it back and receives the next id via the ‘r’ case in the switch. It also sends sms messages up to 150~ chars(due to memory limits a assume). If no sms command is received then it runs smoothly for about 2 weeks, but if it is used then the timer starts becoming looser, and if a ‘r + (id)’ command is issued then it acts as it has received the previous id in line. The sketch is using 86% of memory and about 70% of dynamic memory. Any help or suggestions is appreciated.

adafruit-fona.ino (4.7 KB)

OP’s code:

///////////////////////////CREDENTIALS///////////////////////////
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
#define station_id "****"

#define username "****"

#define password "****"

#define owner_id "****" 

//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////

const char* apn = "internet";
//Vodafone: internet OR webonly.vodafone.gr OR internet.vodafone.gr
//Cosmote: internet
//Wind: gnet.b-online.gr OR gint.b-online.gr
//Q-Telecom: myq
//Taza Mobile: internet.vodafone.gr

#define TINY_GSM_MODEM_SIM800
#define RFM95_CS 10
#define RFM95_RST 11
#define RFM95_INT 2
#define RF95_FREQ 434.0
#include <TinyGsmClient.h>
#include <PubSubClient.h>
#include <SPI.h>
#include <RH_RF95.h>
#include <SoftwareSerial.h>
RH_RF95 rf95(RFM95_CS, RFM95_INT);

unsigned long time = 0;
uint8_t buf[150];
uint8_t len = sizeof(buf);

char module_id[25];
char radiopacket[20];
unsigned long lastPost = 10800000;
unsigned long postInterval = 10800000;
short int tries = 2;
SoftwareSerial SerialAT(8, 9);
TinyGsm modem(SerialAT);
TinyGsmClient client(modem);
PubSubClient mqtt(client);

void reconnect() {
  if (!modem.waitForNetwork() ||  !modem.gprsConnect(apn, "", "")) {
    initialize_modem();
  }
  if (mqtt.connect(station_id, username, password))
  {
    mqtt.subscribe(station_id, (uint8_t)0);
  }
}

void initialize_radio_module() {
  if (!rf95.init()) {
    initialize_radio_module();
  } else {
    rf95.setFrequency(RF95_FREQ);
    rf95.setTxPower(23, false);
  }
}

void initialize_modem() {
  modem.restart();
  if (!modem.waitForNetwork()) {
    initialize_modem();
  }
  if (!modem.gprsConnect(apn, "", "")) {
    initialize_modem();
  }
}

void initialize_mqtt() {
  mqtt.setServer("****", 1883);
  mqtt.setCallback(mqttCallback);
  mqtt.connect(station_id, username, password);
  mqtt.subscribe(station_id, (uint8_t)0);
}

void send_sms(String& phone, String& txt) {
  modem.sendAT(GF("+CMGF=1"));
  modem.waitResponse();
  modem.sendAT(GF("+CSCS=\"UCS2\""));
  modem.waitResponse();
  modem.sendAT(GF("+CSMP=17,167,0,8"));
  modem.waitResponse();
  modem.sendAT(GF("+CMGS=\""), phone, GF("\""));
  if (modem.waitResponse(GF(">")) != 1) {
  }
  modem.stream.print(txt);
  modem.stream.write(0x1A);
  modem.stream.flush();
  delay(1000);
  mqtt.publish(owner_id, "sms");
}

void getMeasurements(String &mod, int n) {
  memset(buf, 0, 150);
  if (n > 0) {
    sendMessage(mod);
    delay(250);
    if (rf95.waitAvailableTimeout((n * 4300))) {
      len = sizeof(buf);
      if (rf95.recv(buf, &len)) {
        mqtt.publish(owner_id, (char *)buf);
        return;
      } else {
        delay(400);
        sendMessage(module_id);
        getMeasurements(mod, --n);
      }
    } else {
      delay(400);
      sendMessage(module_id);
      getMeasurements(mod, --n);
    }
  } else {
    mod += ":mnf"; //module_id + :mnf , module not found
    mod.toCharArray(module_id, mod.length() + 1);
    mqtt.publish(owner_id, module_id);
  }
}

void sendMessage(String Message) {
  Message.toCharArray(radiopacket, 20);
  rf95.send((uint8_t *)radiopacket, strlen(radiopacket));
  rf95.waitPacketSent();
}
void mqttCallback(char *topic, byte *payload, unsigned int len) {
  /*
    f : irrigation off
    n: irrigation on
    h: station ping
    s: station sms
    r{module_id}: refresh measurements for module
  */
  char* cmd = (byte*)malloc(len);
  memcpy(cmd, payload, len);
  switch (cmd[0]) {
    case 'h': //responds to user's ping
      mqtt.publish(owner_id, "h");
      free(cmd);
      break;
    case 'r': { //refreshes measurements from a module
        memset(module_id, 0, 25);
        String x = "";
        for (int i = 1; i < len; i++) {
          x += cmd[i];
        }
        getMeasurements(x, tries);
        free(cmd);
        break;
      }
    case 's': { //sends sms
        String phone, msg = "";
        for (int i = 1; i < 41; i++)
        {
          phone += cmd[i];
        }
        for (int i = 41; i < len; i++)
        {
          msg += cmd[i];
        }
        send_sms(phone, msg);
        free(cmd);
        break;
      }
  }
}


void setup() {
  delay(5000);
  SerialAT.begin(4800);
  pinMode(RFM95_RST, OUTPUT);
  digitalWrite(RFM95_RST, HIGH);
  digitalWrite(RFM95_RST, LOW);
  delay(10);
  digitalWrite(RFM95_RST, HIGH);
  delay(10);
  initialize_radio_module();
  initialize_modem();
  initialize_mqtt();
}


void loop()
{
  time = millis();
  if (!mqtt.connected()) {
    reconnect();
  } else {
    if (time - lastPost >= postInterval) { 
      lastPost = time;
      mqtt.publish(owner_id, "tm");
    }
    mqtt.loop();
  }
}

I note that you're using both malloc and Strings. Dynamic memory allocation is risky business with the small amount of RAM an Arduino has.

I haven't tried to see what the problem is, but I'd get rid of String and malloc before digging any deeper.

It becomes very unstable after 1-2 weeks of continuously running.

That is a symptom of memory corruption caused by the use to the String class. See this page.

wildbill: I note that you're using both malloc and Strings. Dynamic memory allocation is risky business with the small amount of RAM an Arduino has.

I haven't tried to see what the problem is, but I'd get rid of String and malloc before digging any deeper.

Malloc was taken straight by the pubsubclient library example, and I free it after every use.

You might get away with the malloc, although I notice that you assume it's successful, but use of String as well means that eventually, your heap will show signs of fragmentation.

Once the heap is in fragments, there will likely be a malloc failure, either of your explicit call or one done behind the scenes on behalf of the String class. Once that happens, all bets are off.

Examples are meant to illustrate how things work, not to give you code to copy. Many of them have issues that make them not suitable for production. I think you've found one.

If you want it to be stable then best bet is to stop copying examples and write some good code.

wildbill: You might get away with the malloc, although I notice that you assume it's successful, but use of String as well means that eventually, your heap will show signs of fragmentation.

Once the heap is in fragments, there will likely be a malloc failure, either of your explicit call or one done behind the scenes on behalf of the String class. Once that happens, all bets are off.

So your suggestion is to replace all the Strings ?

DmitriDrozo: So your suggestion is to replace all the strings ?

No, all the Strings

All the Strings (note the upper case S), yes.

Truly, there's no point to using malloc either. You have a small finite amount of memory, you might as well have a static array of the size of your biggest message if you know it. Just be sure that you check for buffer overrun in case of an occasional larger one that doesn't fit.

The Evils of Strings page that I linked has suggestions on how to replace the use of Strings with string functions.

groundFungus: The Evils of Strings page that I linked has suggestions on how to replace the use of Strings with string functions.

Yes, I already read that and I'm trying to use it, thank you.

wildbill: All the Strings (note the upper case S), yes.

Truly, there's no point to using malloc either. You have a small finite amount of memory, you might as well have a static array of the size of your biggest message if you know it. Just be sure that you check for buffer overrun in case of an occasional larger one that doesn't fit.

Inside the switch case, I need to separate the incoming mqtt message in two out of the three switch cases. Say that Strings become char arrays, is the safest method to copy them memcpy ? Also should I make them global variables and memset them to 0 (wipe them) on every call or keep them as local variables ?

DmitriDrozo:
Inside the switch case, I need to separate the incoming mqtt message in two out of the three switch cases. Say that Strings become char arrays, is the safest method to copy them memcpy ? Also should I make them global variables and memset them to 0 (wipe them) on every call or keep them as local variables ?

Keep them local if you can. Memcpy is fine as long as you ensure you avoid copying off the end of the array. If you need to memset, locals won’t avoid that - they’re not initialized for you.

I don’t see the point of the cmd variable - for your purposes, you can just use payload instead.

Note also, you have recursive calls in getMeasurements. That probably isn’t helping with your stability problems either by giving you a better chance of crashing the stack into the heap.

wildbill: Keep them local if you can. Memcpy is fine as long as you ensure you avoid copying off the end of the array. If you need to memset, locals won't avoid that - they're not initialized for you.

I don't see the point of the cmd variable - for your purposes, you can just use payload instead.

Note also, you have recursive calls in getMeasurements. That probably isn't helping with your stability problems either by giving you a better chance of crashing the stack into the heap.

Ok thank you I will try to fix all the mentioned. The point of the cmd is that without it the payload "disappears" inside the switch for some reason.

If you are going to use a String, make a String variable to do all the String work. In the setup reserve a buffer size for the string String Reserve. The buffer can be cleared with a "" and to add to the String use concat(). Using a String buffer to do your String operations will not make memory holes. The trick is to set a good buffer size for the String work you are doing. I found that starting out with a String reserve(50) to be well and good. Using the String functions you can have Serial printed the working String buffer byte size used to get an idea of how big or small to make the reserve. I tend to make a reserve about 10 bytes larger then the highest number used. The link to reserve, also has links to other String functions.

Let the holy war commence.

Last question I promise. Since the first value of the cmd array is the command, in order to copy the array into the pieces that I want I use a for loop and not memcpy, I assume since it’s a local variable it will get cleaned up. Is this safe/ok ?

//this
String x = "";
for (int i = 1; i < len; i++) {
     x += cmd[i];
}

//turns into this
char x[len-1];
for (int i = 1; i <= len; i++) {
     x[i - 1] = cmd[i];
}
i <= len

An array of length “len” does not have an element with the index “len”

AWOL:

i <= len

An array of length “len” does not have an element with the index “len”

It’s coming from the mqttCalback function that receives mqtt messages.