Arduino locks up, powering down doesn't fix but serial monitor does

I have a data logging program that records data from my home heating system and logs it to ThingSpeak. I monitor 5 zones, the boiler on/off, temperature to the system and a data point number. When any of the zones or the boiler turns on, I log the data until it turns off and then for some time after all is off. In general, the program works well, however, two times in the last 10 months, the system has locked up. Both times I powered the Arduino down and then powered it up without it having any effect - it was still locked up. Not until I connected it to a computer and opened the serial monitor would it start up again and work fine. The first time it happened, I was away for some time, so I had a neighbor cycle the power with no effect. Just now it did it again so I cycled the power two times, one for 30 seconds the other for 2 hours with no luck. Do I have a millis() rollover problem? Even if I did, wouldn't cycling the power correct it? Any suggestions on where I could start looking for the problem?

Here's my code (I'm not an experienced coder so please excuse the rough stuff! I had to remove comments to shorten it for this post):

//************************************************************/
  // Timing settings. DELAYon and DELAYalloff are compared directly against
  // millis() differences in loop(), so they are in milliseconds; duration is
  // multiplied by 1000 before that comparison, so it is in seconds.
  long duration = 900;   // seconds: window after the last "off" during which posting continues
  unsigned long DELAYalloff= 120000; // ms between posts while every zone and the boiler read off
   unsigned long DELAYon = 60000;  // ms between posts while any zone or the boiler reads on

 
#include <SparkFunESP8266WiFi.h>
#include <OneWire.h>
#include <DallasTemperature.h>
#define ONE_WIRE_BUS 7

// 1-Wire bus on pin ONE_WIRE_BUS, handed to the DallasTemperature driver.
OneWire oneWire(ONE_WIRE_BUS);
 
DallasTemperature sensors(&oneWire);

 
// WiFi credentials passed to esp8266.connect() in setup().
const char mySSID[] = "WesternDigital 2.4GHz";
const char myPSK[] = "XXXXXX";



// ThingSpeak host and write key used by updateThingSpeak().
char thingSpeakAddress[] = "api.thingspeak.com";
String APIKey = "XXXXXXXX";            
// millis() value of the last completed post. millis() returns unsigned long,
// so the timestamp is stored in the same type to keep the elapsed-time
// subtraction well defined across the 32-bit rollover.
unsigned long lastConnectionTime = 0;  

// Digital input pins for the five heating zones (read with digitalRead()).
int zone1 = 6;   
int zone2 = 2;   
int zone3 = 3;    
int zone4 = 4;   
int zone5 = 5;   
// Analog channels (read with analogRead()): boiler sensor and the
// record/stop switch.
int boiler = 1;  
int detect = 0;
// Last temperature returned by getTempFByIndex(0) in updateThingSpeak().
float temp = 100; 

// Readings from the previous pass of loop(); used for change detection.
// Throughout the sketch 0 is treated as "on" and 1 as "off".
int prev1 = 1;       
int prev2 = 1; 
int prev3 = 1;
int prev4 = 1;
int prev5 = 1;
int prevB = 1;

// Readings from the current pass of loop().
int status1 = 1;       
int status2 = 1;         
int status3 = 1;
int status4 = 1;
int status5 = 1;
int statusB = 1;
// NOTE(review): not referenced anywhere in the code shown here; presumably a
// leftover from the SD-card version of the logger.
int statussd = 1000;
// Running count of posts, sent as field8. Widened from int: on a 16-bit-int
// board such as the Uno an int counter overflows at 32767.
unsigned long dataNo = 0;  
// 1 once the "everything just went off" post has been made, 0 otherwise.
int lastOffFlag = 0;   
// millis()-based reference time for the post-off logging window.
unsigned long lastOff = 0;      
// millis() value when updateThingSpeak() last finished. Initialised to 0
// rather than calling millis() during static initialisation, which runs
// before the Arduino core has started its timer.
unsigned long lastDataOut = 0;


void setup() 
{
  int status;
  Serial.begin(9600);
  Serial.println(F("Boiler_Logger_WiFi_5.1"));

  
  pinMode(zone1,INPUT);         
  pinMode(zone2,INPUT);          
  pinMode(zone3,INPUT);         
  pinMode(zone4,INPUT);          
  pinMode(zone5,INPUT);         
  
 
  digitalWrite(zone1,HIGH);     
  digitalWrite(zone2,HIGH);      
  digitalWrite(zone3,HIGH);      
  digitalWrite(zone4,HIGH);
  digitalWrite(zone5,HIGH);

   sensors.begin();           
  
   status = esp8266.begin();
  if (status <= 0)
  {
    Serial.println(F("Unable to communicate with shield. Looping forever."));
    while(1) ;
  }

  esp8266.setMode(ESP8266_MODE_STA); // Set WiFi mode to station
  if (esp8266.status() <= 0) // If we're not already connected
  {
    if (esp8266.connect(mySSID, myPSK) < 0)
    {
      Serial.println(F("Error connecting"));
      while (1) ;
    }    
  }

 
  Serial.print(F("My IP address is: "));
  Serial.println(esp8266.localIP());
  delay(3000);
  Serial.println(F("Ready to record ..."));
 
}

void loop()
{

 

  status1 = digitalRead( zone1);     
  status2 = digitalRead( zone2);    
  status3 = digitalRead( zone3);
  status4 = digitalRead( zone4);
  status5 = digitalRead( zone5);
  statusB = analogRead( boiler);

  if (analogRead( detect) == 0)      
  {
    Serial.println("Recording Stopped");
    
    while (analogRead(detect) ==0)
    {
     delay(50);  
    }
    
    Serial.println(F("******************Start Recording again*******************"));

  }
  if (statusB > 300)         
    {
   statusB = 0;              // boiler is on
    }
    else
    {
    statusB = 1;              // boiler is off
    }
   
 
    if(statusB !=prevB || status1 !=prev1 || 
          status2 !=prev2 || status3 !=prev3 || status4 !=prev4 || status5 !=prev5)
    {
     updateThingSpeak();                
   }
   
 
   if (statusB != prevB)          
    {
    prevB = statusB;
    }
  if (status1 != prev1)    
  {
    prev1 = status1;         
  }
 
 if (status2 != prev2)    
  {
    prev2 = status2;        
  }
  if (status3 != prev3)    
  {
    prev3 = status3;         
  }
  if (status4 != prev4)    
  {
    prev4 = status4;         
  }
  if (status5 != prev5)     
  {
    prev5 = status5;         
  }


  if(status1 ==0 || status2 == 0 || status3 == 0 || status4 == 0 || status5 == 0 || statusB == 0 )
  {
    lastOffFlag = 0;          
   
    if(millis()-lastDataOut >= DELAYon) 
    {
    updateThingSpeak();                
    }
  }
  //Test to see if ALL zones are off.  If yes, print for duration

  if(status1 ==1 && status2 == 1 && status3 == 1 && status4 == 1 && status5 == 1 && statusB == 1 )
  {
   
  //Since all zones are off, look for the first time the last zone was turned off
    if(lastOffFlag == 0)     
    {
      
      if(millis()-lastDataOut >= DELAYalloff)  
     { 
       updateThingSpeak();              
       lastOff = millis()-DELAYalloff;        
                                           
       lastOffFlag = 1;         
      
     }
      
    }

    //Since all zones are off, test to see if the duration has not been met 

    if(millis() - lastOff  <= duration*1000 ) 
    {
      if(millis()-lastDataOut >= DELAYalloff) 
     { 
       updateThingSpeak();               
     }
    }
  }
}

void updateThingSpeak()
{
   // Create a client, and initiate a connection
  ESP8266Client client;

  if (client.connect(thingSpeakAddress, 80) <= 0)
  {
    Serial.println(F("Failed to connect to server."));
    return;
  }
  Serial.println(F("Connected."));
  sensors.requestTemperatures(); 
  delay(500);                   
  temp = (sensors.getTempFByIndex(0));
  if(millis()- lastConnectionTime <17000)
  {
    delay(17000-(millis()-lastConnectionTime));
  }
  dataNo +=1;  
    
  //read boiler again in case it has changed during the delay
  if (analogRead( boiler) > 300)          
    {
   statusB = 0;              // boiler is on
    }
    else
    {
    statusB = 1;              // boiler is off
    }
   // Set up our  post parameters:
   String params;
  params += "field1=" + String(statusB)+ "&";
  params += "field2="   + String(digitalRead( zone1))+ "&";
  params += "field3="   + String(digitalRead( zone2))+ "&";
  params += "field4="   + String(digitalRead( zone3))+ "&";
  params += "field5="   + String(digitalRead( zone4))+ "&";
  params += "field6="   + String(digitalRead( zone5))+ "&";
  params += "field7="   + String(temp)+ "&";
  params += "field8="   + String(dataNo);

  Serial.println(F("Posting to thingspeak!"));
  Serial.println(params);
  Serial.print("Data Points: ");
  Serial.println(dataNo);
  
    client.print("POST /update HTTP/1.1\n");
    client.print("Host: api.thingspeak.com\n");
    client.print("Connection: close\n");
    client.print("X-THINGSPEAKAPIKEY: " + APIKey + "\n");
    client.print("Content-Type: application/x-www-form-urlencoded\n");
    client.print("Content-Length: ");
    client.print(params.length());
    client.print("\n\n");
    client.print(params);
    lastConnectionTime = millis();
    

  // available() will return the number of characters
  // currently in the receive buffer.
  while (client.available())
    Serial.write(client.read()); // read() gets the FIFO char
 

  
  // connected() is a boolean return value - 1 if the 
  // connection is active, 0 if it's closed.
    if (client.connected())
    client.stop(); // stop() closes a TCP connection.
    lastDataOut=millis();  //last time data printed
  }

Here is the last bit of data before it crashed:

created_at entry_id ID field1 field2 field3 field4 field5 field6 field7 field8

2017-10-20 23:41:17 UTC 8920 0 1 1 1 1 1 71.71 12941
2017-10-20 23:42:21 UTC 8921 0 1 1 1 1 1 71.6 12942
2017-10-20 23:44:25 UTC 8922 0 1 1 1 1 1 71.71 12944

I suspect your chip is being backpowered by one of the connected devices, so when you power it down it's still running.

Opening serial monitor causes the DTR autoreset circuit to reset the microcontroller (assuming it's an AVR based board without native USB), so that guarantees you a reset condition.

Yes, a rollover issue would be cleared by resetting the Arduino or cycling the power.

You did not provide a wiring diagram.

It should not matter but you did not tell us what Arduino you have.

It is good that your program is mostly working. I would not change it for this: When you suffix variable names with numbers, it is time to learn about arrays.

  if (analogRead( detect) == 0)

What, EXACTLY, is connected to this pin? Why doesn't the detect variable have pin in the name, to make it clear that it holds a pin number?

Why are you expecting an analog value of exactly 0?

 .
 .
 .
 etc

There is not a snowball's chance in hell that the code you posted will compile. Why are you wasting our time?

PaulS:

  if (analogRead( detect) == 0)

What, EXACTLY, is connected to this pin? Why doesn't the detect variable have pin in the name, to make it clear that it holds a pin number?

Why are you expecting an analog value of exactly 0?

 .

.
.
etc



There is not a snowball's chance in hell that the code you posted will compile. Why are you wasting our time?

Sorry, I had to remove things to get it below the 9000 char limit for posts so I got rid of boring code. I just now edited the code to put back all the code but took out all comments which of course makes following the code much harder.

I have a switch onboard that when I was recording to SD card I would flip it to stop recording while I removed card. I no longer do that. Don't know why I used analogread!

DrAzzy:
I suspect your chip is being backpowered by one of the connected devices, so when you power it down it's still running.

I don't think so - Nothing connected is powered - boiler on/off just senses light from an LED, zone on/off are just contacts that close....... All LEDs on the board go off when power is removed.

Thanks for the comment

I suspect there are two problems. The occasional random lockup is probably due to the use of the String data type. Over time, this can fragment memory and fail. You should switch to using normal C strings (null-terminated arrays of characters).

Not resetting when cycling power is something else. I would agree with DrAzzy that the board must be getting power from somewhere (and yes, it could even be supplied through one of the IO pins).

dnwheeler:
You should switch to using normal C strings (null-terminated arrays of characters).

Not resetting when cycling power is something else. I would agree with DrAzzy that the board must be getting power from somewhere (and yes, it could even be supplied through one of the IO pins).

Thanks for your comment. Can you please give me more of a hint as to what to use instead of String?

Also, wouldn't the LEDs on the Uno board and the WiFi board stay lit if it were still getting power? They don't.

Normal C strings are implemented using an array of characters. Each character of the string is placed into a separate entry in the array, followed by a null-terminator (with a value of zero) to signify the end of the string.

Start reading here: https://www.arduino.cc/en/Reference/String

... and a C string constant for initializing an array or for other purposes is text between double quotes "like this". The compiler knows that this is a C string and ends it with a null terminator automatically.

Thanks for all the comments - I'll look at C strings also.

It doesn't seem that there are any thoughts about what could cause the lock up or cause the lock up not to be cleared by power cycling. Can anybody suggest how I can troubleshoot it better when it happens? Before, I just cycled power and activated the heat system so that there would be some data to record. When I did that a couple of times without success, I connected a computer to the UNO board (without taking power away) and opened the serial monitor. It then started working. What can I do to troubleshoot other than having a computer connected all the time (not practical since it only happens every 3 months or so)? Maybe if I routinely cycle power before it happens (like put a timer on the electrical outlet that turns off power every week), I can at least keep it running....... not too elegant, but that's where I am now.

Since there seems to be some doubt about whether the unit is being back powered, next time it happens pull the Uno's reset pin to LOW for a couple of seconds instead of removing the power. See if the behaviour is any different.

If you think there are memory issues that result in the unit failing, then try sticking a line like this:

volatile byte wasteSpace[1600];

at the beginning of your sketch. Waste the most of your device's RAM with this, but leave enough for the sketch to run with some room. Test while connected to your computer. If memory is causing a problem, this should cause it to fail sooner and give you more opportunities to debug.

  status = esp8266.begin();
  if (status <= 0)
  {
    Serial.println(F("Unable to communicate with shield. Looping forever."));
    while(1) ;
  }

These empty infinite while loops aren't great for error conditions. Instead, use digitalWrite() and delay() with the onboard LED to generate flash codes inside of the while.

Maybe if I routinely cycle power before it happens (like put a timer on the electrical outlet that turns off power every week), I can at least keep it running

Before this last resort, you could also enable the hardware watchdog on your Uno with a few small additions to your code. Better to fix any errors first if you can though.