Mega 2560 EEPROM errors

I've noticed on one Mega that some EEPROM addresses are losing values and resetting themselves back to zeros.
I use 2 or 4 addresses to split up long numbers from water meters.
Nothing has changed in software and a duplicate system works fine.
Values are nowhere close to overflowing or anything like that.
Typically today, two addresses that had 1 and 140 yesterday, are now 0 and 1 (1 is a typical day's value).
Data writing is a couple of values each day and a couple of reads, so nothing like close to the write limit.
No other addresses are affected.
Off the top of my head, addresses will be somewhere in the 0 to 99 range.
Has anyone come across this problem?
I'm tempted to write the data to some unused addresses while keeping the originals going.

Please post an example program that shows how you are writing to and reading from EEPROM

Thanks Bob
It’s a huge sketch with 20+ tabs.
The read and write are unchanged over several years.
The identical sketch on a backup system with far more reads and writes works fine as do other reads and writes on the misbehaving Mega.
I count pulses from gas, electricity and water meters. The process is the same for all.
Read 4 addresses with one byte each; reconstruct the count as a long number; add the new count; deconstruct the number into 4 bytes and write them back.
The electricity meter counts Whr, so lots of counts, gas comes next, then water with one or two counts a day, so not exactly working hard. Water counts are so low that I don’t use 4 bytes, only 2

The point is that nothing has changed.

All the other gubbins in this tab (the function) are to do with reading packets from XBees and a bit about reading an RTC.

The function is called in the main loop when the Xbee with the right ID calls in

It’s the second time it’s happened - this time it got to 14 counts, then it looks as though the two bytes reset to zeros.

I think the EEPROM is defective, but stand to be corrected.

void i_WATER_METER()//added "i_"
{//void bracket
        if(lengthByteLSB==22&&calculatedID==water_meter)//was ID
        {
            timeLast_WM=timeNow_WM;//added 13may2017
            timeNow_WM=millis()/1000;//was millis()/1000+3 ??? why? +3//added 13may2017
            timeElapsed_WM=timeNow_WM-timeLast_WM;//added 13may2017
            Serial.print(F("E/T, sec"));Serial.print(F("\t"));Serial.print(timeElapsed_WM);Serial.print(F("  "));//removed "ln"  //added 13may2017
            digitalClockDisplay();//added02nov2017
/*
             Serial.print(hour());
  printDigits(minute());
  printDigits(second());
  Serial.print(" ");
  Serial.print(day());
  Serial.print(" ");
  Serial.print(month());
  Serial.print(" ");
  Serial.print(year()); 
  Serial.print(); //removed ln 02nov2017

       */   
        //Serial.print(F("water meter..."));
        Serial.print(F("LONG"));Serial.print(F("\t"));
              for(int i = 1; i < 8; i++)//was <6, now <8//21jun2016
              {
                byte skip = Serial1.read();
              }
            
                uint16_t digMSB = Serial1.read();//CHANGED FROM INT 03MAR2018
                uint16_t digLSB = Serial1.read();//CHANGED FROM INT 03MAR2018
     
                if(digLSB==0x02)
                {
                  Serial.print(F("**NO COUNT**"));//Serial.print(F("\t"));//removed 09jun2016
                }

                else if (digLSB==0x00)
                {

                  water_meter_count=(EEPROM.read(1)*256 + EEPROM.read(2));
                  water_meter_count=water_meter_count+1;
                  Serial.print (F("**COUNTED**"));Serial.print(F("\t"));
                 

                  
                }
                int water_meter_count_loByte = lowByte(water_meter_count);
                int water_meter_count_hiByte = highByte(water_meter_count);
                EEPROM.write(1,water_meter_count_hiByte);//water_meter_count_loByte//swapped 24mar2016
                EEPROM.write(2,water_meter_count_loByte);//water_meter_count_hiByte//swapped 24mar2016
    
                uint16_t WM_BAT_MSB=Serial1.read();//CHANGED FROM INT 03MAR2018
                uint16_t WM_BAT_LSB=Serial1.read();//CHANGED FROM INT 03MAR2018
                uint16_t WM_TEMP_MSB=Serial1.read();//CHANGED FROM INT 03MAR2018
                uint16_t WM_TEMP_LSB=Serial1.read();//CHANGED FROM INT 03MAR2018
                //Serial.print(F("data"));Serial.print(F("\t"));
                Serial.print(F("\t"));
                Serial.print(WM_BAT_MSB);Serial.print(F("\t"));
                Serial.print(WM_BAT_LSB);Serial.print(F("\t"));
                Serial.print(WM_TEMP_MSB);Serial.print(F("\t"));
                Serial.print(WM_TEMP_LSB);Serial.print(F("\t"));

                EEPROM.write(7,WM_BAT_MSB);//NEW 23APR2016
                EEPROM.write(8,WM_BAT_LSB);//NEW 23APR2016
    
                water_meter_BATTERY = (WM_BAT_MSB*256 +WM_BAT_LSB )*3.00*1.20/1000.00;//water_meter_ANAL_1_MSB//water_meter_ANAL_2_LSB
                //water_meter_TEMP = (Serial1.read()*256 + Serial1.read());//water_meter_ANAL_2_MSB//water_meter_ANAL_2_LSB
                //Serial.print(Serial1.read());
                
                water_meter_TEMP = (( WM_TEMP_MSB*256 + WM_TEMP_LSB)*1200.00)/1023.00/10.00;//*1200.00/1023.00/10.00;//LM35//water_meter_ANAL_2_MSB//water_meter_ANAL_2_LSBSerial.print(Serial1.read());//WM_TEMP_MSB*256 +
                
                Serial.print(F("WM_TEMP"));Serial.print(F("\t"));Serial.print(water_meter_TEMP);Serial.print(F("\t"));
                Serial.print(F("WM_BATTERY"));Serial.print(F("\t"));Serial.print(water_meter_BATTERY);Serial.print(F("\t"));
                Serial.print(F("WM_COUNT"));Serial.print(F("\t"));Serial.print(water_meter_count);Serial.print(F("\t\t"));//added one more tab 01apr2016//removed 08apr2016//Serial.print(F("\t"));//added \t (tab) 8thjun2016

                Serial.print(F("HEADER"));Serial.print(F(","));Serial.print(ID,HEX);Serial.print(F(","));Serial.print(digMSB,HEX);Serial.print(F(","));Serial.print(digLSB,HEX);Serial.print(F(","));//was MSB MSB, changed to MSB LSB 24aug2016
                Serial.print(WM_BAT_MSB,HEX);Serial.print(F(","));Serial.print(WM_BAT_LSB,HEX);Serial.print(F(","));Serial.print(WM_TEMP_MSB,HEX);Serial.print(F(","));Serial.print(WM_TEMP_LSB,HEX);Serial.print(F(","));
                Serial.print(water_meter_TEMP);Serial.print(F(","));Serial.print(water_meter_BATTERY);Serial.print(F(","));Serial.print(water_meter_count);Serial.println(F(","));
                          
                        // /* for(int i = 1; i < 7; i++)
                          //{
                          //Serial.print(Serial1.read());
                          //Serial.print(F(";"));
                           //byte skip = Serial1.read();
                       //   }// commented out
        }//end of water meter section - first if

}//void

The electricity meter counts Whr, so lots of counts,
...
...
I think the EEPROM is defective, but stand to be corrected.

An eeprom cell (address) can be written 100,000 times. To check that, move the eeprom address a bit up (e.g. 32, no idea how many addresses you use); if the problem disappears, you know what caused it.

Instead of using EEPROM.write, use EEPROM.update to write a byte; it will only write if the value has changed.

PS
And you can make life a little easier if you use EEPROM.put and EEPROM.get to save / read an integer in one go.

Thanks sterretje for the info.
I was aware of the 100,000 writes, but I don't think it has anything like that, even accidentally.
Again, I was aware of the gets, puts and update, but things move slowly here and still on the old EEPROM.
Update looks quite interesting so I will give it a go
I will shift the addresses right up into known unused addresses and keep the original going as well.
It's puzzling me why it's just that Mega, and just that count.
I'll also set up some dummy counts on the suspect addresses to see if that triggers a reset - I think it has always been less than 1000 counts.
Cheers

The electricity meter counts Whr, so lots of counts

How many per day?

Hello sterretje

The electricity meter has an LED on it that flashes every Whr.
A counter (Nano) local to the meter displays 24-hr Whr counts that are typically 18000 (18kWhr).
The Nano divides the raw Whr count by 10, so 1800 per day.
The divide by 10 count is transmitted by XBee to the Megas that coordinate everything.
That count is updated to the EEPROM spread over 4 bytes (addresses).

At this point, I start to get a bad feeling.

1800 writes a day, round up to 2000, 100,000 divided by 2000 is 50-days

The Mega without the problem has been running for 281 days and my records show that it has clocked up just over 409,000 counts/EEPROM writes which is 4x the specified limit.

The Mega with the problem is newer and has run for less than a month. The problem is with the water meter counts, which might be 1 or 2 counts per day, so about 40 counts/writes in total, nowhere near 100,000 writes.

I know 409,000 count is correct because I check it against the actual meter count, and any errors would soon show up.

So, I conclude

  1. I have significantly pushed the EEPROM write limit
  2. Another Mega has developed EEPROM problems early in its life
  3. I need to do something to reduce the number of writes or find a better memory (FRAM maybe?)

I'm guessing, but EEPROM memory can't be without faults.

What I will do is look at your update EEPROM suggestion and will set up some new addresses for the water meter to see what happens.

I don't believe it's software related as I use the same simple read, increment write sequence for all the counts, so 5 channels (3 meters on 2 Megas) work and one doesn't.

The Mega2560 datasheet confirms 100,000 writes, but with 4k bytes of EEPROM there should be scope for regular changes of addresses before the 100,000 limit

Here is an item (one of many) on EEPROM lifetime.

Thanks again for your input