Reading cattle EID tags

I also tried doing it like this, where I just cut out the code, make a new function getfulltag(), and paste the code into getfulltag() and call getfulltag() in the space where the original code was, also not successful.

void getfulltag()
    {
      while ((mastFile.available()) && (!fulltag))          // runs until file ends or full tagnumber found
         {                                                     
              eidtest[y] = mastFile.read();                                     
              if (eidtest[y] == 13)                    // get rid of all \n and \r within eid number
                eidtest[y] = mastFile.read();                          
              if (eidtest[y] == 10)    
                eidtest[y] = mastFile.read();          
              if (eidtest[y] == ',')      // reset EID number if ',' is found  
              {
                eidtest[0] = '\0';
                y = 0;
              }
              if (eidtest[y] == '!')      // reset EID number if '!' is found and ++ position of current eid record
              {
                eidtest[0] = '\0';
                y = 0;
                mastFileindex++;
              }              
              if (eidtest[y] == ' ')                          // get rid of spaces in ALLFLEX eid nos
                eidtest[y] = '\0';                               
              if (isAlphaNumeric(eidtest[y]))                 // if valid eid character, put into array
                y++;                  
              if (y == 15)
              {          
                eidtest[y] = '\0'; 
                fulltag = true;
              }                                      
         }
    }
       
    void assignExtraInfo()   // takes tag number from reader and checks it for match in mastfile.txt                              // runs each time a tag is read by the reader
    {
       eidtest[0] = '\0';
       vidtest[0] = '\0';
       hok[0] = '\0';
       draft = '0';
       fulltag = false;
       match = false;
       mastFileindex = 0;
       y = 0;
       File mastFile = SD.open(downFilename);    
       while ((mastFile.available()) && (!match))              // runs until eid match found between mastfile and read tag
       {         
         getfulltag();                       
         if (fulltag)
         {          
           if(strcmp(EID, eidtest) == 0)         // CHECK FULL TAGNUMBER IN MASTFILE AGAINST CURRENT EID 
           {
             mastFilepos = mastFile.position();
             eidtest[0] = '\0';
             match = true;            
             int z = 0;             
             byte dump = mastFile.read();        // remove comma after tag number            
             if (dump == 13)                    
               dump = mastFile.read();                          
             if (dump == 10)    
               dump = mastFile.read();           
             do                                  // IF MATCH, GIVE VALUES TO CURRENT EID'S VID, HOK, DRAFT
             {               
               vidtest[z] = mastFile.read();               
               if (vidtest[z] == 13)                    
                 vidtest[z] = mastFile.read();                          
               if (vidtest[z] == 10)    
                 vidtest[z] = mastFile.read();               
               z++;
             }   while (vidtest[z - 1] != ',');           
             vidtest[z - 1] = '\0';
             z = 0;                
             do           
             {
               hok[z] = mastFile.read();
               if (hok[z] == 13)                    
                 hok[z] = mastFile.read();                          
               if (hok[z] == 10)    
                 hok[z] = mastFile.read();               
               z++;
             }   while (hok[z - 1] != ',');
             hok[z - 1] = '\0';
             z = 0;                                         
             draft = mastFile.read();             
             if (draft == 13)                    
               draft = mastFile.read();                          
             if (draft == 10)    
               draft = mastFile.read();                           
             if (draft == 49)                                // if draft == 1
               digitalWrite(26, HIGH); 
             else  
               digitalWrite(26, LOW);   
             if ((hok[0] == 'B') && (hok[1] == 'W'))         // determine which group was counted
               BWint++;
             if ((hok[0] == 'L') && (hok[1] == 'H'))
               LHint++;
             if ((hok[0] == 'L') && (hok[1] == 'K'))
               LKint++;
             if ((hok[0] == 'S') && (hok[1] == 'H'))
               SHint++;
             if ((hok[0] == 'S') && (hok[1] == 'K'))
               SKint++;
             if ((hok[0] == 'S') && (hok[1] == 'L'))
               SLint++;
             if ((hok[0] == 'R') && (hok[1] == 'K'))
               RKint++;  
           }
           else
           {  
             y = 0;
             eidtest[y] = '\0';
             vidtest[0] = 63;
             vidtest[1] = 63;
             vidtest[2] = 63;
             vidtest[3] = 63;
             vidtest[4] = 63;
             vidtest[5] = '\0';
             hok[0] = 63;
             hok[1] = 63;
             hok[2] = '\0';
             draft = '0';
             fulltag = false;
           }  
         }
       }
       mastFile.close();                                 
    }

Your descriptions of what you do and what does not work are too vague to help.

Please post a copy of the masterfile and some "EIDs that were just read",
so I can get a better feeling for the problem, not for your solution.

In order to use a binary search, your records (or keys) have to be sorted and fixed length.
I doubt that your masterfile has these attributes.

Whether you use a binary search, hash or something else,
I think the additional data should be a header of the masterfile, so you have to open only one file.

The goal is to pull the masterfile record (or the notInMasterfile result) with minimal sector reads (SD).

Attached is the masterfile. It is sorted alphabetically, but the records are not of equal length. Some of the visual id's are one or two digits longer.

An example of an EID that is read: 982000409194255

There are many double entries in the masterfile, this is simply to populate the masterfile with over 1,000 entries. When the first match is found the search stops and the doubles are ignored. Once all 1,000 actual rfid are attached to our animals the real unique list of numbers will populate the file.

So as I understood it, to do a binary search all I had to do was to do a spot check in the middle of the file, to determine whether the "just read" EID is located before or after the EID read from the middle of the masterfile. If it is alphabetically before the middle of the file, do a spot check at the end of the first quarter of the file, and so on. if I do this 7 or 8 times I will be left with less than ten records to search, totalling the number of actual searches done to under 20. Which should be quick enough to let the system work.

In order to repeat this process 8 times though, I need to cut up my current function (assignExtraInfo() ) into two functions that I can call whenever needed, if I cannot do this it will result in a few hundred lines of repeated code.

So in short I think I can get away with the code I currently have to do a binary search... or am I missing it here with miles?

mastfile.txt (29.7 KB)

I could add spaces to the shorter visual ID records to give the lines equal lengths, if needed.

Your testdata is absolutly unrealisic and so nearly useless.

Why do you use an '!' as a record delimiter?

You could split the records in a fixed and a variable part (adds one offset per record),
or add a key offset pair for each record.

Best performance would be via a hash that maps the EID to a sorted list of EIDs + offsets.

Whandall:
Your testdata is absolutly unrealisic and so nearly useless.

Ok. What would realistic test data look like? It's works, just need to speed up the search.

Whandall:
Why do you use an '!' as a record delimiter?

I'm very new to programming, I had a hard time with not using something visible like '!'.

Whandall:
You could split the records in a fixed and a variable part (adds one offset per record),
or add a key offset pair for each record.

Best performance would be via a hash that maps the EID to a sorted list of EIDs + offsets.

Thanks but I have no idea what you mean.

If I could just get my function split up I'd be done with this project. So I guess my main question at this stage is: what happens with my SD.open(downFilename, FILE_READ); once another function is called from within the function where the SD file is opened from. Does the file close automatically? Does it remain open?

This is what the serial output looks like when I read tags. Index is the number of the record where a match was found, pos is the byte position in the file. It all checks out.

982000406761295,AR356,SH,0 - Index in mastfile: 1047 - Pos in mastfile: 30934
900194000002207,T028,SH,1 - Index in mastfile: 1027 - Pos in mastfile: 30353
982000406761314,AR380,SL,0 - Index in mastfile: 1053 - Pos in mastfile: 31114
982000409194255,X060,RK,0 - Index in mastfile: 1060 - Pos in mastfile: 31324
156111111111821,Y050,BW,0 - Index in mastfile: 0 - Pos in mastfile: 17
982000405739183,X079,LH,1 - Index in mastfile: 1040 - Pos in mastfile: 30730
982000409194272,Y024,LK,1 - Index in mastfile: 1067 - Pos in mastfile: 31527

Got my functions split up and got the binary search going. The database is currently being split up in four sections only, but the search time has already reduced from over 1,300 millis (when searching from top to bottom) to 530 millis at most when using this method. I will split up the sections further and I'm sure the search time will reduce to something around 100 millis which will be more than adequate. Thanks for putting me onto the right track.

void binSearch()   
    {
       eidtest[0] = '\0';
       vidtest[0] = '\0';
       hok[0] = '\0';
       draft = '0';
       fulltag = false;
       match = false;
       mastFileindex = 0;
       y = 0;
       mastFile = SD.open(downFilename, FILE_READ); 
       searchstart = millis(); 

       Serial.print(F("Checking at 1/2, mastfile seek: "));
       mastFile.seek(mastFile.size() / 2);                   
       Serial.println(mastFile.position());
       getfulltag();
       checkEIDpos();
       
       if (EIDalpha == 0)                             
         checkMatchandAssignData();
         
       if (EIDalpha == -1)                            
         {
          Serial.print(F("Checking at 1/4, mastfile seek: "));
          mastFile.seek(mastFile.size() / 4);               
          Serial.println(mastFile.position());
          getfulltag();
          checkEIDpos();
         
          if (EIDalpha == 0)
            checkMatchandAssignData();

          if (EIDalpha == -1)
            {
               Serial.print(F("EID in 0/8 - 2/8, mastfile seek: "));
               mastFile.seek(0);                       
               Serial.println(mastFile.position());
               while ((mastFile.position() < (mastFile.size() / 4)) && (!match))              
                {         
                   while ((mastFile.available()) && (!fulltag))                                                             
                     getfulltag();                                                    
                   if (fulltag)         
                     checkMatchandAssignData();  
                }               
            }

          else if (EIDalpha == 1)
            {
               Serial.print(F("EID in 2/8 - 4/8, mastfile seek: "));
               mastFile.seek(mastFile.size() / 4);     
               Serial.println(mastFile.position());
               while ((mastFile.position() < (mastFile.size() / 2)) && (!match))              
                {         
                   while ((mastFile.available()) && (!fulltag))                                                             
                     getfulltag();                                                    
                   if (fulltag)         
                     checkMatchandAssignData();  
                } 
            }
                   
         }
                  
       else if (EIDalpha == 1)                              
         {
           Serial.print(F("Checking at 3/4, mastfile seek: "));
           mastFile.seek(mastFile.size() / 4 * 3);          
           Serial.println(mastFile.position());
           getfulltag();
           checkEIDpos();
         
           if (EIDalpha == 0)
             checkMatchandAssignData();

           if (EIDalpha == -1)
             {
                Serial.print(F("EID in 4/8 - 6/8, mastfile seek: "));
                mastFile.seek(mastFile.size() / 2);     
                Serial.println(mastFile.position());
                while ((mastFile.position() < (mastFile.size() / 4 * 3)) && (!match))              
                {         
                   while ((mastFile.available()) && (!fulltag))                                                             
                     getfulltag();                                                    
                   if (fulltag)         
                     checkMatchandAssignData();  
                }                
             }
           else if (EIDalpha == 1)
             {
                Serial.print(F("EID in 6/8 - 8/8, mastfile seek: "));
                mastFile.seek(mastFile.size() / 4 * 3);     
                Serial.println(mastFile.position());
                while ((mastFile.available()) && (!match))              
                {         
                   while ((mastFile.available()) && (!fulltag))                                                             
                     getfulltag();                                                    
                   if (fulltag)         
                     checkMatchandAssignData();  
                }  
             }
         }   
       mastFile.close();                                 
    }

Serial output with all the debugging info looks like this:

Checking at 1/2, mastfile seek: 15772

982000405739183 > 826000009991838 : EID > eidtest of current sector
Checking at 3/4, mastfile seek: 23658

982000405739183 > 826000009991838 : EID > eidtest of current sector
EID in 6/8 - 8/8, mastfile seek: 23658

search time before match: 505
982000405739183,X079,LH,1 - Index in sector: 240 - Pos in mastfile: 30730
Checking at 1/2, mastfile seek: 15772

156111111111821 < 826000009991838 : EID < eidtest of current sector
Checking at 1/4, mastfile seek: 7886

156111111111821 < 826000009991838 : EID < eidtest of current sector
EID in 0/8 - 2/8, mastfile seek: 0

search time before match: 201
156111111111821,Y050,BW,0 - Index in sector: 1 - Pos in mastfile: 17
Checking at 1/2, mastfile seek: 15772

982000409194255 > 826000009991838 : EID > eidtest of current sector
Checking at 3/4, mastfile seek: 23658

982000409194255 > 826000009991838 : EID > eidtest of current sector
EID in 6/8 - 8/8, mastfile seek: 23658

search time before match: 530
982000409194255,X060,RK,0 - Index in sector: 260 - Pos in mastfile: 31324

Another search method that uses only add/subtract, no divides, runs really fast, but requires your table to be a power of 2 in size. You would have to pad the table to be the next power of 2 in size.

Paul

Thanks @Paul_KD7HB. Could you give me a simple example of this?

Binary search done, split up the file of 1,000 odd records into 8 parts. Search time is now under 200ms, as opposed to the over 1,300ms using liniear searching. Very pleased right now. My project is days away from commissioning. Thanks to all who have helped, and to @Seedler for paving the way.

147 millis
982000406761295,AR356,SH,0
153 millis
982000406761314,AR380,SL,0
7 millis
156111111111821,Y050,BW,0
140 millis
982000405739183,X079,LH,1
172 millis
982000409194272,Y024,LK,1
7 millis
156111111111821,Y050,BW,0
164 millis
982000409194255,X060,RK,0
130 millis
982000405739175,Y042,LH,0

You could cache the decision point EIDs and their positons in the masterfile and go directly to the linear search.

Whandall:
You could cache the decision point EIDs and their positons in the masterfile and go directly to the linear search.

The contents of the masterfile changes regularly, sometimes daily. It gets uploaded from my pc to the ftp server automatically and the arduino downloads the file automatically every day. So decision point EID's and their positions will change regularly.

As I have it now it works great. The sheep will have a hard time running past my reader at a rate of 6 per second. The ones that can deserve not to be counted.

heinburgh:
The contents of the masterfile changes regularly, sometimes daily. It gets uploaded from my pc to the ftp server automatically and the arduino downloads the file automatically every day. So decision point EID's and their positions will change regularly.

That is the difference between constants and cached values. :wink:

You could generate some index as part of the masterfile, read that on startup and store it to EEPROM,
or you could generate the index once on startup from an unchanged masterfile.

To be fast, you have to touch and read a minimal number of blocks.
searching for the begin of an entry for each decision point is only necessary once, not on each lookup.

You could store each decision point as it is found in the normal process, so each point will only be searched and accessed once.

Whandall:
That is the difference between constants and cached values. :wink:

You could generate some index as part of the masterfile, read that on startup and store it to EEPROM,
or you could generate the index once on startup from an unchanged masterfile.

To be fast, you have to touch and read a minimal number of blocks.
searching for the begin of an entry for each decision point is only necessary once, not on each lookup.

You could store each decision point as it is found in the normal process, so each point will only be searched and accessed once.

Aaaah. Got it. Once the system is up and running I will take the time to fine tune a second identical unit. I will take time to incorporate improvements like these. There are a few other things I need to clean up.

heinburgh:
Thanks @Paul_KD7HB. Could you give me a simple example of this?

I cannot give you a program code to do this. But here is it's history and how it works.

I was programming for a bank service bureau in the 1960's. One program read checks on a check reader/sorter and searched a table of account numbers to see if that check needed to sent to a particular pocket to be returned to the customer. Over time the number of bank accounts grew until the searching of the table took longer than the time allowed to select a pocket for the check.

I was chatting with the head of our mini-computer division and mentioned the time /table problem. He described their solution to the big table problem on a mini-computer that could only add and subtract. I programmed his solution, the BCD table search, and it solved the problem.

Years later, in programming a check reader/sorter with a built-in Motorola 6805 micro-controller, I ran into the same time problem. that processor was 16 bit internal, but had only an 8-bit memory buss, so was really slow. I programmed the BCD table search in assembly and worked great.

Later, the check reader/sorters were built with 68010 and then 68030 processors and for other reasons, went to a true binary search. Time was not a constraint anymore.

The idea is to set up a table area larger than the largest table you will use, but the size must be a power of 2. The size being 512, or 1024, or 2048, or 4096, or however big you need. When your program loads the table, beginning at 0, the remaining entries at the end must be set to a higher value than any real entries. We used all 9s.

Then set a fixed table of powers of 2. 0,2, 4, 8, 16, 32, 64, 128, 256, etc. Up to one step smaller than your table.

In the search, compare your search key to the table entry indexed by the last power table entry. If equal, you are done. If the search key is less than the indexed table entry, subtract the next lower power of 2 table entry from the table index.

If the search key is greater than the indexed table entry, add the next lower power of 2 table entry to the table index.

Continue that process until the power of 2 entry is zero. By then you will have either found a matching entry in the data table or know there is no matching entry.

A comment on your binary searching speed. If you will use shift-right commands, rather than divides, you will speed up the processing quite a bit. One shift right is divide by 2. Two bit shift right is divide by 4, etc.

Paul

Thanks for the info Paul. I have always been facsinated by old school hardware and what you guys were able to do with it. I'll be looking into these methods when time allows.

I changed my divides for right bitshift, and when I ran the program the millis to find the entries remained the same, give or take one ms. What I did was (mastFile.size() >> 2) for instance, instead of (mastFile.size() / 4). The program compiled and the search conducted ok, but with same time usage. Does it matter if I do it this way as opposed to giving the value of mastFile.size() to an array first, and then bitshifting the array?