It is much easier and leaner to just follow the first word that matches the input chars until one does not and then the magic numbers tell me which to check or not, if the match process continues or no match is possible.
Here is the code for when an input char != list word char. It looks for the next lowest in alpha-order word that so-far matches all of the so-far input chars. A no-match ends searching except for the word end.
else // find an alternate match or no_match
{
current++;
// we have chars matched and how many chars of the current word the next word matches
// if that number is smaller than already matched chars then no_match
// if that number is bigger then try the next list word
// if that number is the same then try to match the next char to text,
// ---- if they match then new current word and charsMatched++
// ---- else try the next list word
while ( current < keys )
{
charToMatch = pgm_read_byte( charsMatchingPrevWord + current );
if ( charToMatch < charsMatched )
{
matchState = no_match;
break; // break from the while loop
}
else if ( charToMatch > charsMatched )
{
current++;
}
else // check this one
{
curPtr = (PGM_P)pgm_read_word( key_list + current );
charToMatch = pgm_read_byte( curPtr + charsMatched );
if ( text == charToMatch )
{
charsMatched++;
charToMatch = pgm_read_byte( curPtr + charsMatched );
if ( charToMatch == 0 )
{
matchState = list_matched;
}
break; // break from the while loop
}
else
{
current++;
}
}
}
if ( current >= keys )
{
matchState = no_match; // no_match
}
}
break;
Here is the complete old speed test that comes from:
// serial_word_match_speed_test ver 1.0 Feb 11, 2019 by GoForSmoke on the Arduino.cc forum.
/*
User defines a PROGMEM array to hold a sorted word list
1-pass match/no_match keywords and evaluate numbers in text stream/data.
*/
void usage()
{
Serial.println( F( "\nserial_word_match" ));
}
const int keys = 32; // put this on a 1284P, there could be > 256 words in the list
enum keywd { boffms, bonms, bpin, bstrtms, bstat, hlp, prnt, rst, st, strt, stp };
// the keys must be in alpha order, all start with lowercase alpha
const char key000[] PROGMEM = "blinkOffMs";
const char key001[] PROGMEM = "blinkOnMs";
const char key002[] PROGMEM = "blinkPin";
const char key003[] PROGMEM = "blinkStartMs";
const char key004[] PROGMEM = "blinkState";
const char key005[] PROGMEM = "help";
const char key006[] PROGMEM = "memccpy";
const char key007[] PROGMEM = "memchr";
const char key008[] PROGMEM = "memcmp";
const char key009[] PROGMEM = "memcpy";
const char key010[] PROGMEM = "memmem";
const char key011[] PROGMEM = "memmove";
const char key012[] PROGMEM = "memrchr";
const char key013[] PROGMEM = "memset";
const char key014[] PROGMEM = "print";
const char key015[] PROGMEM = "reset";
const char key016[] PROGMEM = "set";
const char key017[] PROGMEM = "start";
const char key018[] PROGMEM = "stop";
const char key019[] PROGMEM = "strcat";
const char key020[] PROGMEM = "strchr";
const char key021[] PROGMEM = "strcmp";
const char key022[] PROGMEM = "strcpy";
const char key023[] PROGMEM = "strlen";
const char key024[] PROGMEM = "strlwr";
const char key025[] PROGMEM = "strstr";
const char key026[] PROGMEM = "strupr";
const char key027[] PROGMEM = "time";
const char key028[] PROGMEM = "wait";
const char key029[] PROGMEM = "wander";
const char key030[] PROGMEM = "watch";
const char key031[] PROGMEM = "win";
const byte charsMatchingPrevWord[ keys ] PROGMEM =
// 0-4, 5-9, 10-14, 15-19 20-24 25-29 31-32
{ 0, 6, 5, 5, 8, 0, 0, 4, 4, 4, 3, 4, 3, 3, 0, 0, 0, 1, 2, 2, 4, 4, 4, 3, 3, 3, 3, 0, 0, 1, 2, 1 };
const int a_z_starts[ 26 ] PROGMEM =
// a,b, c, d, e, f, g,h, i, j, k, l,m, n, o, p, q, r, s, t, u, v, w, x, y, z
{ -1, 0, -1, -1, -1, -1, -1, 5, -1, -1, -1, -1, 6, -1, -1, 14, -1, 15, 16, 27, -1, -1, 29, -1, -1, -1 }; // a-z
PGM_P const key_list[ keys ] PROGMEM =
{ // these are pointers to the words in the list
key000, key001, key002, key003, key004, key005, key006, key007,
key008, key009, key010, key011, key012, key013, key014, key015,
key016, key017, key018, key019, key020, key021, key022, key023,
key024, key025, key026, key027, key028, key029, key030, key031
};
char text;
int current = -1; // current match word index, may be -1 for none
PGM_P curPtr; // pointer into PROGMEM to get word list chars
byte charsMatched, charToMatch; // no key words over 255 chars....
byte matchState; // match_start, matching, number, no_match, list_matched, finished_match
byte maxDigits = 5;
unsigned long ulAccum;
enum matchType { match_start = 0, matching, number, no_match, list_matched, finished_match };
void listWords()
{
int i;
byte j;
long listLen = 0;
Serial.println( "\n\n\n\n\n\n\n\nword list" );
for ( i = 0; i < keys; i++ )
{
Serial.print( "key " );
Serial.print( i );
Serial.print( " - " );
curPtr = (PGM_P)pgm_read_word( key_list + i );
j = 0;
while (( charToMatch = pgm_read_byte( curPtr + j++ )) > 0 )
{
listLen++;
Serial.write( charToMatch );
}
Serial.println();
}
Serial.print( "\nTotal list characters = " );
Serial.println( listLen );
Serial.println( "\na - z starts" );
for ( j = 0; j < 26; j++ )
{
i = pgm_read_word( a_z_starts + j );
if ( i >= 0 )
{
Serial.write( 'a' + j );
Serial.print( " " );
Serial.println( i );
}
}
Serial.println( "\n\nchars that match previous word chars" );
for ( i = 0; i < keys; i++ )
{
j = pgm_read_byte( charsMatchingPrevWord + i );
Serial.print( "key " );
Serial.print( i );
Serial.print( " - " );
Serial.println( j, DEC );
}
}
/*
need int indexes to current and alternate words and byte chars matched offset.
states - match_start, matching, number, no_match, list_matched, finished_match
*/
void match() // uses globals, new char is in char text
{
if ( matchState == finished_match ) matchState = match_start;
switch ( matchState ) // match_start, matching, number, no_match, list_matched, finished_match
{
case match_start :
if ( text >= 'a' && text <= 'z' )
{
current = int( pgm_read_word( a_z_starts + ( text - 'a' ) ));
if ( current < 0 ) // no list word starts with the text letter
{
matchState = no_match; // no_match
}
else
{
matchState = matching;
charsMatched = 1;
}
}
else
{
matchState = no_match; // no_match
}
break;
case matching : // matching
case list_matched :
if ( text == ' ' || text == '\n' )
{
if ( matchState == list_matched ) matchState = finished_match;
else if ( text == '\n' ) matchState = match_start;
else matchState = no_match;
break; // exit the case
}
curPtr = (PGM_P)pgm_read_word( key_list + current );
charToMatch = pgm_read_byte( curPtr + charsMatched );
if ( text == charToMatch ) // matches the list word so far
{
charsMatched++;
charToMatch = pgm_read_byte( curPtr + charsMatched );
if ( charToMatch == 0 )
{
matchState = list_matched; // matched a list word
}
}
else // find an alternate match or no_match
{
current++;
// we have chars matched and how many chars of the current word the next word matches
// if that number is smaller than already matched chars then no_match
// if that number is bigger then try the next list word
// if that number is the same then try to match the next char to text,
// ---- if they match then new current word and charsMatched++
// ---- else try the next list word
while ( current < keys )
{
charToMatch = pgm_read_byte( charsMatchingPrevWord + current );
if ( charToMatch < charsMatched )
{
matchState = no_match;
break; // break from the while loop
}
else if ( charToMatch > charsMatched )
{
current++;
}
else // check this one
{
curPtr = (PGM_P)pgm_read_word( key_list + current );
charToMatch = pgm_read_byte( curPtr + charsMatched );
if ( text == charToMatch )
{
charsMatched++;
charToMatch = pgm_read_byte( curPtr + charsMatched );
if ( charToMatch == 0 )
{
matchState = list_matched;
}
break; // break from the while loop
}
else
{
current++;
}
}
}
if ( current >= keys )
{
matchState = no_match; // no_match
}
}
break;
case no_match :
if ( text == '\n' ) // reads and discards serial until newline
{
current = -1;
charsMatched = 0;
matchState = match_start;
Serial.println( "\n error\n" );
}
break;
case number :
if ( text >= '0' && text <= '9' )
{
if ( charsMatched++ < maxDigits )
{
ulAccum *= 10UL;
ulAccum += byte( text - '0' );
}
else matchState = no_match; // exceeded max length
}
else if ( text == ' ' || text == '\n' )
{
matchState = finished_match;
}
break;
}
}
void hitAny()
{
while ( Serial.available() < 1 );
while ( Serial.available() > 0 )
{
Serial.read();
delay( 250 ); // in case autorepeat is held down
}
}
void speedTest( int count )
{
static int i;
static byte j;
while ( count-- > 0 )
{
for ( i = 0; i < keys; i++ )
{
curPtr = (PGM_P)pgm_read_word( key_list + i );
j = 0;
while (( text = pgm_read_byte( curPtr + j++ )) > 0 )
{
match();
if ( matchState == no_match )
{
Serial.print( "test no_match fail at count " );
Serial.print( count, DEC );
Serial.print( " word " );
Serial.print( i );
Serial.print( " char " );
Serial.println( j );
return;
}
}
text = '\n';
match();
if ( matchState != finished_match )
{
Serial.print( "test state " );
Serial.print( matchState );
Serial.print( " fail at count " );
Serial.print( count, DEC );
Serial.print( " word " );
Serial.print( i );
Serial.print( " char " );
Serial.println( j );
return;
}
}
}
return;
}
//===========================================================
void setup()
{
Serial.begin( 115200 );
Serial.flush();
// listWords();
// hitAny();
usage();
unsigned long startms = millis();
speedTest( 1000 );
if ( matchState == finished_match )
{
startms = millis() - startms; // startms now has elapsed millis
Serial.print( "\n millis = " );
Serial.println( startms );
}
}
void loop()
{
}
I don’t want to deal with more of the data than I need to. Enough to match or no-match each char as it arrives. If it doesn’t match the list word currently being followed, it’s time to find the next word that can using the precompiled “how many chars of the current word the next word matches” ( pairs with next word).
use min/max indices to track the range of words that still match the characters received so far
The magic number coupled with chars-already-matched is my case by case guide to that with no more prep-work needed but I can see how ‘the magic’ could make that very quick work since the first word with magic number < chars-matched cannot match nor can any word after that.
In a sorted list with many words starting with the same base, the next word from each will have the most matching start chars. Past that, the data makes the pattern and the number of chars already matched limits next match.. is inside of that min-max already.
But you are welcome to upgrade the code. The last member to try left the precompiled data out which did pass the positive test I used for speed (worse-casing by forcing all-chars-matched with correct words, no-matches use way fewer cycles) so his passed false matches too. Except for that his hex file was a few bytes smaller than my spaghetti-hack and a tiny bit faster especially giving wrong answers. I got the feeling that making it work right would have resulted in bigger slower code than my mess, otherwise it would have been posted.