Detect byte pattern as terminating pattern for incoming bytes on UART

I would like to detect a certain byte pattern in an incoming byte stream at the UART. I am using Arduino. Currently, the code detects a single \r character. Once this character is detected, the byte stream before it gets stored into a buffer. This is easy. This is my code:

int incomingByte = 0; // for incoming serial data

void setup() {
    Serial.begin(9600);     // opens serial port, sets data rate to 9600 bps
}

void loop() {

    // send data only when you receive data:
    if (Serial.available() > 0 ) {
        // read the incoming byte:
        incomingByte = Serial.read();
        if (incomingByte != '\r')
        {
            buffer_incoming_data[index_buffer]=incomingByte;
            index_buffer++;
        }
        else
        {
            index_buffer = 0;
        }
    }
}

Here is my problem. Instead of a single \r character, I would like to detect a byte pattern that looks like this: 0xAA 0xBB 0xCC. When this byte pattern is detected, the byte stream before the byte pattern gets stored into a buffer.

Store all input characters in a buffer. Every time a character is stored, check whether the buffer ends in one of the strings “0xAA”, “0xBB” or “0xCC”. If it does, write a null terminator over the first character of the termination string, so that the buffer holds only the text that preceded it.

The following was tested by typing “test0xAA”, “test0xBB” and “test0xCC” into the serial monitor and it worked, check it out.

int incomingByte = 0;            // most recently received serial byte
char buffer_incoming_data[100];  // NUL-terminated accumulation buffer
int index_buffer = 0;            // next write position in buffer_incoming_data

// Termination strings: input is complete once the buffer ends in any of them.
// The pointees are const because string literals must not be modified.
const uint8_t numEndStrings = 3;
const char *endStrings[] = {
  "0xAA", "0xBB", "0xCC" };

// endOfInput
//
// Return true if the input buffer ends in a termination string.
//
// On a match the termination string is cut off by writing a NUL over its
// first character, so buffer_incoming_data then holds only the text that
// preceded it. On no match the buffer is left untouched and false is
// returned. index_buffer is not modified here.
//
bool endOfInput()
{
  // The input length cannot change while we search, so measure it once.
  const size_t inputLen = strlen(buffer_incoming_data);

  for (uint8_t s = 0; s < numEndStrings; s++)
  {
    const size_t endLen = strlen(endStrings[s]);

    // input is too short to contain this termination string
    if (inputLen < endLen)
      continue;

    // where the termination string would have to start
    char *tail = buffer_incoming_data + (inputLen - endLen);

    if (strcmp(endStrings[s], tail) == 0)
    {
      // terminate input string just before the termination string
      *tail = '\0';
      return true;
    }
  }

  return false;
}

void setup() {
  Serial.begin(9600);     // opens serial port, sets data rate to 9600 bps

}



void loop() {

  // send data only when you receive data:
  if (Serial.available() > 0 ) {
    // read the incoming byte:
    incomingByte = Serial.read();

    // add character to buffer 
    buffer_incoming_data[index_buffer++]=incomingByte;
    buffer_incoming_data[index_buffer]= '\0';
    
    // if we got a terminate string
    if(  endOfInput() ) {
      
      // print to serial
      Serial.println(buffer_incoming_data);
      
      // reset buffer
      index_buffer = 0;
      *buffer_incoming_data = '\0';
      
    } // if
  }
}

You could use two ring buffers.

Here's an example of a ring buffer implementation.

The first buffer would be three bytes long. The second buffer would be a long as you could possibly need it to be. Every byte from Serial.read() is added to the end of the first ring buffer (using put() ).

If the first ring buffer is full ( is_full() ), check its contents ( using peek(0), peek(1), peek(2) ). If it does not contain 0xAA 0xBB 0xCC, the first byte from the first ring buffer is removed ( get() ) and added to the end of the second ring buffer ( put() ).

If the first ring buffer does contain 0xAA 0xBB 0xCC, empty it (empty() ). The second ring buffer contains the text you're after so process it as required.

Hopefully that's helpful and not too confusing.

You can use a Finite-State Machine to watch for the multi-byte end-of-message. There are no extra buffers, and it only checks each received character once.

That is, it doesn’t do a string compare on each character plus 4 string lengths, an O(4*N²) behavior. :stuck_out_tongue: Okay, that was a little Math-y. Let’s just say this is really efficient:

//  Example to show how to use a Finite-State Machine (FSM) 
//    to watch for a specific byte sequence as a message trailer.

enum state_t { ACCUM_STATE, BB_STATE, CC_STATE }; // list the states
static state_t state = ACCUM_STATE;  // the FSM "state" variable

// The three trailer bytes, in the order they must arrive.
// NOTE: identifiers that start with an underscore followed by a capital
// letter are reserved for the implementation; the names are kept because
// loop() refers to them.
static const uint8_t _AA = 'A'; // replace with 0xAA or whatever
static const uint8_t _BB = 'B'; //
static const uint8_t _CC = 'C'; //

static char buffer[32];          // message bytes received so far
static uint8_t buffer_index = 0; // bytes offered to saveChar; may exceed sizeof(buffer)

// Helper function to save a char, if there's room.
//
// The char is stored only while buffer_index is inside the buffer. The index
// keeps counting past the end so callers can tell the message was truncated,
// but it stops at 255: letting the 8-bit counter wrap back to 0 would make the
// range check pass again and overwrite the start of the message.
static void saveChar( char c )
{
  const uint8_t indexMax = 0xFF; // largest value buffer_index can hold

  if (buffer_index < sizeof(buffer))
    buffer[ buffer_index ] = c;
  if (buffer_index < indexMax)
    buffer_index++;
}

// Runs once at start-up.
void setup() {
    Serial.begin(9600);     // opens serial port, sets data rate to 9600 bps
}

// Runs repeatedly: consumes at most one pending serial byte per pass and feeds
// it to a three-state machine that watches for the trailer _AA _BB _CC.
// Bytes that are not part of a trailer are stored with saveChar(); when the
// full trailer has been seen, the accumulated message is printed and the
// buffer is emptied.
//
// Limitation: a byte that breaks a partial trailer is saved without being
// re-tested against _AA, so a trailer that starts on that byte is missed
// (for example _AA _AA _BB _CC, or _AA _BB _AA _BB _CC).
void loop() {

    if (Serial.available() > 0 ) {

        // read the incoming byte:
        uint8_t c = Serial.read();

        // The FSM branches to the current state first, then checks the received character

        switch (state) {
          case ACCUM_STATE:
            // Hold back a possible first trailer byte; everything else is message data.
            if (c == _AA)
              state = BB_STATE;
            else
              saveChar( c );
            break;

          case BB_STATE:
            // We got the 0xAA, did we get the 0xBB?
            if (c == _BB)
              state = CC_STATE;
            else {
              //  Ooops! Didn't get a 0xBB, save what we did get.
              saveChar( _AA );
              saveChar( c );
              state = ACCUM_STATE;
            }
            break;

          case CC_STATE:
            // We got the 0xAA and 0xBB, did we get the 0xCC?
            if (c == _CC) {

              // Yes!
              Serial.print( buffer_index );
              Serial.println( F(" bytes received!") );

              // Do whatever you need to with it *here*.  I'll print out the bytes:

              // buffer_index can exceed the buffer size, so both limits are checked.
              for (int i=0; (i < buffer_index) && (i < sizeof(buffer)); i++)
                Serial.print( buffer[i] );
              Serial.println();

              // You *could* NUL-terminate it to make it a C string
              if (buffer_index >= sizeof(buffer)) // safely!
                buffer_index = sizeof(buffer)-1;
              buffer[ buffer_index ] = '\0';
              
              // Mark the buffer as empty for the next message...
              buffer_index = 0;

            } else {
              //  Ooops! Didn't get a 0xCC, save what we did get.
              saveChar( _AA );
              saveChar( _BB );
              saveChar( c );
            }
            state = ACCUM_STATE;  // Go back to accumulating
            break;
        }
    }
}

Seems like this topic has been coming up a lot, recently. :slight_smile:

Cheers,
/dev

/dev:
You can use a Finite-State Machine to watch for the multi-byte end-of-message. There are no extra buffers, and it only checks each received character once.

That is, it doesn’t do a string compare on each character plus 4 string lengths, an O(4*N²) behavior. :stuck_out_tongue: Okay, that was a little Math-y. Let’s just say this is really efficient:

//  Example to show how to use a Finite-State Machine (FSM) 

//    to watch for a specific byte sequence as a message trailer.

enum state_t { ACCUM_STATE, BB_STATE, CC_STATE }; // list the states
static state_t state = ACCUM_STATE;  // the FSM "state" variable

// The three trailer bytes, in the order they must arrive.
// (Plain ASCII quotes are required here; typographic quotes do not compile.)
static const uint8_t _AA = 'A'; // replace with 0xAA or whatever
static const uint8_t _BB = 'B'; //
static const uint8_t _CC = 'C'; //

static char buffer[32];          // message bytes received so far
static uint8_t buffer_index = 0; // bytes offered to saveChar; may exceed sizeof(buffer)

// Helper function to save a char, if there's room.
//
// The char is stored only while buffer_index is inside the buffer. The index
// keeps counting past the end so callers can tell the message was truncated,
// but it stops at 255: letting the 8-bit counter wrap back to 0 would make the
// range check pass again and overwrite the start of the message.
static void saveChar( char c )
{
  const uint8_t indexMax = 0xFF; // largest value buffer_index can hold

  if (buffer_index < sizeof(buffer))
    buffer[ buffer_index ] = c;
  if (buffer_index < indexMax)
    buffer_index++;
}

void setup() {
    Serial.begin(9600);    // opens serial port, sets data rate to 9600 bps
}

void loop() {

if (Serial.available() > 0 ) {

// read the incoming byte:
        uint8_t c = Serial.read();

// The FSM branches to the current state first, then checks the received character

switch (state) {
          case ACCUM_STATE:
            if (c == _AA)
              state = BB_STATE;
            else
              saveChar( c );
            break;

case BB_STATE:
            // We got the 0xAA, did we get the 0xBB?
            if (c == _BB)
              state = CC_STATE;
            else {
              //  Ooops! Didn’t get a 0xCC, save what we did get.
              saveChar( _AA );
              saveChar( c );
              state = ACCUM_STATE;
            }
            break;

case CC_STATE:
            // We got the 0xAA and 0xBB, did we get the 0xCC?
            if (c == _CC) {

// Yes!
              Serial.print( buffer_index );
              Serial.println( F(" bytes received!") );

// Do whatever you need to with it here.  I’ll print out the bytes:

for (int i=0; (i < buffer_index) && (i < sizeof(buffer)); i++)
                Serial.print( buffer[i] );
              Serial.println();

// You could NUL-terminate it to make it a C string
              if (buffer_index >= sizeof(buffer)) // safely!
                buffer_index = sizeof(buffer)-1;
              buffer[ buffer_index ] = ‘\0’;
             
              // Mark the buffer as empty for the next message…
              buffer_index = 0;

} else {
              //  Ooops! Didn’t get a 0xCC, save what we did get.
              saveChar( _AA );
              saveChar( _BB );
              saveChar( c );
            }
            state = ACCUM_STATE;  // Go back to accumulating
            break;
        }
    }
}




Seems like this topic has been coming up a lot, recently. :)

Cheers,
/dev

That’s fantastic! Thanks for posting this, I learned something today.

Camel:

// Abridged quotation of the FSM loop: reads one serial byte per pass and
// steps a three-state machine watching for the trailer _AA _BB _CC.
// The trailer-complete branch is elided ("SNIP!").
void loop() {

if (Serial.available() > 0 ) {

// read the incoming byte:
        uint8_t c = Serial.read();

// The FSM branches to the current state first, then checks the received character

switch (state) {
          case ACCUM_STATE:
            // Hold back a possible first trailer byte; everything else is message data.
            if (c == _AA)
              state = BB_STATE;
            else
              saveChar( c );
            break;

case BB_STATE:
            // We got the 0xAA, did we get the 0xBB?
            if (c == _BB)
              state = CC_STATE;
            else {
              //  Ooops! Didn't get a 0xBB, save what we did get.
              // The byte is saved without being re-tested against _AA.
              saveChar( _AA );
              saveChar( c );
              state = ACCUM_STATE;
            }
            break;

case CC_STATE:
            // We got the 0xAA and 0xBB, did we get the 0xCC?
            if (c == _CC) {

// SNIP!

} else {
              //  Ooops! Didn't get a 0xCC, save what we did get.
              saveChar( _AA );
              saveChar( _BB );
              saveChar( c );
            }
            state = ACCUM_STATE;  // Go back to accumulating
            break;
        }
    }
}




Seems like this topic has been coming up a lot, recently. :)

This will fail on input AA AA BB CC. After the first AA, it will go to BB state. In BB state, it will get another AA and swallow that without coping with the possibility that it might be the start of a new sequence.

The finite-state-machine solution is a good solution, it's just that your finite state machine is wrong :frowning: .

your finite state machine is wrong

You cut me to the quick, Sir! Hmph, it did exactly what I told it to do. :slight_smile: My assumption was that the trailer chars cannot appear in the message. That is probably a bad assumption, as data errors can introduce false trailer chars.

Here’s a program with a slightly different structure (if statements), and a little debug info:

//  Example to show how to use a Finite-State Machine (FSM) 
//    to watch for a specific byte sequence as a message trailer.

enum state_t { ACCUM_STATE, BB_STATE, CC_STATE }; // list the states
static state_t state = ACCUM_STATE;  // the FSM "state" variable

// The three trailer bytes, in the order they must arrive.
static const uint8_t _AA = 'A'; // replace with 0xAA or whatever (almost)
static const uint8_t _BB = 'B'; //
static const uint8_t _CC = 'C'; //

static char buffer[32];          // bytes received so far
static uint8_t buffer_index = 0; // bytes offered to saveChar; may exceed sizeof(buffer)

// Helper function to save a char, if there's room.
//
// The char is stored only while buffer_index is inside the buffer. The index
// keeps counting past the end so callers can tell the message was truncated,
// but it stops at 255: letting the 8-bit counter wrap back to 0 would make the
// range check pass again and overwrite the start of the message, and a small
// wrapped value would underflow when a caller subtracts the trailer length.
static void saveChar( char c )
{
  const uint8_t indexMax = 0xFF; // largest value buffer_index can hold

  if (buffer_index < sizeof(buffer))
    buffer[ buffer_index ] = c;
  if (buffer_index < indexMax)
    buffer_index++;
}

void setup()
{
    Serial.begin(9600);     // opens serial port, sets data rate to 9600 bps
    Serial.println( F("FSM example started.") );
}

void loop()
{
  if (Serial.available() > 0 ) {

    // read the incoming byte:

    uint8_t c = Serial.read();
    saveChar( c );  // always saved

    // Print out a little debug information so you can see how
    //   the FSM works.

    Serial.print( state );
    Serial.print( ' ' );
    if ((' ' < c) && (c <= '~')) {
      Serial.print( '\'' );
      Serial.print( (char) c );
      Serial.print( '\'' );
    } else
      Serial.print( c );
    Serial.print( F(" -> ") );

    bool trailerReceived = false;

    // The FSM first checks to see if the trailer might be starting.
    //   If not, it handles the char according to the current state.

    if (c == _AA) {
      // Always restart
      state = BB_STATE;

    } else if (state == BB_STATE) {
  
      // We got the 0xAA, did we get the 0xBB?
      if (c == _BB)
        state = CC_STATE; // Yes!
      else
        state = ACCUM_STATE;  // No, go back to accumulating
      
    } else if (state == CC_STATE) {

      // We got the 0xAA and 0xBB, did we get the 0xCC?
      if (c == _CC)
        trailerReceived = true; // Yes!
      state = ACCUM_STATE;  // Go back to accumulating
    }

    Serial.println( state ); // end of debug info

    if (trailerReceived) {

      // Do whatever you need to with it *here*.  I'll print a few things.
     
      Serial.print( buffer_index );
      Serial.println( F(" bytes received!") );

      buffer_index -= 3; // Trim trailer that was saved
      for (uint8_t i=0; (i < buffer_index) && (i < sizeof(buffer)); i++)
        Serial.print( buffer[i] );
      Serial.println();

      // You *could* NUL-terminate it to make it a C string
      if (buffer_index >= sizeof(buffer)) // safely!
        buffer_index = sizeof(buffer)-1;
      buffer[ buffer_index ] = '\0';
      
      // Empty the buffer for the next message...
      buffer_index = 0;
    }
  }
}

Picking your message framing requires serious thought. What if the message can contain the trailer? Is it escaped in some way? Can the trailer contain subsets of itself? For example, 0xAA 0xBB 0xAA would require a different FSM.

Good catch, and a good starting point for further discussion for the curious.

Cheers,
/dev