Byte concatenation adds erroneous bits

Working on a project involving extracting bytes from the Serial buffer from the Arduino Uno. My old version worked well, but after 1.0.1 was released, not only did it stop working correctly, but using older Arduino IDE builds failed to work also.

Rewriting the program, I’ve found out that shifting bits adds other bits that I don’t want. For example, incoming bytes may look like this:

3
AC
FE
BD
F7
B9

and the goal is to concatenate them like this (without commas): 3AC, FEBD, F7B9

but what I end up with is this (without commas): 3AC, FFFFFEBD, FFFFF7B9.

//AUTHOR: Jaz S;

#include <SoftwareSerial.h>

#define IMU_Tx 3
#define IMU_Rx 2
#define BAUD 57600
#define Kc 1 /*some shit*/
#define tR 1 /*some shit*/
#define REG_EULER_PHI_THETA 0X62
#define REG_EULER_PSI 0x63
#define ANGLE_SCALE_FACTOR 0.0109863     // Convert register data to Degrees

/*PT byte we're sending out... */
#define PT_HAS_DATA 0x80  //10000000
#define PT_IS_BATCH 0x40  //01000000
//#define PT_BATCH_LEN 0x10  //Batch length = 4
#define PT_BATCH_LEN 0x08  //Batch length = 2

SoftwareSerial UM6Data(IMU_Rx,IMU_Tx); //pin2 is Rx; pin3 is Tx

struct IMU{
  int in;
}phi, theta, psi; //Roll, pitch and yaw, respectively.

void setup(){
  UM6Data.begin(BAUD);  //Sets baud rate for communication with IMU.
  Serial.begin(BAUD);   //Sets baud rate for communication with XBee.
}

void loop(){
poll_UM6();
delay(200);
read_UM6();
delay(1000);
}//end VOID

void poll_UM6(){  //This void function makes a request for a packet from the IMU
  Serial.print("\nPolling the UM6...\n");
  byte chksum0 = 0, chksum1 = 0;
  unsigned int chksum = 0; 
  chksum = 's' + 'n' + 'p' + (PT_IS_BATCH | PT_BATCH_LEN) + REG_EULER_PHI_THETA; 
  chksum1 = chksum >> 8;
  chksum0 = chksum & 0xFF;
  UM6Data.write('s');
  UM6Data.write('n');
  UM6Data.write('p');
  UM6Data.write(PT_IS_BATCH | PT_BATCH_LEN);
  UM6Data.write(REG_EULER_PHI_THETA);
  UM6Data.write(chksum1);
  UM6Data.write(chksum0);
}  //end poll_UM6

// Incoming serial data is received by and stored in the serial receive buffer, which holds 128 bytes.
void read_UM6(){
  byte i = 0;
  byte data = 0;
  byte blank = 0, temp = 0;
  byte chksum1 = 0, chksum0 = 0;
  byte reg[9] = {0};
  unsigned int chksum = 0;
  //If there's data in the serial temp register and we haven't finished retrieving data...
  if ((UM6Data.available() > 0)){
    data = UM6Data.read();
    if ((data == 's')){
      Serial.print("i = "); Serial.print(i,DEC); Serial.println();
      Serial.print("'s'\n");
      reg[i] = data; 
      Serial.print(reg[i],HEX); Serial.println();
      i++;
      data = UM6Data.read();
      if ((data == 'n')){
        Serial.print("i = "); Serial.print(i,DEC); Serial.println();
        Serial.print("'n'\n");
        reg[i] = data;
        Serial.print(reg[i],HEX); Serial.println();
        i++;
        data = UM6Data.read();
        if ((data == 'p')){
          Serial.print("i = "); Serial.print(i,DEC); Serial.println();
          Serial.print("'p'\n");
          reg[i] = data;
          Serial.print(reg[i],HEX); Serial.println();
          i++;
          data = UM6Data.read();
          if (data == (PT_HAS_DATA | PT_IS_BATCH | PT_BATCH_LEN)){
            Serial.print("i = "); Serial.print(i,DEC); Serial.println();
            reg[i] = data;
            Serial.print(reg[i],HEX); Serial.println();
            i++;
            data = UM6Data.read();
            if (data == REG_EULER_PHI_THETA){
            Serial.print("i = "); Serial.print(i,DEC); Serial.println();
            reg[i] = data; 
            Serial.print(reg[i],HEX); Serial.println();
            i++;
              while (i < 15){
                Serial.print("i = "); Serial.print(i,DEC); Serial.println();
                data = UM6Data.read();
                reg[i] = data;
                Serial.print(reg[i],HEX); Serial.println(); 
                i++;
              }
              
              phi.in = ((reg[5] << 8) | reg[6]);
              theta.in = ((reg[7] << 8) | reg[8]);
              psi.in = ((reg[9] << 8) | reg[10]);
              
              Serial.print(phi.in,HEX); Serial.println();
              Serial.print(theta.in,HEX); Serial.println();
              Serial.print(psi.in,HEX); Serial.print("\n\n");
              
              return;
            }
              else {
                FLUSH:
                Serial.print("Flushing...\n");
                while ((UM6Data.available() > 0)){
                blank = UM6Data.read();  
                }
                return;
              }
            }
            else {
             Serial.print(data,HEX);
             goto FLUSH;
            }
          }
          else {
           Serial.print(data,HEX);
           goto FLUSH; 
          }
        }
        else {
         Serial.print(data,HEX);
         goto FLUSH; 
        }
      }
      else {
       Serial.print(data,HEX);
       goto FLUSH; 
      }
    }
    else if (UM6Data.available() == 0){  //No data left; all relevant data acquired and ready for processing.
    return;
  }  //end else
    else {
     Serial.print(data,HEX);
     goto FLUSH; 
    }
  }//end read_UM6

The trouble-making portion of the code is as follows:

phi.in = ((reg[5] << 8) | reg[6]);
theta.in = ((reg[7] << 8) | reg[8]);
psi.in = ((reg[9] << 8) | reg[10]);

What can I do to avoid this problem? I’ve tried everything I can think of.

Looks like a signed vs. unsigned problem.

            Serial.print(reg[i],HEX); Serial.println();

As opposed to the simpler

            Serial.println(reg[i],HEX);

?

struct IMU{
  int in;
}phi, theta, psi; //Roll, pitch and yaw, respectively.

A structure containing one value? Why?

You test that there is at least one byte available to read, and then proceed to read more than one byte. Bad idea.

This code is almost as painful to read as it is for you to write!

byte reg[9] = {0};

... further down ...

psi.in = ((reg[9] << 8) | reg[10]);

‘reg[9]’ is out of bounds let alone ‘reg[10]’

We can see that the serial protocol consists of a header '‘snp’ followed by 3 integers (of unspecified range) and unknown termination (perhaps another “snp”).

Document the protocol …

Slappy:
What can I do to avoid this problem? I’ve tried everything I can think of.

The problem is a consequence of sign extension. When you have a signed variable and cast it to a larger word size (e.g. byte to int, in to long) the most significant bit of the original variable is replicated into the ‘new’ bits. Usually this is a good thing - if you start with a byte containing ‘-1’ (0xFF) and convert to an int you get ‘-1’ (0xFFFF). But in this case you don’t want that. Store your incoming bytes as unsigned values to avoid that.

It looks as if you have a large number of other bugs to fix before your sketch will work correctly, but hopefully you will not have any trouble fixing this particular problem.

PeterH:
The problem is a consequence of sign extension. When you have a signed variable and cast it to a larger word size (e.g. byte to int, in to long) the most significant bit of the original variable is replicated into the ‘new’ bits. Usually this is a good thing - if you start with a byte containing ‘-1’ (0xFF) and convert to an int you get ‘-1’ (0xFFFF). But in this case you don’t want that. Store your incoming bytes as unsigned values to avoid that.

Oh dear God I knew it was something stupid like that… to be honest I thought I had already tried that but I guess I didn’t.

lloyddean:
This code is almost as painful to read as it is for you to write!

byte reg[9] = {0};

… further down …

psi.in = ((reg[9] << 8) | reg[10]);



'reg[9]' is out of bounds let alone 'reg[10]'. We can see that the serial protocol consists of a header ''snp' followed by 3 integers (of unspecified range) and unknown termination (perhaps another "snp"). Document the protocol ...

Sorry for the painful code, a lot of it was there for me to debug through the console, I forgot I left it in there before posting it.

I should have specified: the serial protocol ‘snp’ stands for ‘start new packet’ the next byte is defines the type of information to follow, the byte after is the address of the first register the data is from.

Yes, this code is a hideous rewrite of the previous version, but now that I know what went wrong, I can go back to using the old one with the necessary changes. Thanks everyone!