Fast Software Serial (1-1M baud)

This was just a mad experiment that I've probably spent too much time on, but the loop-back test results were encouraging.

Objective:
To demonstrate a fast and error free method of software serial communication.

Problem:
Existing methods may accumulate timing errors as each bit is sent/received because bit timing is based only on a baud rate calculation. Sources of these errors include calculation precision, jitter, latency and baud rate differences between devices. Accumulating errors limit the useful bandwidth that may already be available.

Possible Solution:
If the start time (leading edge) of each bit to transmit is referenced to a fast time reference counter, precise bit by bit synchronization of the byte's transmission would eliminate cumulative errors and only leave jitter or latency to contend with.

Preliminary Specification:

  • Sync reference is TC counter (32 bit) counting at 42MHz (23.81 ns per count).
  • Bit by bit synchronization to eliminate accumulating errors.
  • Bit by bit send / receive similar to SPI but without clock or chip select.
  • Calculations are performed prior to transmission.
  • Use digitalWriteDirect and digitalReadDirect functions for high speed.
  • Serial Monitor shows test table of synchronization results.

Loopback Test Results:
1-1M range tested OK, sync drift at 2M but RX still OK.

To Do:
Need to connect to a second Due board, have it manipulate the byte and send the previous modified byte back immediately. Much left to do but advanced C programming is beyond my experience level. I do believe at this point that a final solution with 500K baud can be achieved while not being sensitive to interrupt usage by other libraries.

Notes:

  • SysTick counter for reference worked, but overhead of tracking reloads every ms and calculating totals limited maximum bandwidth to about 230K baud. Also, its 24-bit width limited the low range to about 9600 baud.
  • The 32 bit timer/counter can easily be cleared and restarted and provides much greater bandwidth and stability due to direct reads (no calculations required during transmission).
  • The TX output can be used as a programmable pulse generator or pattern generator.

The Complete Code:

/* Fast Software Serial (F2S) 1-1M baud
   Time-base method for eliminating cumulative errors, where
   each byte is synchronized to the system's clock 20 times.
   Baud rate range 1 - 1,000,000. (1 baud step resolution).
   Bit synchronization:  TX __????????|__ RX __|???????|__
   Install jumper from pin 9 (TX) to pin 8 (RX) for loopback.
   by dlloyd*/

//Main Settings
double MckHz = 84000000;    // master clock in Hz; the timer is clocked at MckHz / 2
double baudRate = 1000000; // 1-1M range, sync drift at 2M but RX still OK.
byte byteValue = B10101010; // byte pattern to transmit. Note: 1 byte at 1 baud takes 10 sec.
unsigned int txBytes = 200; // Number of bytes to transfer

//variables
byte byterx = 0;            // received bits are shifted into this byte
byte bitshifttx[8];         // TX level for each data bit, filled in by loop() before sending
const byte tx = 9, rx = 8;  //TX on pin 9, RX on pin 8
double countPerBit, nsPerCount, nsPerBit; // bit timing, computed in loop()
// sync[k]:   timer-count offset of the k-th half-bit point, relative to the start bit
// actual[k]: cleared by loop(); not otherwise used in this sketch
// count[k]:  timer value captured while waiting for sync point k (shown in the results table)
int sync[21], actual[21], count[21];
// Row labels for the results table. String literals are const, so the array
// holds pointers to const char (a plain char* here is ill-formed in C++11).
const char* label[] = {"^bit0   ", "bi^t0  ", "^bit1  ", "bi^t1  ", "^bit2  ", "bi^t2  ",
"^bit3  ", "bi^t3  ", "^bit4  ", "bi^t4  ", "^bit5  ", "bi^t5  ", "^bit6  ", "bi^t6  ",
"^bit7  ", "bi^t7  ", "^bit8  ", "bi^t8  ", "^bit9  ", "bi^t9  ", "^bit10 "};

void setup() {
  Serial.begin(115200);
  pinMode(tx, OUTPUT); //TX
  digitalWriteDirect(tx, HIGH);
  pinMode(rx, INPUT_PULLUP); //RX

  // configure clock --------------------------------------------------------------------------
  REG_PMC_PCER0 = REG_PMC_PCER0 | 0x80000000; //Peripheral Clock Enable Register 1 (activate clock for TC4, id31, bit31` of PMC_PCSR0)
  TC_Configure(/* clock */TC1,/* channel */1, TC_CMR_WAVE | TC_CMR_WAVSEL_UP_RC | TC_CMR_TCCLKS_TIMER_CLOCK1); //TIMER_CLOCK1 = MCK/2
  TC_SetRC(TC1, 1, 0);
  TC_Start(TC1, 1);
}

void loop() {

  TcChannel * t = &(TC1->TC_CHANNEL)[1] ;    // pointer to TC1, channel 1 registers
  t->TC_CCR = TC_CCR_SWTRG;  // reset counter and start the clock

  nsPerCount = 1000000000 / (MckHz / 2);
  nsPerBit = 1000000000 / baudRate;
  countPerBit = nsPerBit / nsPerCount;

// initialize table for serial monitor
  int i = 0;
  for (i = 0; i < 21; i++) {
    sync[i] = i * countPerBit / 2;
    actual[i] = 0;
    count[i] = 0;
  }

// initialize bits to send
 bitshifttx[0] = byteValue & B10000000;
  for (i = 1; i < 8; i++) {
 bitshifttx[i] = (byteValue <<= 1) & B10000000;
  }

  Serial.print("MCK MHz:     ");
  Serial.println( MckHz / 1000000 );
  Serial.print("Baud Rate:   ");
  Serial.println( baudRate, 0 );
  Serial.print("nsPerCount:  ");
  Serial.println( nsPerCount, 4 );
  Serial.print("nsPerBit:    ");
  Serial.println( nsPerBit, 4 );
  Serial.print("countPerBit: ");
  Serial.println(countPerBit, 4);
  Serial.println();

  //======================= txrx byte ============================
  int j = 0; i = 0;
  for (j = 0; j < txBytes; j++) { //bytes to transmit
    t->TC_CCR = TC_CCR_SWTRG; // reset and start counter
    //-------------------------------
    count[0] = REG_TC1_CV1;
    digitalWriteDirect(tx, LOW); // ^bit0 (start bit)
    //-------------------------------
    do { //sync bi^t0
      count[1] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[1] );
    //-------------------------------
    do { //sync ^bit1
      count[2] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[2] );
    digitalWriteDirect(tx, bitshifttx[0]);
    //-------------------------------
    do { //sync bi^t1
      count[3] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[3] );
    byterx  = (byterx <<= 1) | digitalReadDirect(rx);
    //-------------------------------
    do { //sync ^bit2
      count[4] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[4] );
    digitalWriteDirect(tx, bitshifttx[1]);
    //-------------------------------
    do { //sync bi^t2
      count[5] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[5] );
    byterx  = (byterx <<= 1) | digitalReadDirect(rx);
    //-------------------------------
    do { //sync ^bit3
      count[6] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[6] );
    digitalWriteDirect(tx, bitshifttx[2]);
    //-------------------------------
    do { //sync bi^t3
      count[7] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[7] );
    byterx  = (byterx <<= 1) | digitalReadDirect(rx);
    //-------------------------------
    do { //sync ^bit4
      count[8] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[8] );
    digitalWriteDirect(tx, bitshifttx[3]);
    //-------------------------------
    do { //sync bi^t4
      count[9] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[9] );
    byterx  = (byterx <<= 1) | digitalReadDirect(rx);
    //-------------------------------
    do { //sync ^bit5
      count[10] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[10] );
    digitalWriteDirect(tx, bitshifttx[4]);
    //-------------------------------
    do { //sync bi^t5
      count[11] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[11] );
    byterx  = (byterx <<= 1) | digitalReadDirect(rx);
    //-------------------------------
    do { //sync ^bit6
      count[12] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[12] );
    digitalWriteDirect(tx, bitshifttx[5]);
    //-------------------------------
    do { //sync bi^t6
      count[13] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[13] );
    byterx  = (byterx <<= 1) | digitalReadDirect(rx);
    //-------------------------------
    do { //sync ^bit7
      count[14] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[14] );
    digitalWriteDirect(tx, bitshifttx[6]);
    //-------------------------------
    do { //sync bi^t7
      count[15] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[15] );
    byterx  = (byterx <<= 1) | digitalReadDirect(rx);
    //-------------------------------
    do { //sync ^bit8
      count[16] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[16] );
    digitalWriteDirect(tx, bitshifttx[7]);
    //-------------------------------
    do { //sync bi^t8
      count[17] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[17] );
    byterx  = (byterx <<= 1) | digitalReadDirect(rx);
    //-------------------------------
    do { //sync ^bit9 (stop bit)
      count[18] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[18] );
    digitalWriteDirect(tx, HIGH);
    //-------------------------------
    do { //sync bi^t9 (stop bit)
      count[19] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[19] );
    //-------------------------------
    do { //sync ^bit10 (sync for next start)
      count[20] = REG_TC1_CV1;
    } while ( REG_TC1_CV1 < count[0] + sync[20] );
    //-------------------------------

  // inter-byte time delay (for testing)
    delayMicroseconds(100);
  }

  Serial.println("Label   Sync   Count");
  for (i = 0; i < 21; i++) {
    Serial.print(label[i]);
    Serial.print(sync[i]);
    Serial.print("   ");
    Serial.println(count[i]);
  }
    Serial.println();
    Serial.print("Byte RX: ");
    Serial.println(byterx, BIN);

  delay(10000000);
}
//functions---------------------------------------------

//digitalWriteDirect
inline void digitalWriteDirect(int pin, boolean val) {
  if (val) g_APinDescription[pin].pPort -> PIO_SODR = g_APinDescription[pin].ulPin;
  else    g_APinDescription[pin].pPort -> PIO_CODR = g_APinDescription[pin].ulPin;
}

//digitalReadDirect
inline int digitalReadDirect(int pin) {
  return !!(g_APinDescription[pin].pPort -> PIO_PDSR & g_APinDescription[pin].ulPin);
}

Interesting. Thanks