FastShiftOut with Print interface (experimental)

Find below an experimental class that implements a FastShiftOut Class which is derived from the Print class.
This means that all print and println interfaces are available to FSO, including printing of strings, floats and ints.
The class does not implement the latch signal (e.g. as needed when driving one or more 74HC595 shift registers).

The class is not tested so use at your own risk :wink:

As always, comments and remarks are welcome.

FastShiftOut.h

//
//    FILE: FastShiftOut.h
//  AUTHOR: Rob Tillaart
// VERSION: 0.1.03
// PURPOSE: shiftout that implements the Print interface
//     URL:
//
// Released to the public domain
//

#ifndef FastShiftOut_h
#define FastShiftOut_h

#if defined(ARDUINO) && ARDUINO >= 100
  #include "Arduino.h"
#else
  #include "WProgram.h"
#endif

#define FASTSHIFTOUT_LIB_VERSION "0.1.03"

#include "Print.h"

//
// FastShiftOut
//
// Shifts bytes out over a data pin and a clock pin by writing the port
// output registers directly, and exposes the Print interface so that
// print()/println() of strings, ints and floats end up in write().
// No latch signal is generated by this class.
//
class FastShiftOut : public Print
{
  public:
    // Arguments, in order: data pin, clock pin, bit order.
    // The bit order is compared against LSBFIRST; any other value makes
    // write() send the most significant bit first.
    FastShiftOut(uint8_t, uint8_t, uint8_t);

    // Declaring write(uint8_t) below would otherwise hide the remaining
    // write() overloads inherited from Print; keep them callable.
    using Print::write;

    // Shifts out one byte; implements the virtual write() of Print.
    // Always returns 1.
    virtual size_t write(uint8_t);

    // Returns the last byte passed to write(), or -1 if nothing has been
    // written yet.
    int read(void);

  private:
    uint8_t _bitorder;              // bit order given to the constructor
    int _value;                     // last byte written, -1 initially

    uint8_t _databit;               // bit mask of the data pin within its port
    volatile uint8_t *_dataout;     // output register of the data pin's port

    uint8_t _clockbit;              // bit mask of the clock pin within its port
    volatile uint8_t *_clockout;    // output register of the clock pin's port
};

#endif
// -- END OF FILE --

FastShiftOut.cpp

//
//    FILE: FastShiftOut.cpp
//  AUTHOR: Rob Tillaart
// VERSION: 0.1.03
// PURPOSE: shiftout that implements the Print interface
//     URL:
//
// Released to the public domain
//

#include "FastShiftOut.h"

//
// Constructor
//
// Configures both pins as outputs and caches, for each pin, the port output
// register and the bit mask within that port, so that write() can toggle the
// pins with direct register access instead of digitalWrite().
//
// datapin  - pin the data bits are presented on
// clockpin - pin that is pulsed once per bit
// bitOrder - LSBFIRST sends bit 0 first; any other value sends bit 7 first
//
FastShiftOut::FastShiftOut(uint8_t datapin, uint8_t clockpin, uint8_t bitOrder)
{
    _bitorder = bitOrder;
    _value = -1;    // nothing written yet; read() reports -1 until write() is called

    pinMode(datapin, OUTPUT);
    pinMode(clockpin, OUTPUT);

    // TODO: switch off PWM on the data pin, as digitalWrite() does:
    // uint8_t datatimer = digitalPinToTimer(datapin);
    // if (datatimer != NOT_ON_TIMER) turnOffPWM(datatimer);
    uint8_t dataPort = digitalPinToPort(datapin);
    _dataout = portOutputRegister(dataPort);
    _databit = digitalPinToBitMask(datapin);

    // TODO: same for the clock pin:
    // uint8_t clocktimer = digitalPinToTimer(clockpin);
    // if (clocktimer != NOT_ON_TIMER) turnOffPWM(clocktimer);
    uint8_t clockPort = digitalPinToPort(clockpin);
    _clockout = portOutputRegister(clockPort);
    _clockbit = digitalPinToBitMask(clockpin);
}

//
// write() must implement the virtual write of Print class
//
// Shifts out the eight bits of data, one clock pulse per bit, in the bit
// order chosen at construction. The byte is remembered for read().
// Always returns 1 (one byte handled).
//
size_t FastShiftOut::write(uint8_t data)
{
    _value = data;
    for (uint8_t i = 0; i < 8; i++)
    {
        // Pick the bit for this step: bit i when LSB first, bit (7 - i) otherwise.
        uint8_t mask = (_bitorder == LSBFIRST) ? (1 << i) : (1 << (7 - i));

        // The port updates are read-modify-write sequences, so interrupts are
        // switched off around each bit and the previous state is restored after.
        uint8_t oldSREG = SREG;
        cli();
        if (_value & mask) *_dataout |= _databit;
        else               *_dataout &= ~_databit;
        // Pulse the clock: high, then low again.
        *_clockout |= _clockbit;
        *_clockout &= ~_clockbit;
        SREG = oldSREG;
    }
    return 1;
}

//
// reads back the last value written.
//
int FastShiftOut::read()
{
 聽 聽return _value;
}
// -- END OF FILE --

FSO001.ino, tests some performance compared to standard shiftOut()

#include "FastShiftOut.h"

FastShiftOut FSO(12, 13, LSBFIRST);

void setup()
{
 聽Serial.begin(115200);
 聽Serial.println("Start FSO");

 聽unsigned long start = millis();
 聽for (int i=0; i<10000; i++)
 聽{
 聽 聽FSO.write(0x55);
 聽}
 聽Serial.println((millis() - start)/10.0, 4);

 聽start = millis();
 聽for (int i=0; i<10000; i++)
 聽{
 聽 聽FSO.write(0x55);
 聽 聽FSO.write(0x55);
 聽}
 聽Serial.println((millis() - start)/10.0, 4);

 聽start = millis();
 聽for (int i=0; i<10000; i++)
 聽{
 聽 聽shiftOut(12, 13, 0x55, LSBFIRST);
 聽}
 聽Serial.println((millis() - start)/10.0, 4);

 聽start = millis();
 聽for (int i=0; i<10000; i++)
 聽{
 聽 聽shiftOut(12, 13, 0x55, LSBFIRST);
 聽 聽shiftOut(12, 13, 0x55, LSBFIRST);
 聽}
 聽Serial.println((millis() - start)/10.0, 4);

 聽Serial.println("test print interface");
 聽start = millis();
 聽for (int i=0; i<10000; i++)
 聽{
 聽 聽FSO.println("Hello world");
 聽}
 聽Serial.println((millis() - start)/10.0, 4);

 聽start = millis();
 聽for (int i=0; i<10000; i++)
 聽{
 聽 聽FSO.println(1357);
 聽}
 聽Serial.println((millis() - start)/10.0, 4);

 聽start = millis();
 聽for (int i=0; i<10000; i++)
 聽{
 聽 聽FSO.println(3.14159265, 4);
 聽}
 聽Serial.println((millis() - start)/10.0, 4);

 聽Serial.println("done");
}

// Intentionally empty: this sketch has no repeating work.
void loop() {}

output:
Start FSO
61.3000
122.4000
114.8000
229.5000
test print interface
812.8000
403.8000
572.5000
done

The FSO Class is almost 2x faster than normal shiftOut() [times in uSec]
Times for strings, ints and floats are approx. as expected. (add newline chars too)

update:
For the latest version, check - Arduino/libraries at master · RobTillaart/Arduino · GitHub -

Note: the timings found above might be hard to reproduce as my print.cpp and print.h contains several speed ups (including experimental).
These are discussed in this thread - divmod10() : a fast replacement for /10 and %10 (unsigned) - Libraries - Arduino Forum -

Speed-optimized version of write(): ~twice as fast, but it disables interrupts for the whole byte, and it is larger.
(replace in lib above)

//
// Speed-oriented write(): interrupts are disabled once for the whole byte
// instead of once per bit, and the bit-order test is hoisted out of the loop.
//
// The pins are updated with read-modify-write operations inside the critical
// section. Writing port values captured up front would overwrite the data bit
// again on every clock edge whenever data and clock share one port, and would
// discard any change an interrupt made to the port before interrupts were
// disabled.
//
// The author reported approx 32 us / byte for the earlier variant that wrote
// pre-computed port values; this variant has not been re-timed.
//
// Always returns 1 (one byte handled).
//
size_t FastShiftOut::write(uint8_t data)
{
    _value = data;

    uint8_t oldSREG = SREG;
    cli();
    if (_bitorder == LSBFIRST)
    {
        // m becomes 0 after shifting 0x80 out of the 8-bit mask, so the loop
        // covers all eight bits, including the most significant one.
        for (uint8_t m = 0x01; m != 0; m <<= 1)
        {
            if (_value & m) *_dataout |= _databit;
            else            *_dataout &= ~_databit;
            // Pulse the clock: high, then low again.
            *_clockout |= _clockbit;
            *_clockout &= ~_clockbit;
        }
    }
    else
    {
        for (uint8_t m = 0x80; m > 0; m >>= 1)
        {
            if (_value & m) *_dataout |= _databit;
            else            *_dataout &= ~_databit;
            // Pulse the clock: high, then low again.
            *_clockout |= _clockbit;
            *_clockout &= ~_clockbit;
        }
    }
    SREG = oldSREG;    // restore the previous interrupt state
    return 1;
}

use at own risk :wink:

Rob,
Thanks for this new approach to shiftOut. I shall check how I can utilize it in my graphical waterfall project.
Regards

The latest version can be found here - Arduino/libraries at master · RobTillaart/Arduino · GitHub -

Thank you for the generosity and sharing:) I shall come back and report..
Regards

The latest version, 0.1.5, can be found here - Arduino/libraries at master · RobTillaart/Arduino · GitHub -

major improvement in performance by more efficient masking of bits.
Time dropped from 42.72 usec to 28.83 usec per byte.

tests done with IDE 1.8.1

example fastShiftOut: 0.1.5

Performance - time in us
FastShiftOut1: 29.76
FastShiftOut2: 58.59
        Delta: 28.83

Standard shiftOut1: 90.80
Standard shiftOut2: 181.00
             Delta: 90.20


Test print interface
println("Hello world"): 417.08
println(1357): 353.00
println(3.14159265, 4): 766.28

done ...

======================================

example fastShiftOut: 0.1.04

Performance - time in us
FastShiftOut1: 43.66
FastShiftOut2: 86.38
        Delta: 42.72

Standard shiftOut1: 90.79
Standard shiftOut2: 181.00
             Delta: 90.21


Test print interface
println("Hello world"): 597.68
println(1357): 436.44
println(3.14159265, 4): 877.48

done ...

Hi, Thanks for your code.

I am trying to run 4 independent LED matrix displays from 1 Arduino. It's actually working with the regular shiftOut, but I have quite a flicker in the characters. I was hoping to use your library to speed up the writes to the displays; however, I am unsure how to use it for more than one display, as I seem to get a redeclaration error.

Your help is much appreciated

This topic was automatically closed 120 days after the last reply. New replies are no longer allowed.