SSD1306 HW SPI which is the fastest library?

Hey folks, I am trying to make a smooth and fast VU meter with Arduino Nano 328PB and SSD1306 128x64 v2.4" SPI display from Aliexpress. I am using one Nano per channel to maximize the speed of graphics. I am just wondering if there is anything faster out there than HW SPI with the Adafruit SSD1306 library, as my needle is smoothly animated between position changes (like in a real VU meter) instead of simply jumping to the new position. I am also not using any bitmap draw; all drawing is done with rectangle and line functions. To get a good response, I have to draw every other pixel; if I draw every pixel between position changes, the VU meter is very smooth but lags behind.

For atmega328, probably not. You might want to try the U8G2 library. If you do, make sure to use the "full size buffer" option, otherwise it will definitely be slower. But I imagine both libraries will be similar speed.

One Nano for both channels should be enough, if you optimise the code well, I think.

If you post your code, I'm sure forum members can find ways to optimise the speed.

Exactly like a real mechanical VU meter!

Here's the code:

#define DEBUG
#include <SPI.h>
#include <Adafruit_GFX.h>
#include <Adafruit_SSD1306.h>
// Pin numbers handed to the display constructor below.
#define OLED_DC     6
#define OLED_CS     7
#define OLED_RESET  8
// 128x64 display object on the hardware SPI bus (&SPI); all drawing in this sketch goes through it.
Adafruit_SSD1306 oled(128, 64, &SPI, OLED_DC, OLED_RESET, OLED_CS);



// Console logging helpers. Without DEBUG they expand to nothing, so the
// arguments are never evaluated; with DEBUG they forward to Serial.
#ifndef DEBUG
#define PRINT(x)
#define PRINTLN(x)
#else
#define PRINT(x) Serial.print(x)
#define PRINTLN(x) Serial.println(x)
#endif

// Shared state of the sketch: needle geometry, meter positions and peak-hold bookkeeping.
#define ANALOG_INPUT A0                 // analog input for outside audio source
int hMeter = 64;                      // horizontal center for needle animation
int vMeter = 85;                      // vertical center for needle animation (outside of display limits)
int rMeter = 85;                      // length of needle animation or arch of needle travel
float meterValue = 0;                 // target needle position; loop() sets it to peak - 32 * resolution
float meterPos = 0;                   // current needle position; stepped by one towards meterValue, converted to an angle in drawDisplay()
int peak;                             // latest scaled peak reading computed in loop()
int peakPos;                          // bar-graph position; the bar is drawn peakPos * 2 / resolution pixels wide
int oldPeak;                          // not referenced by the code shown here
int peakHoldPos;                      // position of the 4-pixel peak-hold marker
int dB = 0;                           // value printed at the bottom left, derived from peakHoldPos in loop()
unsigned long peakMillis;             // millis() timestamp of the last peak-hold update / start of the fall
unsigned long peakHoldMillis;         // millis() timestamp of the last one-step drop of the peak-hold marker
unsigned int inputValue;              // most recent analogRead() sample
unsigned int inputValueMax = 0;       // largest sample seen in the current measurement burst
bool peakFalling;                     // set once the hold time has expired, cleared when a new peak is latched
byte resolution = 1;          // 2 - higher resolution, but slower; 1 faster, lower resolution
byte range = 1;               // 2 - more sensitivity, 1 - normal range


// Renders one complete frame into the display buffer and pushes it to the panel:
// needle, scale labels, bar-graph frame, dB read-out, level bar and peak-hold marker.
// Reads the globals meterPos, peakPos, peakHoldPos, dB and resolution; takes no
// parameters and returns nothing.
void drawDisplay() {
  oled.clearDisplay();

  // Select size and colour BEFORE the first print(). Previously the colour was
  // only set after the scale labels, so on the very first frame they were drawn
  // with the library's default text colour rather than SSD1306_WHITE.
  oled.setTextSize(1);
  oled.setTextColor(SSD1306_WHITE); // Draw white text

  // Needle: the angle is derived once from the current meter position and used
  // for both end-point coordinates.
  const float angle = meterPos / 30 / resolution;
  int a1 = (hMeter + (sin(angle) * rMeter));    // meter needle horizontal coordinate
  int a2 = (vMeter - (cos(angle) * rMeter));    // meter needle vertical coordinate
  oled.drawLine(a1, a2, hMeter, vMeter, WHITE);         // draws needle

  // Scale labels along the top edge.
  oled.setCursor(0, 0); oled.print("-20");
  oled.setCursor(115, 0); oled.print("+5");
  oled.setCursor(95, 0); oled.print("0");
  oled.setCursor(55, 0); oled.print("-8");

  // Bar-graph frame and the "red area" box inside it.
  oled.drawRect(0, 10, 128, 6, WHITE);
  // Width 30 ends the box at x = 127, the last column of the 128-pixel-wide
  // display; the old width of 128 extended far past the right edge.
  oled.drawRect(98, 11, 30, 4, WHITE);    // red area rectangle
  //oled.drawLine(98, 9, 128, 9, WHITE);   // red area line
  //oled.fillRect(100, 10, 28, 4, WHITE);

  // dB read-out: non-negative values get a leading space where the minus sign would be.
  oled.setCursor(0, 55); if (dB > -1)oled.print(" "); oled.print(dB);
  oled.setCursor(20, 55); oled.print("dB");
  oled.setCursor(60, 40); oled.print("VU");

  // Level bar and the 4-pixel-wide peak-hold marker.
  oled.fillRect(0, 11, peakPos * 2 / resolution, 4, WHITE);
  oled.fillRect(peakHoldPos * 2 / resolution, 11, 4, 4, WHITE);

  oled.display();
}

// One-time initialisation: serial console (DEBUG builds only), analog input pin,
// display, and the peak-hold timestamp.
void setup() {
#ifdef DEBUG
  Serial.begin(115200);
#endif
  pinMode(ANALOG_INPUT, INPUT);
  // begin() reports failure through its return value. Every later drawing call
  // needs a working display, so stop here instead of carrying on regardless.
  if (!oled.begin(SSD1306_SWITCHCAPVCC)) {
#ifdef DEBUG
    Serial.println("SSD1306 init failed");
#endif
    for (;;) {}   // nothing sensible can be done without the display
  }
  oled.clearDisplay();
  peakMillis = millis();
}

// Main cycle: age the peak-hold marker, sample the input, update peak-hold and
// dB values, then step the needle one position at a time towards the new
// reading, redrawing the display on every step.
void loop() {
  // Once the hold time has expired, let the marker sink one step every 100 ms
  // until it reaches the current peak.
  if (peakFalling && peak < peakHoldPos) {
    if (millis() - peakHoldMillis > 100) {
      peakHoldPos--;
      peakHoldMillis = millis();
    }
  }

  // measure input voltage: keep the largest of a burst of samples
  inputValueMax = 0;
  const int sampleCount = 200 / resolution;
  for (int i = 0; i < sampleCount; i++) {
    inputValue = analogRead(ANALOG_INPUT);
    if (inputValue > inputValueMax) inputValueMax = inputValue;
  }

  // Scale with a 32-bit intermediate: with 16-bit unsigned int a full-scale
  // reading times 64 * resolution overflows as soon as resolution is 2.
  peak = static_cast<unsigned long>(inputValueMax) * 64 * resolution / (1024 / range);

  if (peakHoldPos < peakPos) {
    // New maximum: latch it and restart the hold timer.
    peakHoldPos = peakPos;
    peakMillis = millis();
    // The integer division can yield 0 (e.g. position 1 at resolution 2);
    // clamp so log() never receives 0.
    const int level = peakHoldPos / resolution;
    dB = 15 * log(level > 0 ? level : 1) - 58;
    peakFalling = 0;
  }
  // Elapsed-time form: "millis() - 2000 > peakMillis" wrapped around during the
  // first two seconds after power-up and fired immediately.
  else if (millis() - peakMillis > 2000) {
    peakMillis = millis();
    peakFalling = 1;
  }

  // Animate: move needle and bar one step per frame until the target is reached.
  meterValue = peak - 32 * resolution;
  while (meterPos < meterValue) {
    meterPos++;
    peakPos++;
    drawDisplay();
  }
  while (meterPos > meterValue) {
    meterPos--;
    peakPos--;
    drawDisplay();
  }
  peakPos = peak;
  PRINT("Signal="); PRINT(inputValueMax); PRINT(" Peak="); PRINT(peak); PRINT(" meter="); PRINTLN(meterValue);
}

I did a quick scan over your code and I suspect I can see a few ways to optimise it.

I'll try it out first.

What circuit are you using to sample the audio signal?

Ok, I have your code working. Looks good!

I used a pot to control the level. I can see that it doesn't keep up with the pot if I move it quickly.

Use "resolution" variable to smoothen it out. At resolution = 1, it's fast enough. At resolution = 2, it gets too slow.

There is no sampling circuit, just a diode in series to cut off the negative voltage. I am planning to use a unity-gain opamp to better isolate A0 from the signal source. There is too much noise at the A0 pin currently in my dirty setup.

You might want to think about using an envelope-follower circuit. This will smooth the audio signal into a "sound volume level". With this circuit, the Arduino's analog input will not need to sample the signal hundreds of times to find the maximum value, hopefully a single sample will be good enough.

When optimising any code, the first thing to do is measure it, so that each time you make a change, you can quickly see if the change has made any improvement or not. In your case you are optimising for speed/frame rate. I would suggest using millis() or micros() to time 100 or 1000 frames and print this to serial monitor.

The next thing to do is measure some of the individual or groups of functions that are needed to draw each frame. This is important to help understand which functions or groups are taking the longest time and making drawing the frame slow. No point wasting time optimising functions that are already performing well. Any improvements you make to those won't have much effect overall.

I noticed that drawing each frame is done by clearing the entire frame and re-drawing each element, even though many of them don't change from frame to frame.

One possible way to speed up the frame rate would be to find a way to avoid redrawing all those components that don't change.

One way to achieve that would be to draw all the fixed elements once, and save that somehow, then re-use the saved image when drawing each frame. Problem with this is the very limited dynamic (RAM) memory on the atmega328. But it might be possible to use program (Flash) memory for that.

Another way to avoid re-drawing elements that don't change is to find a way to "undraw" and redraw the changing elements in a way that doesn't destroy the elements that don't change. I don't think the Adafruit library has any method to do that, but the U8G2 library does (XOR drawing mode).

The speed of the analogRead() function can be increased by dropping the resolution down from 10 bits to 8 on atmega328. I think 8 bits would be good enough for your project, so maybe consider that.

Floating point maths, especially functions like sin(), cos() and log(), are very slow on atmega328 because it is all done in software.

It might be a good idea to pre-calculate the a1 & a2 values and store them in an array which can be stored in Flash memory using the PROGMEM attribute.

@PaulRB posted good ideas.

my 2 cents

  • replace float divisions in your math as much as possible, replacing them with multiplication if possible e.g x/10 == x * 0.1
  • cache intermediate values where possible.
  • have a look at GitHub - RobTillaart/FastTrig: Arduino library with interpolated lookup for sin() and cos() for faster sin() and cos() functions.
  • I do not know if the ADAfruit library can draw a line in XOR mode, then it would be easy to remove it, just by drawing it again.
  • with respect to the redraw of the full screen, divide the screen into two halves. And as you know that the line is either left or right, you only need to redraw half of it.
  • check if the line has actually changed and need to be redrawn

The Adafruit library has a function that returns a pointer to the display buffer. The difficulty is that you need to save the buffer into program memory. What I've done is print out the buffer contents to the serial monitor in a format suitable for inserting into the sketch as a uint8_t array. Once that is in program memory, memcpy_P() can be used to copy it back into the display buffer.

Might be possible to just redraw the bar graph and pointer in the background color using the previous data, then redraw those using the new data. That would speed things up a bit.

That could be complex, but determining just how much of a change in value results in a noticeable movement of the line in the dial would eliminate a lot of redundant drawing of identical images. I'd probably throw the code on an arduino with enough memory that I could save the current image, then run through the full range of values for the data, comparing the current image to the previous image to see at which values the pointer actually moves.

It does not seem to have that feature. But U8G2 does (.drawColor(2))

Yes, I looked for a feature in the Adafruit lib to do just that, but there isn't one. Can you share that code with @zdravke, if needed?

There is a function in the library to do that (.drawBitmap()). I guess memcpy_P() could be substantially faster.

As I said earlier, first thing to do is measure the current frame rate and then figure out what aspects of the code will give "biggest bang-per-buck" for the effort to optimise them.

@zdravke have you an idea about the frame rate you would be happy with?

This might be simpler than you think, you just need to remember the angle you draw the dial.
If it is the same you do not need to redraw.
Or probably more robust, if you calculated the endpoint of the dial and it is the same you do not need to redraw.

1 Like

What I was thinking of doing was running through the full range of possible values and determine how many discretely different positions of the dial needle are produced. I have a suspicion that multiple values are producing the same image, resulting in long delays between visible changes in the display.

Will be tonight before I can look at the equations in the code, it looks like the full range of analog input values will run into integer overflow.

I have done some timing tests and the results are interesting.

Here's the updated code:

#define DEBUG
#include <SPI.h>
#include <Adafruit_GFX.h>
#include <Adafruit_SSD1306.h>
// Pin numbers handed to the display constructor below.
#define OLED_DC     6
#define OLED_CS     7
#define OLED_RESET  8
// 128x64 display object on the hardware SPI bus (&SPI); all drawing in this sketch goes through it.
Adafruit_SSD1306 oled(128, 64, &SPI, OLED_DC, OLED_RESET, OLED_CS);



// Console logging helpers. Without DEBUG they expand to nothing, so the
// arguments are never evaluated; with DEBUG they forward to Serial.
#ifndef DEBUG
#define PRINT(x)
#define PRINTLN(x)
#else
#define PRINT(x) Serial.print(x)
#define PRINTLN(x) Serial.println(x)
#endif

// Shared state of the sketch: needle geometry, meter positions, peak-hold bookkeeping
// and the counters used for the frame-time measurement.
#define ANALOG_INPUT A0                 // analog input for outside audio source
int hMeter = 64;                      // horizontal center for needle animation
int vMeter = 85;                      // vertical center for needle animation (outside of display limits)
int rMeter = 85;                      // length of needle animation or arch of needle travel
float meterValue = 0;                 // target needle position; loop() sets it to peak - 32 * resolution
float meterPos = 0;                   // current needle position; stepped by one towards meterValue, converted to an angle in drawDisplay()
int peak;                             // latest scaled peak reading computed in loop()
int peakPos;                          // bar-graph position; the bar is drawn peakPos * 2 / resolution pixels wide
int oldPeak;                          // not referenced by the code shown here
int peakHoldPos;                      // position of the 4-pixel peak-hold marker
int dB = 0;                           // value printed at the bottom left, derived from peakHoldPos in loop()
unsigned long peakMillis;             // millis() timestamp of the last peak-hold update / start of the fall
unsigned long peakHoldMillis;         // millis() timestamp of the last one-step drop of the peak-hold marker
unsigned int inputValue;              // most recent analogRead() sample
unsigned int inputValueMax = 0;       // largest sample seen in the current measurement burst
bool peakFalling;                     // set once the hold time has expired, cleared when a new peak is latched
byte resolution = 1;          // 2 - higher resolution, but slower; 1 faster, lower resolution
byte range = 1;               // 2 - more sensitivity, 1 - normal range

int frameCounter;                     // drawDisplay() calls since the last timing report
unsigned long lastFPS;                // micros() timestamp taken when the last timing report was printed
unsigned long functionTotal;          // microseconds accumulated inside the timed part of drawDisplay()

// Renders one complete frame and pushes it to the panel. This variant is
// instrumented: the time from entry to the end of oled.display() is added to
// functionTotal and frameCounter is incremented, so loop() can report averages.
void drawDisplay() {
  unsigned long functionStart = micros();   // start of the timed section (whole function body)
  oled.clearDisplay();
  int a1 = (hMeter + (sin(meterPos / 30 / resolution) * rMeter));    // meter needle horizontal coordinate
  int a2 = (vMeter - (cos(meterPos / 30 / resolution) * rMeter));    // meter needle vertical coordinate
  oled.drawLine(a1, a2, hMeter, vMeter, WHITE);         // draws needle
  oled.setTextSize(1);
  // scale labels along the top edge
  oled.setCursor(0, 0); oled.print("-20");
  oled.setCursor(115, 0); oled.print("+5");
  oled.setCursor(95, 0); oled.print("0");
  oled.setCursor(55, 0); oled.print("-8");
  oled.drawRect(0, 10, 128, 6, WHITE);     // bar-graph frame
  // NOTE(review): width 128 starting at x = 98 runs past the right edge; presumably relies on the library clipping it.
  oled.drawRect(98, 11, 128, 4, WHITE);    // red area rectangle
  //oled.drawLine(98, 9, 128, 9, WHITE);   // red area line
  //oled.fillRect(100, 10, 28, 4, WHITE);
  oled.setTextSize(1);
  // NOTE(review): the four scale labels above are printed before this call, so on the
  // very first frame they use the library's default text colour - confirm they render.
  oled.setTextColor(SSD1306_WHITE); // Draw white text
  // dB read-out; non-negative values get a leading space where the minus sign would be
  oled.setCursor(0, 55); if (dB > -1)oled.print(" "); oled.print(dB);
  oled.setCursor(20, 55); oled.print("dB");
  oled.setCursor(60, 40); oled.print("VU");
  oled.fillRect(0, 11, peakPos * 2 / resolution, 4, WHITE);        // level bar
  oled.fillRect(peakHoldPos * 2 / resolution, 11, 4, 4, WHITE);    // peak-hold marker
  oled.display();
  functionTotal += micros() - functionStart;   // accumulate time spent in this call
  frameCounter++;
}

// One-time initialisation: serial console (DEBUG builds only), analog input pin,
// display, and the peak-hold timestamp.
void setup() {
#ifdef DEBUG
  Serial.begin(115200);
#endif
  pinMode(ANALOG_INPUT, INPUT);
  // begin() reports failure through its return value. Every later drawing call
  // needs a working display, so stop here instead of carrying on regardless.
  if (!oled.begin(SSD1306_SWITCHCAPVCC)) {
#ifdef DEBUG
    Serial.println("SSD1306 init failed");
#endif
    for (;;) {}   // nothing sensible can be done without the display
  }
  oled.clearDisplay();
  peakMillis = millis();
}

// Main cycle of the timing build: age the peak-hold marker, sample the input,
// update peak-hold and dB values, move the needle to the new reading, draw
// exactly one frame, and print averaged timings every 100 frames.
void loop() {
  // Once the hold time has expired, let the marker sink one step every 100 ms
  // until it reaches the current peak.
  if (peakFalling && peak < peakHoldPos) {
    if (millis() - peakHoldMillis > 100) {
      peakHoldPos--;
      peakHoldMillis = millis();
    }
  }

  // measure input voltage: keep the largest of a burst of samples
  inputValueMax = 0;
  const int sampleCount = 200 / resolution;
  for (int i = 0; i < sampleCount; i++) {
    inputValue = analogRead(ANALOG_INPUT);
    if (inputValue > inputValueMax) inputValueMax = inputValue;
  }

  // Scale with a 32-bit intermediate: with 16-bit unsigned int a full-scale
  // reading times 64 * resolution overflows as soon as resolution is 2.
  peak = static_cast<unsigned long>(inputValueMax) * 64 * resolution / (1024 / range);

  if (peakHoldPos < peakPos) {
    // New maximum: latch it and restart the hold timer.
    peakHoldPos = peakPos;
    peakMillis = millis();
    // The integer division can yield 0 (e.g. position 1 at resolution 2);
    // clamp so log() never receives 0.
    const int level = peakHoldPos / resolution;
    dB = 15 * log(level > 0 ? level : 1) - 58;
    peakFalling = 0;
  }
  // Elapsed-time form: "millis() - 2000 > peakMillis" wrapped around during the
  // first two seconds after power-up and fired immediately.
  else if (millis() - peakMillis > 2000) {
    peakMillis = millis();
    peakFalling = 1;
  }

  // Per-step redraws are disabled here so that exactly one frame is drawn per
  // pass through loop(), which keeps the timing figures comparable.
  meterValue = peak - 32 * resolution;
  while (meterPos < meterValue) {
    meterPos++;
    peakPos++;
    //drawDisplay();
  }
  while (meterPos > meterValue) {
    meterPos--;
    peakPos--;
    //drawDisplay();
  }
  drawDisplay();
  peakPos = peak;
  //  PRINT("Signal="); PRINT(inputValueMax); PRINT(" Peak="); PRINT(peak); PRINT(" meter="); PRINTLN(meterValue);

  // Every 100 frames: average time per pass and average time inside the timed
  // part of drawDisplay(), then restart the measurement window.
  if (frameCounter >= 100) {
    PRINT((micros() - lastFPS) / frameCounter);
    PRINT("us per frame, ");
    PRINT(functionTotal / frameCounter);
    PRINTLN("us in drawDisplay()");
    frameCounter = 0;
    functionTotal = 0;
    lastFPS = micros();
  }
}

I temporarily changed it so that drawDisplay() is called every time in loop(), not only when the meter level has changed, so I could get accurate timings.

Here's some sample output:

17:26:18.890 -> 32698us per frame, 10266us in drawDisplay()
17:26:22.174 -> 32698us per frame, 10266us in drawDisplay()
17:26:25.424 -> 32697us per frame, 10266us in drawDisplay()
17:26:28.707 -> 32697us per frame, 10266us in drawDisplay()
17:26:31.958 -> 32698us per frame, 10266us in drawDisplay()
17:26:35.243 -> 32697us per frame, 10266us in drawDisplay()

The interesting thing this tells us is that a new frame is drawn every 32.7ms. Drawing each frame and updating the OLED is taking 10.3ms. Taking the peak analog reading and performing the peak hold calculations is taking around 22.4ms.

Of course, when the peak signal is changing rapidly, as it will be when music is playing, drawing the frames and updating the OLED will need to be performed many times to move the needle to the new peak reading.

For the next test, I tried to separate the drawing of the non-changing elements of the display from the other so I could see how much time that might save if there was a way to avoid re-drawing them:

// Timing variant: the elements that never change (labels, frames, "dB"/"VU"
// captions) are drawn first and are NOT timed; only the changing elements
// (dB value, level bar, peak-hold marker, needle) plus oled.display() fall
// between the micros() stamps and are added to functionTotal.
void drawDisplay() {
  oled.clearDisplay();
  // ---- untimed: elements that are identical in every frame ----
  oled.setTextSize(1);
  oled.setCursor(0, 0); oled.print("-20");
  oled.setCursor(115, 0); oled.print("+5");
  oled.setCursor(95, 0); oled.print("0");
  oled.setCursor(55, 0); oled.print("-8");
  oled.drawRect(0, 10, 128, 6, WHITE);     // bar-graph frame
  oled.drawRect(98, 11, 128, 4, WHITE);    // red area rectangle
  //oled.drawLine(98, 9, 128, 9, WHITE);   // red area line
  //oled.fillRect(100, 10, 28, 4, WHITE);
  oled.setTextSize(1);
  oled.setTextColor(SSD1306_WHITE); // Draw white text
  oled.setCursor(20, 55); oled.print("dB");
  oled.setCursor(60, 40); oled.print("VU");
  // ---- timed from here: elements that depend on the current reading ----
  unsigned long functionStart = micros();
  // dB read-out; non-negative values get a leading space where the minus sign would be
  oled.setCursor(0, 55); if (dB > -1)oled.print(" "); oled.print(dB);
  oled.fillRect(0, 11, peakPos * 2 / resolution, 4, WHITE);        // level bar
  oled.fillRect(peakHoldPos * 2 / resolution, 11, 4, 4, WHITE);    // peak-hold marker
  int a1 = (hMeter + (sin(meterPos / 30 / resolution) * rMeter));    // meter needle horizontal coordinate
  int a2 = (vMeter - (cos(meterPos / 30 / resolution) * rMeter));    // meter needle vertical coordinate
  oled.drawLine(a1, a2, hMeter, vMeter, WHITE);         // draws needle
  oled.display();
  functionTotal += micros() - functionStart;   // accumulate the timed portion only
  frameCounter++;
}
17:51:04.434 -> 33079us per frame, 5899us in drawDisplay()
17:51:07.714 -> 32733us per frame, 5904us in drawDisplay()
17:51:10.995 -> 32736us per frame, 5902us in drawDisplay()
17:51:14.275 -> 32733us per frame, 5902us in drawDisplay()
17:51:17.523 -> 32734us per frame, 5903us in drawDisplay()
17:51:20.803 -> 32733us per frame, 5902us in drawDisplay()
17:51:24.085 -> 32734us per frame, 5904us in drawDisplay()
17:51:27.370 -> 32736us per frame, 5905us in drawDisplay()

So the non-changing elements of the display are taking around 4.3ms to draw.

After that, I thought I would see how long the oled.display() call itself is taking:

// Timing variant: the whole frame is drawn into the buffer untimed; only the
// oled.display() call that sends the buffer to the panel sits between the
// micros() stamps and is added to functionTotal.
void drawDisplay() {
  oled.clearDisplay();
  // ---- untimed: all drawing into the buffer ----
  oled.setTextSize(1);
  oled.setCursor(0, 0); oled.print("-20");
  oled.setCursor(115, 0); oled.print("+5");
  oled.setCursor(95, 0); oled.print("0");
  oled.setCursor(55, 0); oled.print("-8");
  oled.drawRect(0, 10, 128, 6, WHITE);     // bar-graph frame
  oled.drawRect(98, 11, 128, 4, WHITE);    // red area rectangle
  //oled.drawLine(98, 9, 128, 9, WHITE);   // red area line
  //oled.fillRect(100, 10, 28, 4, WHITE);
  oled.setTextSize(1);
  oled.setTextColor(SSD1306_WHITE); // Draw white text
  oled.setCursor(20, 55); oled.print("dB");
  oled.setCursor(60, 40); oled.print("VU");
  // dB read-out; non-negative values get a leading space where the minus sign would be
  oled.setCursor(0, 55); if (dB > -1)oled.print(" "); oled.print(dB);
  oled.fillRect(0, 11, peakPos * 2 / resolution, 4, WHITE);        // level bar
  oled.fillRect(peakHoldPos * 2 / resolution, 11, 4, 4, WHITE);    // peak-hold marker
  int a1 = (hMeter + (sin(meterPos / 30 / resolution) * rMeter));    // meter needle horizontal coordinate
  int a2 = (vMeter - (cos(meterPos / 30 / resolution) * rMeter));    // meter needle vertical coordinate
  oled.drawLine(a1, a2, hMeter, vMeter, WHITE);         // draws needle
  // ---- timed: transfer of the buffer to the display only ----
  unsigned long functionStart = micros();
  oled.display();
  functionTotal += micros() - functionStart;   // accumulate the transfer time only
  frameCounter++;
}
17:57:21.783 -> 32735us per frame, 3384us in drawDisplay()
17:57:25.064 -> 32736us per frame, 3384us in drawDisplay()
17:57:28.346 -> 32738us per frame, 3384us in drawDisplay()
17:57:31.627 -> 32735us per frame, 3384us in drawDisplay()
17:57:34.908 -> 32738us per frame, 3385us in drawDisplay()

So there is a potentially unavoidable 3.3ms for each frame to update the OLED.