I need help speeding up my led matrix functions

Hi, I've recently been making a 24x5 LED matrix with 74HC595s. However, the code I built has functions that have to run for every column of the text, so the process takes longer as the text gets longer.

Here's my code:

/*
  24 x 5 led matrix
  Version 2.2
  by: ArduMasterPro44
*/

#define Data1 3
#define Clock1 4
#define Data2 5
#define Clock2 6
#define Latch 7

// 3x5 font table for the 95 printable ASCII characters ' ' (32) to '~' (126).
// A character c is looked up as alphabet[c - 32]. Each row is
// { width, col0, col1, col2 }: `width` (1..3) is how many column bytes the
// glyph uses, followed by the column bitmaps in display order. Every column
// byte only uses its low 5 bits; unused trailing columns are 0x00.
// NOTE(review): which bit corresponds to the top LED row depends on the
// wiring and shift order - confirm against the hardware.
uint8_t alphabet[95][4] = {
  { 3, 0x00, 0x00, 0x00 },  // " "
  { 1, 0x17, 0x00, 0x00 },  // "!"
  { 3, 0x03, 0x00, 0x03 },  // """
  { 3, 0x1F, 0x0A, 0x1F },  // "#"
  { 3, 0x0B, 0x1F, 0x0D },  // "$"
  { 3, 0x19, 0x04, 0x13 },  // "%"
  { 3, 0x0A, 0x15, 0x1A },  // "&"
  { 1, 0x03, 0x00, 0x00 },  // "'"
  { 2, 0x0E, 0x11, 0x00 },  // "("
  { 2, 0x11, 0x0E, 0x00 },  // ")"
  { 3, 0x05, 0x02, 0x05 },  // "*"
  { 3, 0x04, 0x0E, 0x04 },  // "+"
  { 1, 0x18, 0x00, 0x00 },  // ","
  { 3, 0x04, 0x04, 0x04 },  // "-"
  { 1, 0x10, 0x00, 0x00 },  // "."
  { 3, 0x18, 0x04, 0x03 },  // "/"
  { 3, 0x1F, 0x11, 0x1F },  // "0"
  { 3, 0x12, 0x1F, 0x10 },  // "1"
  { 3, 0x19, 0x15, 0x12 },  // "2"
  { 3, 0x11, 0x15, 0x1F },  // "3"
  { 3, 0x07, 0x04, 0x1F },  // "4"
  { 3, 0x17, 0x15, 0x1D },  // "5"
  { 3, 0x1E, 0x15, 0x1D },  // "6"
  { 3, 0x01, 0x01, 0x1F },  // "7"
  { 3, 0x1F, 0x15, 0x1F },  // "8"
  { 3, 0x17, 0x15, 0x1F },  // "9"
  { 1, 0x0A, 0x00, 0x00 },  // ":"
  { 1, 0x1A, 0x00, 0x00 },  // ";"
  { 3, 0x04, 0x0A, 0x11 },  // "<"
  { 3, 0x0A, 0x0A, 0x0A },  // "="
  { 3, 0x11, 0x0A, 0x04 },  // ">"
  { 3, 0x01, 0x15, 0x03 },  // "?"
  { 3, 0x0E, 0x11, 0x17 },  // "@"
  { 3, 0x1F, 0x05, 0x1F },  // "A"
  { 3, 0x1F, 0x15, 0x1B },  // "B"
  { 3, 0x1F, 0x11, 0x11 },  // "C"
  { 3, 0x1F, 0x11, 0x0E },  // "D"
  { 3, 0x1F, 0x15, 0x11 },  // "E"
  { 3, 0x1F, 0x05, 0x01 },  // "F"
  { 3, 0x1F, 0x11, 0x1D },  // "G"
  { 3, 0x1F, 0x04, 0x1F },  // "H"
  { 3, 0x11, 0x1F, 0x11 },  // "I"
  { 3, 0x18, 0x10, 0x1F },  // "J"
  { 3, 0x1F, 0x04, 0x1B },  // "K"
  { 3, 0x1F, 0x10, 0x10 },  // "L"
  { 3, 0x1F, 0x06, 0x1F },  // "M"
  { 3, 0x1F, 0x01, 0x1F },  // "N"
  { 3, 0x1F, 0x11, 0x1F },  // "O"
  { 3, 0x1F, 0x05, 0x07 },  // "P"
  { 3, 0x0F, 0x19, 0x0F },  // "Q"
  { 3, 0x1F, 0x05, 0x1A },  // "R"
  { 3, 0x17, 0x15, 0x1D },  // "S"
  { 3, 0x01, 0x1F, 0x01 },  // "T"
  { 3, 0x1F, 0x10, 0x1F },  // "U"
  { 3, 0x0F, 0x10, 0x1F },  // "V"
  { 3, 0x1F, 0x0C, 0x1F },  // "W"
  { 3, 0x1B, 0x04, 0x1B },  // "X"
  { 3, 0x07, 0x1C, 0x07 },  // "Y"
  { 3, 0x19, 0x15, 0x13 },  // "Z"
  { 2, 0x1F, 0x11, 0x00 },  // "["
  { 3, 0x03, 0x04, 0x18 },  // "\"
  { 2, 0x11, 0x1F, 0x00 },  // "]"
  { 3, 0x02, 0x01, 0x02 },  // "^"
  { 3, 0x10, 0x10, 0x10 },  // "_"
  { 2, 0x01, 0x02, 0x00 },  // "`"
  { 3, 0x0c, 0x12, 0x1E },  // "a"
  { 3, 0x1F, 0x12, 0x0C },  // "b"
  { 3, 0x1E, 0x12, 0x12 },  // "c"
  { 3, 0x0C, 0x12, 0x1F },  // "d"
  { 3, 0x0C, 0x1A, 0x14 },  // "e"
  { 3, 0x04, 0x1F, 0x05 },  // "f"
  { 3, 0x12, 0x15, 0x0E },  // "g"
  { 3, 0x1F, 0x04, 0x18 },  // "h"
  { 1, 0x1D, 0x00, 0x00 },  // "i"
  { 2, 0x10, 0x0D, 0x00 },  // "j"
  { 3, 0x1F, 0x04, 0x1A },  // "k"
  { 1, 0x1F, 0x00, 0x00 },  // "l"
  { 3, 0x1E, 0x04, 0x1E },  // "m"
  { 3, 0x1E, 0x02, 0x1E },  // "n"
  { 3, 0x1E, 0x12, 0x1E },  // "o"
  { 3, 0x1F, 0x09, 0x06 },  // "p"
  { 3, 0x06, 0x09, 0x1F },  // "q"
  { 3, 0x1E, 0x02, 0x04 },  // "r"
  { 3, 0x14, 0x16, 0x0A },  // "s"
  { 3, 0x02, 0x0F, 0x12 },  // "t"
  { 3, 0x1E, 0x10, 0x1E },  // "u"
  { 3, 0x0E, 0x10, 0x0E },  // "v"
  { 3, 0x1E, 0x08, 0x1E },  // "w"
  { 3, 0x1A, 0x04, 0x1A },  // "x"
  { 3, 0x17, 0x14, 0x0F },  // "y"
  { 3, 0x1A, 0x12, 0x16 },  // "z"
  { 3, 0x04, 0x1F, 0x11 },  // "{"
  { 1, 0x1F, 0x00, 0x00 },  // "|"
  { 3, 0x11, 0x1F, 0x04 },  // "}"
  { 3, 0x04, 0x0C, 0x08 }   // "~"
};

// Scroll/display configuration shared by loop() and the helpers below.
// NOTE(review): `rows` is passed to shiftText() as bitsX (the 24 column-select
// bits) and `columns` as bitsY (the 5 pattern bits), so the two names look
// swapped relative to the physical matrix - confirm before renaming.
const uint16_t delayTime = 100;  // milliseconds between one-column scroll steps (compared with millis() in loop())
const uint8_t rows = 24;         // number of column-select bits shifted per refresh (bitsX)
const uint8_t columns = 5;       // number of pattern bits shifted per column (bitsY)
uint32_t time;                   // millis() value recorded at the last scroll step
char message[] = "This is a test string";  // text that is scrolled across the display

void setup() {
  pinMode(Data1, OUTPUT);
  pinMode(Clock1, OUTPUT);
  pinMode(Data2, OUTPUT);
  pinMode(Clock2, OUTPUT);
  pinMode(Latch, OUTPUT);
}

void loop() {

  int8_t i = -24;
  while (i < sizeOfString(message)) {
    if (millis() - time >= delayTime) {
      i++;
      time = millis();
    }
    shiftText(message, i, rows, columns, delayTime);
  }
}

void shiftText(char msg[], int16_t idx, uint8_t bitsX, uint8_t bitsY, uint16_t del) {
  for (uint8_t i = 0; i < bitsX; i++) {
    shiftByte(Data1, Data2, Clock1, Clock2, Latch, MSBFIRST, LSBFIRST, i, stringToChar(msg, i + idx), bitsX, bitsY);  // Shift the byte to the matrix
  }
}

// Returns the font column found `bit` columns into the rendered form of
// `str` (glyph columns with one blank separator column between characters).
// Returns 0x00 for a negative index, a separator column, or any index past
// the end of the text.
//
// The string is walked glyph by glyph and the function returns as soon as the
// requested column is reached. No temporary bitmap of the whole message is
// built, so a call needs no stack buffer and stops early.
//
// Precondition: every character of `str` is printable ASCII (32..126), since
// the character code minus 32 is used directly as the font table index.
uint8_t stringToChar(char str[], int16_t bit) {
  if (bit < 0) {
    return 0x00;
  }

  int16_t start = 0;  // rendered column at which the current glyph begins
  for (const char *p = str; *p != '\0'; p++) {
    const uint8_t *glyph = alphabet[*p - 32];
    const uint8_t width = glyph[0];

    if (bit < start + width) {
      return glyph[1 + (bit - start)];  // column inside this glyph
    }
    start += width;

    if (bit == start) {
      return 0x00;  // separator column (or first column past the last glyph)
    }
    start++;
  }
  return 0x00;  // past the end of the text
}

// Returns the width, in columns, of `str` rendered in the 3x5 font: the sum
// of the glyph widths plus one separator column between adjacent characters.
// An empty string has width 0.
//
// The string is walked once by pointer, so strlen() is no longer evaluated
// on every iteration and strings of 256 or more characters still terminate.
// The result is still an 8-bit value and wraps above 255 columns.
//
// Precondition: every character of `str` is printable ASCII (32..126).
uint8_t sizeOfString(char str[]) {
  uint8_t size = 0;
  for (const char *p = str; *p != '\0'; p++) {
    size += alphabet[*p - 32][0];
    if (p[1] != '\0') {  // not the last character: add the separator column
      size++;
    }
  }
  return size;
}

// Sends one display column: with the latch held low, shifts a one-hot select
// word (bit `pin` set) out on data1/clock1 and the inverted pattern `val` out
// on data2/clock2, then raises the latch.
void shiftByte(uint8_t data1, uint8_t data2, uint8_t clock1, uint8_t clock2, uint8_t latch, uint8_t bitOrderX, uint8_t bitOrderY, uint8_t pin, uint32_t val, uint8_t bitsX, uint8_t bitsY) {
  const uint32_t selectWord = bit(pin);
  const uint32_t invertedPattern = ~val;

  digitalWrite(latch, LOW);
  shiftBits(data1, clock1, bitOrderX, selectWord, bitsX);
  shiftBits(data2, clock2, bitOrderY, invertedPattern, bitsY);
  digitalWrite(latch, HIGH);
}

// Clocks the low `bits` bits of `value` out on `data`, pulsing `clock` high
// then low once per bit. With MSBFIRST, bit (bits - 1) goes first; with
// LSBFIRST, bit 0 goes first. Any other bitOrder only produces clock pulses.
void shiftBits(uint8_t data, uint8_t clock, uint8_t bitOrder, uint32_t value, uint8_t bits) {
  uint8_t remaining = bits;
  while (remaining > 0) {
    if (bitOrder == MSBFIRST) {
      const uint32_t topBit = value & bit(bits - 1);
      digitalWrite(data, topBit != 0);
      value <<= 1;
    } else if (bitOrder == LSBFIRST) {
      digitalWrite(data, value & 1);
      value >>= 1;
    }
    digitalWrite(clock, HIGH);
    digitalWrite(clock, LOW);
    remaining--;
  }
}

Here's the schematic:


(Ignore the potentiometer, It's for later in my project)

I would greatly appreciate your help!

─ ArduMasterPro44 ─

  1. replace digitalWrite with such expression
// Generic single-bit helpers: x is an lvalue (e.g. a port register), y a bit number.
#define CLR(x,y) ((x) &= ~(1 << (y)))
#define SET(x,y) ((x) |= (1 << (y)))
#define CHK(x,y) ((x) & (1 << (y)))
#define TOG(x,y) ((x) ^= (1 << (y)))
// Direct-port replacements for digitalWrite()/digitalRead() taking an Arduino
// pin number b on an ATmega328P board (Uno/Nano):
//   D0..D7  -> PORTD bits 0..7
//   D8..D13 -> PORTB bits 0..5
//   A0..A5 (pins 14..19) -> PORTC bits 0..5
// The boundary between PORTB and PORTC is therefore pin 14, not 13.
#define makePinHIGH(b) ((b)<14?((b)<8?(PORTD|=(1<<(b))):(PORTB|=(1<<((b)-8)))):(PORTC|=(1<<((b)-14))))
#define makePinLOW(b) ((b)<14?((b)<8?(PORTD&=~(1<<(b))):(PORTB&=~(1<<((b)-8)))):(PORTC&=~(1<<((b)-14))))
#define readOutput(b) ((b)<14?((b)<8?(PORTD&(1<<(b))):(PORTB&(1<<((b)-8)))):(PORTC&(1<<((b)-14))))
#define readInput(b) ((b)<14?((b)<8?(PIND&(1<<(b))):(PINB&(1<<((b)-8)))):(PINC&(1<<((b)-14))))
  1. get rid of Strings
  2. 5 Rows can be connected to one port of Nano and set all 5 pins with only 1 line of code
    for example
PORTD |=0b00011111; // pin 8,9,10,11,12 will be set to HIGH. (don't try to change bits 6-7)

Nice Schematic! You have the classic matrix problem. The bigger the matrix the neater it is but the less time each LED will be on. The thing that will help you get it brighter is to drive the LEDs harder. You will need to study the data sheet for the LEDs to determine how much you can supply. Then you need to be sure it is in the range of the 74HC595s and adjust the resistors accordingly. A rough starting point would be 500 Ohms. Remember when calculating the LED current be sure to take into consideration of its forward voltage drop, this will be different for different colors.

I doubt that it will make a big difference, since the stringToChar() and sizeOfString() functions both run on every iteration of the main loop. So if something is causing the slowness, it's those functions. But I don't know how to fix it.

This is a software problem, not a hardware problem. My code doesn't scan fast enough with long strings.

I'd probably put the font table "alphabet" in flash memory (PROGMEM if it is an AVR mcu but anyway define it as constant). It won't make it faster but would free up about 400 bytes of RAM. Secondly I'd preprocess the entire input string so it more closely matches the format of the 24x5 pixel "window" that it is to be scrolled in. That is, it would be some representation of Nx5 where N is the total width of the string to be displayed in pixels. The main display activity would then be simply picking out the next 24x5 block from the preprocessed input string and loading into the buffer which is sent to the shift register chain. That at regular intervals to match the scrolling speed.

EDIT

I've spent a few more minutes looking at it. You must separate out the display multiplexing from the other activities in the loop such as analysing the input string, converting it to a font, scrolling it and loading it into the display buffer. You almost certainly want to drive the scanning of the display buffer and loading the shift register chain asynchronously from the loop by using a timer call back routine. The way the hardware is configured, at any instant, you can display only one of the 24 vertical stripes on that display and the whole process of displaying all 24 vertical stripes (columns) should be completed in about 5 ms to avoid noticeable flicker.
For testing, I'd start with a 24 byte buffer, preloaded with a display pattern, and create the routine to continuously push that out to the shift register chain, one column at a time, and get that optimised for brightness and minimal flicker. Once that is done, then proceed with the rest of the loop activities.

The problem with preprocessing the string, is that I have to declare the "pixel window" with the size of the string which cannot be done outside of the main functions (setup and loop) and if I do it in the setup, the "pixel window" will be local to the setup (). So I need to declare it global in the setup ().

[EDIT]: It would be ideal to be able to return the whole array from the stringToChar() function, but I don't know how that would be done.

I just did a quick test to simulate just the multiplexing of that 24 x 5 matrix and it seems it is going to be quite slow. I used shiftOut(), which is a built-in function but is relatively slow, with a clock of about 100 kHz. I checked it out with a logic analyser. There is a faster one available, which I have not tried: GitHub - RobTillaart/FastShiftOut: Arduino library for (AVR) optimized shiftOut - e.g. 74HC595. It would have been ideal if you had used the hardware SPI pins (MOSI and SCK) and put all shift registers in the same chain. Here is the code I used for the test, and it should work on your setup because I used the same pin numbers. If you do it the way I suggested, then the scanning speed is independent of the length of the string you are going to display. You'd just load displayBuffer[] from the loop for scrolling etc.

#define Data1 3
#define Clock1 4
#define Data2 5
#define Clock2 6
#define Latch 7
#define LED 13

volatile uint8_t displayBuffer[24] = {0} ;

void multiplex() {
  static uint8_t colNr = 0 ;  // 0..23 static initialised once only

  digitalWrite( Latch, LOW ) ;

  // push out current column number
  uint32_t colOut = (uint32_t)1 <<  colNr ;

  shiftOut( Data1, Clock1, LSBFIRST, (uint8_t) colOut ) ;
  shiftOut( Data1, Clock1, LSBFIRST, (uint8_t) (colOut >> 8) ) ;
  shiftOut( Data1, Clock1, LSBFIRST, (uint8_t) (colOut >> 16) ) ;

  // push out row data for current column
  // only 5 of the 8 positions are used
  // you may have to invert '~' displayBuffer[ colNr ]
  shiftOut( Data2, Clock2, MSBFIRST, displayBuffer[ colNr ] ) ;

  digitalWrite( Latch, HIGH ) ;

  if ( ++ colNr == 24 ) colNr = 0 ; // increment/wrap around
}

// Timer2 compare-match A interrupt handler: performs one multiplex() step
// (one display column) each time the interrupt fires.
ISR(TIMER2_COMPA_vect) {
  // digitalWrite( LED , !digitalRead( LED ) ) ;
  multiplex() ;
}

void setup() {
  Serial.begin( 115200 ) ;

  pinMode(Data1, OUTPUT);
  pinMode(Clock1, OUTPUT);
  pinMode(Data2, OUTPUT);
  pinMode(Clock2, OUTPUT);
  pinMode(Latch, OUTPUT);

  pinMode( LED, OUTPUT ) ;

  // fill displayBuffer with a recognisable pattern for testing
  for ( uint8_t i = 0; i < 24; i++ ) {
    displayBuffer[ i ] = 0b11111111 << ( i / 3 ) % 8 ;
    Serial.print( i ) ;
    Serial.print("   ") ;
    Serial.println( displayBuffer[ i ], BIN ) ;
  }

  // set up timer 2 to call multiplex with a 1ms period
  cli() ;
  TCNT2 = 0 ;
  TCCR2A = _BV( WGM21 ) ; //  CTC mode 
  TCCR2B = _BV( CS22 )  ; //  PS /64,
  TIMSK2 = _BV( OCIE2A); // interrupt TIMER2_COMPA_vect
  OCR2A = 255 ;  // ~1kHz @ PS /64 
  sei() ;
}

// Intentionally empty: in this test sketch the display is driven entirely
// from the Timer2 interrupt and displayBuffer is filled once in setup().
void loop() {

}

Are you actually seeing that string "This is a test string" on the display and do you see visible scanning of the display and flicker ?
If the size of the string you are going to display is known only at run time (say it is entered by the user via the serial console) then it makes preprocessing it more tricky because, for a global array, you must specify the size in advance. This would mean setting maximum size, say enough for an 80 character string.

1 Like

I tried your code, but the display outputs this (flickering):

Also, I think it would work if I declare my bitMap[] array in the setup as global, but how do you declare a variable global?

It is not very clean, but should be sufficient for your initial testing; you could define bitMap as a global array (like alphabet is) but then the size has to be fixed. This can be optimised later by dynamically allocating it:
uint8_t bitMap[240] = { 0 } ; // enough for about 80 characters in message[ ]
You can then load bitMap once in setup() instead of every time you reference it. You'll then have to modify function stringToChar().

Also, instead of calling sizeOfString( message[ ]) all the time, which always returns the same value for the same message, define a global variable say uint8_t sizeOfMessage ;. Set this variable once in setup() sizeOfMessage = sizeOfString( message[ ]).

I did expect some flicker with the code I produced for testing the multiplexing because it was running at only 1kHz. It probably needs to run 5 times faster. However, I would be surprised if it was much worse than your current code could produce. It is difficult to see from the picture what it is doing, though.

EDIT

I made the test code I supplied much faster. It now runs at 4kHz so the display should refresh every ~5ms. It needs the FastShiftOut library which can be downloaded through the IDE.

// FastShiftOut version
 
#define Data1 3
#define Clock1 4
#define Data2 5
#define Clock2 6
#define Latch 7
#define LED 13

#include <FastShiftOut.h>

FastShiftOut FSOcol(Data1, Clock1, MSBFIRST);
FastShiftOut FSOrow(Data2, Clock2, LSBFIRST);

volatile uint8_t displayBuffer[24] = {0} ;



// Timer2 compare-match A interrupt: one multiplexing step per tick. Shifts
// out the select word for the current column and the inverted row data from
// displayBuffer, latches them, then advances to the next column (0..23).
ISR(TIMER2_COMPA_vect) {
  static uint8_t activeColumn = 0;  // persists between interrupts

  // One-hot 24-bit column select word, sent low byte first.
  const uint32_t selectWord = (uint32_t)1 << activeColumn;

  digitalWrite(Latch, LOW);
  for (uint8_t shift = 0; shift < 24; shift += 8) {
    FSOcol.write((uint8_t)(selectWord >> shift));
  }

  // Row data for this column, inverted; only 5 of the 8 positions are used.
  FSOrow.write(~displayBuffer[activeColumn]);
  digitalWrite(Latch, HIGH);

  activeColumn = (activeColumn + 1) % 24;
}


void setup() {
  Serial.begin( 115200 ) ;

  pinMode(Data1, OUTPUT);
  pinMode(Clock1, OUTPUT);
  pinMode(Data2, OUTPUT);
  pinMode(Clock2, OUTPUT);
  pinMode(Latch, OUTPUT);

  pinMode( LED, OUTPUT ) ;

  // fill displayBuffer with a recognisable pattern for testing
  for ( uint8_t i = 0; i < 24; i++ ) {
    displayBuffer[ i ] = 0b11111111 << ( i / 3 ) % 8 ;
    Serial.print( i ) ;
    Serial.print("   ") ;
    Serial.println( displayBuffer[ i ], BIN ) ;
  }

  // set up timer 2 to call multiplex routine with a 250us period
  cli() ;
  TCNT2 = 0 ;
  TCCR2A = _BV( WGM21 ) ; //  CTC mode 
  TCCR2B = _BV( CS22 )  ; //  PS /64,
  TIMSK2 = _BV( OCIE2A); // interrupt TIMER2_COMPA_vect
  OCR2A = 62 ;  // ~4kHz @ PS /64 
  sei() ;
}

// Intentionally empty: in this test sketch the display is driven entirely
// from the Timer2 interrupt and displayBuffer is filled once in setup().
void loop() {

}

It now outputs this (not flickering):

Also, what I thought of doing was this:

/*
  24 x 5 led matrix
  Version 2.2
  by: ArduMasterPro44
*/

#define Data1 3
#define Clock1 4
#define Data2 5
#define Clock2 6
#define Latch 7

// 3x5 font table for the 95 printable ASCII characters ' ' (32) to '~' (126).
// A character c is looked up as alphabet[c - 32]. Each row is
// { width, col0, col1, col2 }: `width` (1..3) is how many column bytes the
// glyph uses, followed by the column bitmaps in display order. Every column
// byte only uses its low 5 bits; unused trailing columns are 0x00.
// NOTE(review): which bit corresponds to the top LED row depends on the
// wiring and shift order - confirm against the hardware.
uint8_t alphabet[95][4] = {
  { 3, 0x00, 0x00, 0x00 },  // " "
  { 1, 0x17, 0x00, 0x00 },  // "!"
  { 3, 0x03, 0x00, 0x03 },  // """
  { 3, 0x1F, 0x0A, 0x1F },  // "#"
  { 3, 0x0B, 0x1F, 0x0D },  // "$"
  { 3, 0x19, 0x04, 0x13 },  // "%"
  { 3, 0x0A, 0x15, 0x1A },  // "&"
  { 1, 0x03, 0x00, 0x00 },  // "'"
  { 2, 0x0E, 0x11, 0x00 },  // "("
  { 2, 0x11, 0x0E, 0x00 },  // ")"
  { 3, 0x05, 0x02, 0x05 },  // "*"
  { 3, 0x04, 0x0E, 0x04 },  // "+"
  { 1, 0x18, 0x00, 0x00 },  // ","
  { 3, 0x04, 0x04, 0x04 },  // "-"
  { 1, 0x10, 0x00, 0x00 },  // "."
  { 3, 0x18, 0x04, 0x03 },  // "/"
  { 3, 0x1F, 0x11, 0x1F },  // "0"
  { 3, 0x12, 0x1F, 0x10 },  // "1"
  { 3, 0x19, 0x15, 0x12 },  // "2"
  { 3, 0x11, 0x15, 0x1F },  // "3"
  { 3, 0x07, 0x04, 0x1F },  // "4"
  { 3, 0x17, 0x15, 0x1D },  // "5"
  { 3, 0x1E, 0x15, 0x1D },  // "6"
  { 3, 0x01, 0x01, 0x1F },  // "7"
  { 3, 0x1F, 0x15, 0x1F },  // "8"
  { 3, 0x17, 0x15, 0x1F },  // "9"
  { 1, 0x0A, 0x00, 0x00 },  // ":"
  { 1, 0x1A, 0x00, 0x00 },  // ";"
  { 3, 0x04, 0x0A, 0x11 },  // "<"
  { 3, 0x0A, 0x0A, 0x0A },  // "="
  { 3, 0x11, 0x0A, 0x04 },  // ">"
  { 3, 0x01, 0x15, 0x03 },  // "?"
  { 3, 0x0E, 0x11, 0x17 },  // "@"
  { 3, 0x1F, 0x05, 0x1F },  // "A"
  { 3, 0x1F, 0x15, 0x1B },  // "B"
  { 3, 0x1F, 0x11, 0x11 },  // "C"
  { 3, 0x1F, 0x11, 0x0E },  // "D"
  { 3, 0x1F, 0x15, 0x11 },  // "E"
  { 3, 0x1F, 0x05, 0x01 },  // "F"
  { 3, 0x1F, 0x11, 0x1D },  // "G"
  { 3, 0x1F, 0x04, 0x1F },  // "H"
  { 3, 0x11, 0x1F, 0x11 },  // "I"
  { 3, 0x18, 0x10, 0x1F },  // "J"
  { 3, 0x1F, 0x04, 0x1B },  // "K"
  { 3, 0x1F, 0x10, 0x10 },  // "L"
  { 3, 0x1F, 0x06, 0x1F },  // "M"
  { 3, 0x1F, 0x01, 0x1F },  // "N"
  { 3, 0x1F, 0x11, 0x1F },  // "O"
  { 3, 0x1F, 0x05, 0x07 },  // "P"
  { 3, 0x0F, 0x19, 0x0F },  // "Q"
  { 3, 0x1F, 0x05, 0x1A },  // "R"
  { 3, 0x17, 0x15, 0x1D },  // "S"
  { 3, 0x01, 0x1F, 0x01 },  // "T"
  { 3, 0x1F, 0x10, 0x1F },  // "U"
  { 3, 0x0F, 0x10, 0x1F },  // "V"
  { 3, 0x1F, 0x0C, 0x1F },  // "W"
  { 3, 0x1B, 0x04, 0x1B },  // "X"
  { 3, 0x07, 0x1C, 0x07 },  // "Y"
  { 3, 0x19, 0x15, 0x13 },  // "Z"
  { 2, 0x1F, 0x11, 0x00 },  // "["
  { 3, 0x03, 0x04, 0x18 },  // "\"
  { 2, 0x11, 0x1F, 0x00 },  // "]"
  { 3, 0x02, 0x01, 0x02 },  // "^"
  { 3, 0x10, 0x10, 0x10 },  // "_"
  { 2, 0x01, 0x02, 0x00 },  // "`"
  { 3, 0x0c, 0x12, 0x1E },  // "a"
  { 3, 0x1F, 0x12, 0x0C },  // "b"
  { 3, 0x1E, 0x12, 0x12 },  // "c"
  { 3, 0x0C, 0x12, 0x1F },  // "d"
  { 3, 0x0C, 0x1A, 0x14 },  // "e"
  { 3, 0x04, 0x1F, 0x05 },  // "f"
  { 3, 0x12, 0x15, 0x0E },  // "g"
  { 3, 0x1F, 0x04, 0x18 },  // "h"
  { 1, 0x1D, 0x00, 0x00 },  // "i"
  { 2, 0x10, 0x0D, 0x00 },  // "j"
  { 3, 0x1F, 0x04, 0x1A },  // "k"
  { 1, 0x1F, 0x00, 0x00 },  // "l"
  { 3, 0x1E, 0x04, 0x1E },  // "m"
  { 3, 0x1E, 0x02, 0x1E },  // "n"
  { 3, 0x1E, 0x12, 0x1E },  // "o"
  { 3, 0x1F, 0x09, 0x06 },  // "p"
  { 3, 0x06, 0x09, 0x1F },  // "q"
  { 3, 0x1E, 0x02, 0x04 },  // "r"
  { 3, 0x14, 0x16, 0x0A },  // "s"
  { 3, 0x02, 0x0F, 0x12 },  // "t"
  { 3, 0x1E, 0x10, 0x1E },  // "u"
  { 3, 0x0E, 0x10, 0x0E },  // "v"
  { 3, 0x1E, 0x08, 0x1E },  // "w"
  { 3, 0x1A, 0x04, 0x1A },  // "x"
  { 3, 0x17, 0x14, 0x0F },  // "y"
  { 3, 0x1A, 0x12, 0x16 },  // "z"
  { 3, 0x04, 0x1F, 0x11 },  // "{"
  { 1, 0x1F, 0x00, 0x00 },  // "|"
  { 3, 0x11, 0x1F, 0x04 },  // "}"
  { 3, 0x04, 0x0C, 0x08 }   // "~"
};

const uint16_t delayTime = 100;  // milliseconds between scroll steps
const uint8_t rows = 24;         // column-select bits shifted per refresh
const uint8_t columns = 5;       // pattern bits shifted per column
uint32_t time;                   // millis() value at the last scroll step
char message[] = "This is a test string";
uint8_t size = 0;                // number of valid columns in bitMap

// The rendered message. It has to live at file scope: an array declared
// inside setup() is invisible to every other function and ceases to exist
// when setup() returns. A file-scope array needs a compile-time size, so a
// fixed capacity is reserved (enough for roughly 60 three-column glyphs plus
// separators) and `size` records how much of it is in use.
#define BITMAP_CAPACITY 240
uint8_t bitMap[BITMAP_CAPACITY] = { 0 };

// Configures the output pins and renders `message` into bitMap once, so that
// the refresh code only has to index bitMap afterwards.
// Precondition: every character of `message` is printable ASCII (32..126).
void setup() {
  pinMode(Data1, OUTPUT);
  pinMode(Clock1, OUTPUT);
  pinMode(Data2, OUTPUT);
  pinMode(Clock2, OUTPUT);
  pinMode(Latch, OUTPUT);

  size = 0;
  for (const char *p = message; *p != '\0'; p++) {
    const uint8_t *glyph = alphabet[*p - 32];

    // Stop rendering rather than overrun bitMap if the message is too long.
    if (size + glyph[0] + 1 > BITMAP_CAPACITY) {
      break;
    }

    for (uint8_t j = 0; j < glyph[0]; j++) {  // copy the glyph's columns
      bitMap[size++] = glyph[j + 1];
    }
    if (p[1] != '\0') {  // blank separator column between characters
      bitMap[size++] = 0;
    }
  }
}

void loop() {

  int8_t i = -24;
  while (i < size) {
    if (millis() - time >= delayTime) {
      i++;
      time = millis();
    }
    shiftText(message, i, rows, columns, delayTime);
  }
}

void shiftText(char msg[], int16_t idx, uint8_t bitsX, uint8_t bitsY, uint16_t del) {
  for (uint8_t i = 0; i < bitsX; i++) {
    shiftByte(Data1, Data2, Clock1, Clock2, Latch, MSBFIRST, LSBFIRST, i, stringToChar(i + idx), bitsX, bitsY);  // Shift the byte to the matrix
  }
}

// Returns column `bit` of the pre-rendered message, or 0x00 (blank) when the
// index lies before the start or at/after `size`. Expects `bitMap` and
// `size` to be visible at file scope.
uint8_t stringToChar(int16_t bit) {
  return (bit >= 0 && bit < size) ? bitMap[bit] : 0x00;
}

// Sends one display column: with the latch held low, shifts a one-hot select
// word (bit `pin` set) out on data1/clock1 and the inverted pattern `val` out
// on data2/clock2, then raises the latch.
void shiftByte(uint8_t data1, uint8_t data2, uint8_t clock1, uint8_t clock2, uint8_t latch, uint8_t bitOrderX, uint8_t bitOrderY, uint8_t pin, uint32_t val, uint8_t bitsX, uint8_t bitsY) {
  const uint32_t selectWord = bit(pin);
  const uint32_t invertedPattern = ~val;

  digitalWrite(latch, LOW);
  shiftBits(data1, clock1, bitOrderX, selectWord, bitsX);
  shiftBits(data2, clock2, bitOrderY, invertedPattern, bitsY);
  digitalWrite(latch, HIGH);
}

// Clocks the low `bits` bits of `value` out on `data`, pulsing `clock` high
// then low once per bit. With MSBFIRST, bit (bits - 1) goes first; with
// LSBFIRST, bit 0 goes first. Any other bitOrder only produces clock pulses.
void shiftBits(uint8_t data, uint8_t clock, uint8_t bitOrder, uint32_t value, uint8_t bits) {
  uint8_t remaining = bits;
  while (remaining > 0) {
    if (bitOrder == MSBFIRST) {
      const uint32_t topBit = value & bit(bits - 1);
      digitalWrite(data, topBit != 0);
      value <<= 1;
    } else if (bitOrder == LSBFIRST) {
      digitalWrite(data, value & 1);
      value >>= 1;
    }
    digitalWrite(clock, HIGH);
    digitalWrite(clock, LOW);
    remaining--;
  }
}

But I need to declare bitMap[] as global, how do you do that?

Simply declare it near the top of the program, outside any function definition, and it will be global. You'll probably have to give it a fixed size similar to what I showed in the last post.
Do you see a fixed pattern on the display with that test code? It is not very clear from the picture. The pattern should be similar to that on the serial console.

Edit

I've assumed the columns are the led anodes and the rows the cathodes. If that is not so the test code needs an inversion to the shift register output.

I want to declare it in the setup to be able to give it the right size, but it would need to be global, is there a way to declare a setup variable global?

And yes, the columns are anodes and rows are cathodes.

Also, that picture is all you see, nothing changes.

If you noticed, the last 8 columns are dimmer, well, I just figured out why.


But sadly this doesn't fix the main issue, the code.

[EDIT]: I fixed the joint and this is what it outputs:


(The modules are inverted in the code)

and I fixed the code:

// FastShiftOut version

#define Data1 3
#define Clock1 4
#define Data2 5
#define Clock2 6
#define Latch 7
#define LED 13

#include <FastShiftOut.h>

FastShiftOut FSOcol(Data1, Clock1, MSBFIRST);
FastShiftOut FSOrow(Data2, Clock2, LSBFIRST);

volatile uint8_t displayBuffer[24] = { 0 };



// Timer2 compare-match A interrupt: one multiplexing step per tick. Shifts
// out the select word for the current column and the inverted row data from
// displayBuffer, latches them, then advances to the next column (0..23).
ISR(TIMER2_COMPA_vect) {
  static uint8_t activeColumn = 0;  // persists between interrupts

  // One-hot 24-bit column select word, sent high byte first.
  const uint32_t selectWord = (uint32_t)1 << activeColumn;

  digitalWrite(Latch, LOW);
  for (int8_t shift = 16; shift >= 0; shift -= 8) {
    FSOcol.write((uint8_t)(selectWord >> shift));
  }

  // Row data for this column, inverted; only 5 of the 8 positions are used.
  FSOrow.write(~displayBuffer[activeColumn]);
  digitalWrite(Latch, HIGH);

  activeColumn = (activeColumn + 1) % 24;
}


void setup() {
  Serial.begin(115200);

  pinMode(Data1, OUTPUT);
  pinMode(Clock1, OUTPUT);
  pinMode(Data2, OUTPUT);
  pinMode(Clock2, OUTPUT);
  pinMode(Latch, OUTPUT);

  pinMode(LED, OUTPUT);

  // fill displayBuffer with a recognisable pattern for testing
  for (uint8_t i = 0; i < 24; i++) {
    displayBuffer[i] = 0b11111111 << (i / 3) % 8;
    Serial.print(i);
    Serial.print("   ");
    Serial.println(displayBuffer[i], BIN);
  }

  // set up timer 2 to call multiplex routine with a 250us period
  cli();
  TCNT2 = 0;
  TCCR2A = _BV(WGM21);   //  CTC mode
  TCCR2B = _BV(CS22);    //  PS /64,
  TIMSK2 = _BV(OCIE2A);  // interrupt TIMER2_COMPA_vect
  OCR2A = 62;            // ~4kHz @ PS /64
  sei();
}

// Intentionally empty: in this test sketch the display is driven entirely
// from the Timer2 interrupt and displayBuffer is filled once in setup().
void loop() {
}

This is the output:

This is the first thing that I want someone to answer, because it could solve my problem.

'new' can create objects that persist outside of the dynamic scope of setup():

float * vals;

int nVals = 0;

// Allocates the global `vals` array at run time (10 floats) and fills it with
// index / 10.0. Storage obtained with `new` outlives setup(), so loop() can
// read it through the global pointer.
void setup() {
  Serial.begin(115200);
  nVals = 10;
  vals = new float[nVals];

  int ii = 0;
  while (ii < nVals) {
    vals[ii] = ii / 10.0;
    ++ii;
  }
}

// Prints every element of `vals` as "index:value", one per line, then halts
// so the list is only printed once.
void loop() {
  int ii = 0;
  while (ii < nVals) {
    Serial.print(ii);
    Serial.print(':');
    Serial.println(vals[ii]);
    ++ii;
  }
  for (;;) {
    // stop here
  }
}

OK. The picture of the display gets better all the time. It is not supposed to show a changing pattern because it was intended only to demonstrate multiplexing that display while showing a stable, flicker free pattern. It could also be the basis of your solution. Once you succeed in building your global array bitMap in setup() then, in the loop(), all you have to do is copy (the next) 24 entries from bitMap into displayBuffer[ ] to scroll it

Something like:

// Scrolls the pre-rendered bitMap through the 24-column displayBuffer,
// advancing one column per pass and wrapping around at the end of the text.
void loop() {
   static uint8_t index = 0; // static initialized once only.

   // Nothing has been rendered: skip the copy, which would otherwise
   // evaluate `% 0`.
   if ( size == 0 ) return ;

   for ( uint8_t i = 0 ; i < 24 ; i ++ )  displayBuffer[ i ]  = bitMap[ ( index + i) % size ] ; // size is of bitMap
   if ( ++index >= size ) index = 0 ;
   delay( 100 ) ; // scrolling interval. better to use millis() but OK for testing
}

I'm guessing that, because of that hardware problem, your code could never have displayed anything. That is, you never got it working but only assumed that it would be slow for large strings because of the way it was structured. Did you write it yourself or just found it somewhere?

Incidentally, you can buy breakout boards for those 16 pin smd chips. Example for the picture only: 6pc SOP16 SO16 TSSOP16 SSOP16 SOIC to DIP16 Adapter Breakout PCB Converter Board | eBay

Edit

The whole code would be something like:

/*
  24 x 5 led matrix
  Version 2.2
  by: ArduMasterPro44
*/

#define Data1 3
#define Clock1 4
#define Data2 5
#define Clock2 6
#define Latch 7

#include <FastShiftOut.h>

FastShiftOut FSOcol(Data1, Clock1, MSBFIRST);
FastShiftOut FSOrow(Data2, Clock2, LSBFIRST);

// 3x5 font table for the 95 printable ASCII characters ' ' (32) to '~' (126).
// A character c is looked up as alphabet[c - 32]. Each row is
// { width, col0, col1, col2 }: `width` (1..3) is how many column bytes the
// glyph uses, followed by the column bitmaps in display order. Every column
// byte only uses its low 5 bits; unused trailing columns are 0x00.
// NOTE(review): which bit corresponds to the top LED row depends on the
// wiring and shift order - confirm against the hardware.
uint8_t alphabet[95][4] = {
  { 3, 0x00, 0x00, 0x00 },  // " "
  { 1, 0x17, 0x00, 0x00 },  // "!"
  { 3, 0x03, 0x00, 0x03 },  // """
  { 3, 0x1F, 0x0A, 0x1F },  // "#"
  { 3, 0x0B, 0x1F, 0x0D },  // "$"
  { 3, 0x19, 0x04, 0x13 },  // "%"
  { 3, 0x0A, 0x15, 0x1A },  // "&"
  { 1, 0x03, 0x00, 0x00 },  // "'"
  { 2, 0x0E, 0x11, 0x00 },  // "("
  { 2, 0x11, 0x0E, 0x00 },  // ")"
  { 3, 0x05, 0x02, 0x05 },  // "*"
  { 3, 0x04, 0x0E, 0x04 },  // "+"
  { 1, 0x18, 0x00, 0x00 },  // ","
  { 3, 0x04, 0x04, 0x04 },  // "-"
  { 1, 0x10, 0x00, 0x00 },  // "."
  { 3, 0x18, 0x04, 0x03 },  // "/"
  { 3, 0x1F, 0x11, 0x1F },  // "0"
  { 3, 0x12, 0x1F, 0x10 },  // "1"
  { 3, 0x19, 0x15, 0x12 },  // "2"
  { 3, 0x11, 0x15, 0x1F },  // "3"
  { 3, 0x07, 0x04, 0x1F },  // "4"
  { 3, 0x17, 0x15, 0x1D },  // "5"
  { 3, 0x1E, 0x15, 0x1D },  // "6"
  { 3, 0x01, 0x01, 0x1F },  // "7"
  { 3, 0x1F, 0x15, 0x1F },  // "8"
  { 3, 0x17, 0x15, 0x1F },  // "9"
  { 1, 0x0A, 0x00, 0x00 },  // ":"
  { 1, 0x1A, 0x00, 0x00 },  // ";"
  { 3, 0x04, 0x0A, 0x11 },  // "<"
  { 3, 0x0A, 0x0A, 0x0A },  // "="
  { 3, 0x11, 0x0A, 0x04 },  // ">"
  { 3, 0x01, 0x15, 0x03 },  // "?"
  { 3, 0x0E, 0x11, 0x17 },  // "@"
  { 3, 0x1F, 0x05, 0x1F },  // "A"
  { 3, 0x1F, 0x15, 0x1B },  // "B"
  { 3, 0x1F, 0x11, 0x11 },  // "C"
  { 3, 0x1F, 0x11, 0x0E },  // "D"
  { 3, 0x1F, 0x15, 0x11 },  // "E"
  { 3, 0x1F, 0x05, 0x01 },  // "F"
  { 3, 0x1F, 0x11, 0x1D },  // "G"
  { 3, 0x1F, 0x04, 0x1F },  // "H"
  { 3, 0x11, 0x1F, 0x11 },  // "I"
  { 3, 0x18, 0x10, 0x1F },  // "J"
  { 3, 0x1F, 0x04, 0x1B },  // "K"
  { 3, 0x1F, 0x10, 0x10 },  // "L"
  { 3, 0x1F, 0x06, 0x1F },  // "M"
  { 3, 0x1F, 0x01, 0x1F },  // "N"
  { 3, 0x1F, 0x11, 0x1F },  // "O"
  { 3, 0x1F, 0x05, 0x07 },  // "P"
  { 3, 0x0F, 0x19, 0x0F },  // "Q"
  { 3, 0x1F, 0x05, 0x1A },  // "R"
  { 3, 0x17, 0x15, 0x1D },  // "S"
  { 3, 0x01, 0x1F, 0x01 },  // "T"
  { 3, 0x1F, 0x10, 0x1F },  // "U"
  { 3, 0x0F, 0x10, 0x1F },  // "V"
  { 3, 0x1F, 0x0C, 0x1F },  // "W"
  { 3, 0x1B, 0x04, 0x1B },  // "X"
  { 3, 0x07, 0x1C, 0x07 },  // "Y"
  { 3, 0x19, 0x15, 0x13 },  // "Z"
  { 2, 0x1F, 0x11, 0x00 },  // "["
  { 3, 0x03, 0x04, 0x18 },  // "\"
  { 2, 0x11, 0x1F, 0x00 },  // "]"
  { 3, 0x02, 0x01, 0x02 },  // "^"
  { 3, 0x10, 0x10, 0x10 },  // "_"
  { 2, 0x01, 0x02, 0x00 },  // "`"
  { 3, 0x0c, 0x12, 0x1E },  // "a"
  { 3, 0x1F, 0x12, 0x0C },  // "b"
  { 3, 0x1E, 0x12, 0x12 },  // "c"
  { 3, 0x0C, 0x12, 0x1F },  // "d"
  { 3, 0x0C, 0x1A, 0x14 },  // "e"
  { 3, 0x04, 0x1F, 0x05 },  // "f"
  { 3, 0x12, 0x15, 0x0E },  // "g"
  { 3, 0x1F, 0x04, 0x18 },  // "h"
  { 1, 0x1D, 0x00, 0x00 },  // "i"
  { 2, 0x10, 0x0D, 0x00 },  // "j"
  { 3, 0x1F, 0x04, 0x1A },  // "k"
  { 1, 0x1F, 0x00, 0x00 },  // "l"
  { 3, 0x1E, 0x04, 0x1E },  // "m"
  { 3, 0x1E, 0x02, 0x1E },  // "n"
  { 3, 0x1E, 0x12, 0x1E },  // "o"
  { 3, 0x1F, 0x09, 0x06 },  // "p"
  { 3, 0x06, 0x09, 0x1F },  // "q"
  { 3, 0x1E, 0x02, 0x04 },  // "r"
  { 3, 0x14, 0x16, 0x0A },  // "s"
  { 3, 0x02, 0x0F, 0x12 },  // "t"
  { 3, 0x1E, 0x10, 0x1E },  // "u"
  { 3, 0x0E, 0x10, 0x0E },  // "v"
  { 3, 0x1E, 0x08, 0x1E },  // "w"
  { 3, 0x1A, 0x04, 0x1A },  // "x"
  { 3, 0x17, 0x14, 0x0F },  // "y"
  { 3, 0x1A, 0x12, 0x16 },  // "z"
  { 3, 0x04, 0x1F, 0x11 },  // "{"
  { 1, 0x1F, 0x00, 0x00 },  // "|"
  { 3, 0x11, 0x1F, 0x04 },  // "}"
  { 3, 0x04, 0x0C, 0x08 }   // "~"
};

// Sketch-wide state. setup() fills `size` and `bitMap`; loop() copies a
// 24-column window of bitMap into displayBuffer; the Timer2 interrupt reads
// displayBuffer to drive the LEDs.
const uint16_t delayTime = 100;  // milliseconds between scroll steps in loop()
const uint8_t rows = 24;         // not referenced by the code in this listing
const uint8_t columns = 5;       // not referenced by the code in this listing
uint32_t time;                   // millis() value at the last scroll step
char message[] = "This is a test string ";   // add a blank at the end to separate  head and tail during scrolling
// ensure message is shorter than 256 bytes.
uint8_t size = 0;                // rendered width of message in columns; also the length of bitMap
uint8_t * bitMap ; // you can use this as an array once storage has been assigned to it.
volatile uint8_t displayBuffer[24] = { 0 };  // one byte per display column; volatile because the ISR reads it


// Timer2 compare-match A interrupt: one multiplexing step per tick. Shifts
// out the select word for the current column and the inverted row data from
// displayBuffer, latches them, then advances to the next column (0..23).
ISR(TIMER2_COMPA_vect) {
  static uint8_t activeColumn = 0;  // persists between interrupts

  // One-hot 24-bit column select word, sent high byte first.
  const uint32_t selectWord = (uint32_t)1 << activeColumn;

  digitalWrite(Latch, LOW);
  for (int8_t shift = 16; shift >= 0; shift -= 8) {
    FSOcol.write((uint8_t)(selectWord >> shift));
  }

  // Row data for this column, inverted.
  FSOrow.write(~displayBuffer[activeColumn]);
  digitalWrite(Latch, HIGH);

  activeColumn = (activeColumn + 1) % 24;
}


// Configures the output pins, renders `message` into a heap-allocated bitMap
// (one byte per display column, with a blank separator column between
// characters), dumps the result on the serial port and finally starts Timer2
// so that TIMER2_COMPA_vect fires periodically.
// Precondition: every character of `message` is printable ASCII (32..126).
void setup() {
  Serial.begin(115200);

  pinMode(Data1, OUTPUT);
  pinMode(Clock1, OUTPUT);
  pinMode(Data2, OUTPUT);
  pinMode(Clock2, OUTPUT);
  pinMode(Latch, OUTPUT);

  // Pass 1: measure the rendered width. The string is walked by pointer so
  // strlen() is not re-evaluated on every iteration.
  size = 0;
  for (const char *p = message; *p != '\0'; p++) {
    size += alphabet[*p - 32][0];
    if (p[1] != '\0') {  // separator column after every character but the last
      size++;
    }
  }

  bitMap = new uint8_t[ size ] ;    // DECLARED GLOBAL storage assigned here.
  if (!bitMap) {
    // The allocation can fail when RAM is exhausted; writing through a null
    // pointer would corrupt memory, so report the problem and stop here,
    // before the display interrupt is started.
    Serial.println("bitMap allocation failed");
    for (;;) {
    }
  }

  // Pass 2: copy the glyph columns (and separators) into bitMap.
  uint8_t x = 0;  // next free column in bitMap
  for (const char *p = message; *p != '\0'; p++) {
    const uint8_t *glyph = alphabet[*p - 32];
    for (uint8_t j = 0; j < glyph[0]; j++) {
      bitMap[x++] = glyph[j + 1];
    }
    if (p[1] != '\0') {
      bitMap[x++] = 0;
    }
  }

  // for testing dump bitMap[]
  for (uint8_t i = 0; i < size ; i++) {
    Serial.print( i ) ;
    Serial.print( '\t' ) ;
    for (uint8_t j = 0; j < 8 ; j++) {
      Serial.print( bitRead( bitMap[ i ] , 7 - j) == true ? "1" : "0") ;
    }
    Serial.println() ;
  }

  // set up timer 2 to call multiplex routine with a 250us period
  cli() ;
  TCNT2 = 0 ;
  TCCR2A = _BV( WGM21 ) ; //  CTC mode
  TCCR2B = _BV( CS22 )  ; //  PS /64,
  TIMSK2 = _BV( OCIE2A); // interrupt TIMER2_COMPA_vect
  OCR2A = 62 ;  // ~4kHz @ PS /64
  sei() ;
}

// Every `delayTime` milliseconds, copies the next 24-column window of bitMap
// into displayBuffer (wrapping around at the end of the text) and advances
// the window by one column. The display itself is refreshed by the timer
// interrupt, independently of this function.
void loop() {
  static uint8_t index = 0; // static initialized once only.

  // Nothing to show (empty message or no storage): skip the copy, which
  // would otherwise evaluate `% 0` or read through a null pointer.
  if (size == 0 || !bitMap) {
    return;
  }

  if (millis() - time >= delayTime) {
    time = millis();
    for ( uint8_t i = 0 ; i < 24 ; i ++ )  displayBuffer[ i ]  = bitMap[ ( index + i) % size ] ; // size is of bitMap
    if ( ++index >= size ) index = 0 ;
  }
}

Your code didn't work, which made me investigate. But after fixing the hardware and tweaking the code, your code did work. But before, those last 8 columns were only dim.

Also, I will try the new method, but with my own shifting method.

The reason is, that I want to make the whole code by myself (no libraries), but if I have to, I will use the fast method.

The advantage of using a timer to control the multiplexing is that it is independent of the activities in the loop, which might otherwise cause noticeable flicker on the display, especially if there is some blocking code like delay() etc. If you are simply scrolling a message it may not be such a big problem. If you turn your display into, for example, a clock which has to fetch a time stamp from an external source then it could be a problem. Once you get more proficient, you could anyway incorporate the main part of that library in your code. It really does not do that much, simply more or less replacing digitalWrite() with direct port manipulation.

When you talk about a timer, do you mean using millis() instead of delay()?