read/print only one byte from utf8 input?

I am running the "Parola scrolling" example, which prints input received from a Bluetooth terminal on a MAX7219 display.
Everything works great until I try to print Hebrew; then it prints one ASCII 255 character followed by another ASCII 255 character.
I understand that a Hebrew character consists of two bytes. How can I take only the relevant byte (the one that changes) and print it?
I need to print only one byte, because I will later map that byte to a Hebrew glyph.
How do I do that?

the code:

// Use the Parola library to scroll text on the display
//
// Demonstrates the use of the scrolling function to display text received
// from the serial interface
//
// User can enter text on the serial monitor and this will display as a
// scrolling message on the display.
// Speed for the display is controlled by a pot on SPEED_IN analog in.
// Scrolling direction is controlled by a switch on DIRECTION_SET digital in.
// Invert ON/OFF is set by a switch on INVERT_SET digital in.
//
// UISwitch library can be found at https://github.com/MajicDesigns/MD_UISwitch
// MD_MAX72XX library can be found at https://github.com/MajicDesigns/MD_MAX72XX
//

#include <MD_Parola.h>
#include <MD_MAX72xx.h>
#include <SPI.h>

// set to 1 if we are implementing the user interface pot, switch, etc
#define USE_UI_CONTROL 0

#if USE_UI_CONTROL
#include <MD_UISwitch.h>
#endif

// Turn on debug statements to the serial output
#define DEBUG 0

#if DEBUG
#define PRINT(s, x) { Serial.print(F(s)); Serial.print(x); }
#define PRINTS(x) Serial.print(F(x))
#define PRINTX(x) Serial.println(x, HEX)
#else
#define PRINT(s, x)
#define PRINTS(x)
#define PRINTX(x)
#endif

// Define the number of devices we have in the chain and the hardware interface
// NOTE: These pin numbers will probably not work with your hardware and may
// need to be adapted
#define HARDWARE_TYPE MD_MAX72XX::FC16_HW
#define MAX_DEVICES 8
#define CLK_PIN   13
#define DATA_PIN  11
#define CS_PIN    10

// HARDWARE SPI
MD_Parola P = MD_Parola(HARDWARE_TYPE, CS_PIN, MAX_DEVICES);
// SOFTWARE SPI
//MD_Parola P = MD_Parola(HARDWARE_TYPE, DATA_PIN, CLK_PIN, CS_PIN, MAX_DEVICES);

// Scrolling parameters
#if USE_UI_CONTROL
const uint8_t SPEED_IN = A5;
const uint8_t DIRECTION_SET = 8;  // change the effect
const uint8_t INVERT_SET = 9;     // change the invert

const uint8_t SPEED_DEADBAND = 5;
#endif // USE_UI_CONTROL

uint8_t scrollSpeed = 70;    // default frame delay value
textEffect_t scrollEffect = PA_SCROLL_RIGHT;
textPosition_t scrollAlign = PA_LEFT;
uint16_t scrollPause = 1; // in milliseconds

// Global message buffers shared by Serial and Scrolling functions
#define	BUF_SIZE	75
char curMessage[BUF_SIZE] = { "" };
char newMessage[BUF_SIZE] = { "Hello! Enter new message?" };
bool newMessageAvailable = true;

#if USE_UI_CONTROL

MD_UISwitch_Digital uiDirection(DIRECTION_SET);
MD_UISwitch_Digital uiInvert(INVERT_SET);

void doUI(void)
{
  // set the speed if it has changed
  {
    int16_t speed = map(analogRead(SPEED_IN), 0, 1023, 10, 150);

    if ((speed >= ((int16_t)P.getSpeed() + SPEED_DEADBAND)) ||
      (speed <= ((int16_t)P.getSpeed() - SPEED_DEADBAND)))
    {
      P.setSpeed(speed);
      scrollSpeed = speed;
      PRINT("\nChanged speed to ", P.getSpeed());
    }
  }

  if (uiDirection.read() == MD_UISwitch::KEY_PRESS) // SCROLL DIRECTION
  {
    PRINTS("\nChanging scroll direction");
    scrollEffect = (scrollEffect == PA_SCROLL_LEFT ? PA_SCROLL_RIGHT : PA_SCROLL_LEFT);
    P.setTextEffect(scrollEffect, scrollEffect);
    P.displayClear();
    P.displayReset();
  }

  if (uiInvert.read() == MD_UISwitch::KEY_PRESS)  // INVERT MODE
  {
    PRINTS("\nChanging invert mode");
    P.setInvert(!P.getInvert());
  }
}
#endif // USE_UI_CONTROL

void readSerial(void)
{
  static char *cp = newMessage;

  while (Serial.available())
  {
    *cp = (char)Serial.read();
    if ((*cp == '\n') || (cp - newMessage >= BUF_SIZE-2)) // end of message character or full buffer
    {
      *cp = '\0'; // end the string
      // restart the index for next filling spree and flag we have a message waiting
      cp = newMessage;
      newMessageAvailable = true;
    }
    else  // move char pointer to next position
      cp++;
  }
}

// One-time initialisation: serial port, optional UI hardware, then the display.
void setup()
{
  // Open the serial link at 9600 baud and print a short usage banner.
  Serial.begin(9600);
  Serial.print("\n[Parola Scrolling Display]\nType a message for the scrolling display\nEnd message line with a newline");

#if USE_UI_CONTROL
  // Prepare the two switches and the speed pot input.
  uiDirection.begin();
  uiInvert.begin();
  pinMode(SPEED_IN, INPUT);

  // Read the controls once so the initial settings reflect the hardware.
  doUI();
#endif // USE_UI_CONTROL

  // Start the display and hand it curMessage with the scrolling parameters;
  // the same effect is passed for both effect arguments.
  P.begin();
  P.displayText(curMessage, scrollAlign, scrollSpeed, scrollPause, scrollEffect, scrollEffect);
}

void loop()
{
#if USE_UI_CONTROL
  doUI();
#endif // USE_UI_CONTROL

  if (P.displayAnimate())
  {
    if (newMessageAvailable)
    {
      strcpy(curMessage, newMessage);
      newMessageAvailable = false;
    }
    P.displayReset();
  }
  readSerial();
}

Are you sure that it is UTF-8? UTF-8 is not always two bytes.
Java's Unicode strings used two bytes per character for a long time, until that was not enough and they had to expand it without breaking the code that was already written.
Unicode is not the same thing as UTF-8.

A two-byte UTF-8 character is 0b110....., 0b10......

The next example seems a bit clumsy, perhaps someone else knows a better way:

const byte utf8text[] = "Hello ° ½ ± © Ø";


void setup() 
{
  Serial.begin(9600);

  Serial.println( "----------------------------------------");

  Serial.println( (const char *)utf8text);

  int n = strlen( (const char *)utf8text);
  for( int i=0; i<n; i++)
  {
    Serial.print( utf8text[i], HEX);
  }
  Serial.println();
  
  for( int i=0; i<n;)
  {
    if( ((utf8text[i] & 0b11100000) == 0b11000000) && ((utf8text[i+1] & 0b11000000) == 0b10000000))
    {
      Serial.print( "UTF-8: ");
      Serial.print( utf8text[i], HEX);
      Serial.print( ",");
      Serial.print( utf8text[i+1], HEX);
      Serial.println();
      i += 2;
    }
    else
    {
      Serial.println( utf8text[i], HEX);
      i += 1;
    }
  }
}

// Intentionally empty: this example produces all of its output in setup().
void loop() 
{
}

This forum, your browser, the Arduino IDE and the Serial Monitor all support UTF-8. You should always see my example text correctly.

Yes, I am sure Hebrew is represented by two bytes.
For example:
Alef - א - d7 90
Bet - ב - d7 91

As you can see, only the second byte changes.
So when I try to print it, it prints the character for d7 and then the character for 90...

I am trying to understand what you did with that code. How do I implement it in mine?
Remember that I am trying to show it on a MAX7219, so what is stored through *cp must change.

Both bytes of a two-byte UTF-8 character have the highest bit set.
So if you are sure that only normal 7-bit ASCII text is used together with two-byte UTF-8 characters, and every Hebrew character has a different second byte, then you can test for the first byte of the UTF-8 character and ignore it. Store the second byte in the array just like a normal character. Later on you can translate or decode the second byte.

void readSerial(void)
{
  static int index = 0;

  while (Serial.available())     // I prefer: if( Serial.available() > 0)
  {
    byte inChar = (byte) Serial.read();

    if( (inChar == '\n') || (inChar == '\r') || (index >= BUF_SIZE-2))   // end of message character or full buffer
    {
      newMessage[index] = '\0'; // end the string
      index = 0;                          // restart the index for next time
      newMessageAvailable = true;
    }
    else if( (inChar & 0b11100000) == 0b11000000)
    {
      // First part of a UTF-8 character
      // Ignore it. Do not store it, do not increment index.
    }
    else
    {
      // A normal ASCII character or the second part of the UTF-8 character has been received.
      newMessage[index] = (char) inChar;   // store it
      index++;        // increment index
    }
  }
}

I'm sure this will not work.

The first byte is just as important, the range D4 up to D7 for the first bytes is used for Hebrew.
Ԁ = 0xD4, 0x80
Հ = 0xD5, 0x80
ր = 0xD6, 0x80
D7 is also used.

What the code should be, depends on how you are going to translate it to your own defined 8-bit value, if that is what you are planning to do.

I would store the first part as well and set a boolean flag when the first part is received. The next time something is received, check the flag and if set, check if the second part of the UTF-8 character is received.
Then you can translate the UTF-8 bytes into something else.

I don't mind that those bytes are in use, because I am going to change them in the cpp font file; I will define the Hebrew alphabet bit by bit.
If I can store the second byte, then I can map it to any number (1-255), and then it will show only one character, which I can modify later.
I hope you understand me; as you can tell, English is not my first language.
What is the code to store only the second byte and skip the first one without buffering problems?

If you have a text and use 0...127 for normal ASCII text, then you can use 128...255 for your own designed characters.
Then you have to translate the two bytes of UTF-8 to one of those 128...255 numbers.

You can not ignore the first byte and only use the second byte of the UTF-8 character code.

I'm looking at this page: Unicode/UTF-8-character table - starting from code position 0500.

Suppose the second byte is 0x85 and you have not stored the first byte. Then you don't know which one it is.
0xD4 0x85 = ԅ
0xD5 0x85 = Յ
0xD6 0x85 = օ
0xD7 0x85 = ׅ

Is your question how to convert the two bytes of UTF-8 into your own number of 128...255 ?

Yes, this is what I am trying to do.
From there it will be easy for me to reach my goal.
I'm sure that there are a few ways to print Hebrew, but I didn't find any other solution.
So, how do I convert the two bytes to a 1-255 number?

With a lookup table.

Suppose the number 128...255 is not used in the lookup table, but only the UTF-8 characters.
Then compare the two bytes of the UTF-8 character with the table until it is found, and then you know the number.

That means searching the table for every UTF-8 character. That is okay, that is a normal way to do that.

// option 1
// Each row holds the two UTF-8 bytes of one character. A character's own
// number is its row index plus 128, so it is found by comparing the received
// byte pair against the rows one by one.
const byte lookupTable[][2] = 
{
  { 0xD4, 0x80 },       // Ԁ   index 0, number 128
  { 0xD5, 0xAD },     // խ   index 1, number 129
};


// option 2
// Same table written as UTF-8 string literals: each entry is the character
// itself, stored by the compiler as its UTF-8 bytes plus a terminating zero
// (the sketch file must be saved as UTF-8). A character's own number is its
// index plus 128.
// The element type is char because a string literal cannot initialise a
// pointer to byte; cast to byte when comparing against received bytes.
const char *pLookupTable[] =
{
  "Ԁ",   //  index 0, number 128
  "խ",  //  index 1, number 129
};

I found another solution!
In the cpp font file I defined the first byte (I found it to be equal to 115; it is the one that doesn't change) as 0, so now only the second byte is printed. I defined all the numbers above this byte as Hebrew letters, and it works!
But don't get too excited: it is reversed.
Because Hebrew is read RIGHT TO LEFT, instead of "שלום" I get "םולש".
Now I need to reverse it somehow.
Sorry to bother you again.

EDIT:
Great success!
I found out how to reverse it and the code works great. Thanks for your help.
I am guessing it will also help Arabic users who are looking for this kind of program.

This is the code to run Hebrew with the Parola scrolling example:

// Use the Parola library to scroll text on the display
//
// Demonstrates the use of the scrolling function to display text received
// from the serial interface
//
// User can enter text on the serial monitor and this will display as a
// scrolling message on the display.
// Speed for the display is controlled by a pot on SPEED_IN analog in.
// Scrolling direction is controlled by a switch on DIRECTION_SET digital in.
// Invert ON/OFF is set by a switch on INVERT_SET digital in.
//
// UISwitch library can be found at https://github.com/MajicDesigns/MD_UISwitch
// MD_MAX72XX library can be found at https://github.com/MajicDesigns/MD_MAX72XX
//

#include <MD_Parola.h>
#include <MD_MAX72xx.h>
#include <SPI.h>

// set to 1 if we are implementing the user interface pot, switch, etc
#define USE_UI_CONTROL 0

#if USE_UI_CONTROL
#include <MD_UISwitch.h>
#endif

// Turn on debug statements to the serial output
#define DEBUG 0

#if DEBUG
#define PRINT(s, x) { Serial.print(F(s)); Serial.print(x); }
#define PRINTS(x) Serial.print(F(x))
#define PRINTX(x) Serial.println(x, HEX)
#else
#define PRINT(s, x)
#define PRINTS(x)
#define PRINTX(x)
#endif

// Define the number of devices we have in the chain and the hardware interface
// NOTE: These pin numbers will probably not work with your hardware and may
// need to be adapted
#define HARDWARE_TYPE MD_MAX72XX::FC16_HW
#define MAX_DEVICES 8
#define CLK_PIN   13
#define DATA_PIN  11
#define CS_PIN    10

// HARDWARE SPI
MD_Parola P = MD_Parola(HARDWARE_TYPE, CS_PIN, MAX_DEVICES);
// SOFTWARE SPI
//MD_Parola P = MD_Parola(HARDWARE_TYPE, DATA_PIN, CLK_PIN, CS_PIN, MAX_DEVICES);

// Scrolling parameters
#if USE_UI_CONTROL
const uint8_t SPEED_IN = A5;
const uint8_t DIRECTION_SET = 8;  // change the effect
const uint8_t INVERT_SET = 9;     // change the invert

const uint8_t SPEED_DEADBAND = 5;
#endif // USE_UI_CONTROL

uint8_t scrollSpeed = 50;    // default frame delay value
textEffect_t scrollEffect = PA_SCROLL_RIGHT;
textPosition_t scrollAlign = PA_LEFT;
uint16_t scrollPause = 1; // in milliseconds

// Global message buffers shared by Serial and Scrolling functions
#define BUF_SIZE 75
char curMessage[BUF_SIZE] = { "" };
char newMessage[BUF_SIZE] = { "Hello! Enter new message?" };
bool newMessageAvailable = true;

#if USE_UI_CONTROL

MD_UISwitch_Digital uiDirection(DIRECTION_SET);
MD_UISwitch_Digital uiInvert(INVERT_SET);

void doUI(void)
{
  // set the speed if it has changed
  {
    int16_t speed = map(analogRead(SPEED_IN), 0, 1023, 10, 150);

    if ((speed >= ((int16_t)P.getSpeed() + SPEED_DEADBAND)) ||
      (speed <= ((int16_t)P.getSpeed() - SPEED_DEADBAND)))
    {
      P.setSpeed(speed);
      scrollSpeed = speed;
      PRINT("\nChanged speed to ", P.getSpeed());
    }
  }

  if (uiDirection.read() == MD_UISwitch::KEY_PRESS) // SCROLL DIRECTION
  {
    PRINTS("\nChanging scroll direction");
    scrollEffect = (scrollEffect == PA_SCROLL_LEFT ? PA_SCROLL_RIGHT : PA_SCROLL_LEFT);
    P.setTextEffect(scrollEffect, scrollEffect);
    P.displayClear();
    P.displayReset();
  }

  if (uiInvert.read() == MD_UISwitch::KEY_PRESS)  // INVERT MODE
  {
    PRINTS("\nChanging invert mode");
    P.setInvert(!P.getInvert());
  }
}
#endif // USE_UI_CONTROL

      char* reverse(char *str)
      {
        size_t len = strlen(str);
        size_t i=0;
        while (len > i)
        {char tmp = str[--len];
        str[len] = str[i];
        str[i++] = tmp;
        }
        return str;
      }
      
void readSerial(void)
{
  static char *cp = newMessage;

  while (Serial.available())
  {
    *cp = (char)Serial.read();
    if ((*cp == '\n') || (cp - newMessage >= BUF_SIZE-2)) // end of message character or full buffer
    {
      *cp = '\0'; // end the string
      // restart the index for next filling spree and flag we have a message waiting
      cp = newMessage;
      newMessageAvailable = true;

      reverse(cp);
    }
    else  // move char pointer to next position
      cp++;
  }
}

// One-time initialisation: serial port, optional UI hardware, then the display.
void setup()
{
  // Open the serial link at 9600 baud and print a short usage banner.
  Serial.begin(9600);
  Serial.print("\n[Parola Scrolling Display]\nType a message for the scrolling display\nEnd message line with a newline");

#if USE_UI_CONTROL
  // Prepare the two switches and the speed pot input.
  uiDirection.begin();
  uiInvert.begin();
  pinMode(SPEED_IN, INPUT);

  // Read the controls once so the initial settings reflect the hardware.
  doUI();
#endif // USE_UI_CONTROL

  // Start the display and hand it curMessage with the scrolling parameters;
  // the same effect is passed for both effect arguments.
  P.begin();
  P.displayText(curMessage, scrollAlign, scrollSpeed, scrollPause, scrollEffect, scrollEffect);
}

void loop()
{
#if USE_UI_CONTROL
  doUI();
#endif // USE_UI_CONTROL

  if (P.displayAnimate())
  {
    if (newMessageAvailable)
    {
      strcpy(curMessage, newMessage);
      newMessageAvailable = false;
    }
    P.displayReset();
  }
  readSerial();
}

In the cpp font file you need to change number 115 to 0, and numbers 144 to 170 to this:

  7, 99, 119, 12, 24, 51, 127, 96, 	// 144א
  7, 65, 65, 65, 65, 127, 126, 64, 	// 145ב
  5, 65, 97, 63, 126, 96, 	// 146ג
  7, 1, 1, 1, 1, 127, 127, 1, 	// 147ד
  7, 121, 121, 1, 1, 1, 127, 126, 	// 148ה
  3, 1, 127, 127, 	// 149ו
  4, 65, 103, 63, 25, 	// 150ז
  7, 125, 127, 3, 1, 1, 127, 126, 	// 151ח
  7, 63, 127, 64, 70, 65, 127, 62, 	// 152ט
  4, 1, 1, 15, 14, 	// 153י
  7, 1, 1, 1, 1, 249, 255, 134, 	// 154ך
  7, 65, 65, 65, 65, 65, 127, 62, 	// 155כ
  5, 7, 103, 100, 52, 28, 	// 156ל
  7, 125, 127, 67, 65, 65, 127, 126, 	// 157ם
  7, 125, 127, 6, 67, 65, 127, 126, 	// 158מ
  4, 1, 249, 255, 134, 	// 159ן
  6, 96, 96, 99, 99, 127, 126, 	// 160נ
  7, 1, 63, 65, 65, 65, 35, 30, 	// 161ס
  7, 65, 95, 127, 64, 97, 63, 31, 	// 162ע
  7, 29, 31, 17, 1, 255, 254, 128, 	// 163ף
  7, 93, 95, 81, 65, 65, 127, 126, 	// 164פ
  7, 1, 255, 255, 144, 25, 15, 7, 	// 165ץ
  7, 65, 71, 79, 88, 113, 111, 71, 	// 166צ
  6, 253, 253, 1, 49, 63, 14, 	// 167ק
  7, 1, 1, 1, 1, 1, 127, 126, 	// 168ר
  7, 63, 127, 80, 95, 64, 127, 63, 	// 169ש
  7, 97, 127, 127, 3, 3, 127, 126, 	// 170ת

I'm glad that you have success :smiley:
It is hard for me to understand, I never did something with right-to-left. I suppose a dot or a question mark at the end of a sentence is not part of Hebrew.

Fun fact: Do you know what this is: 0xF0, 0x9F, 0x87, 0xAE, 0xF0, 0x9F, 0x87, 0xB1
Those are two UTF-8 characters, each 4 bytes long. That is a total of 8 bytes for a single character.
That single character is an emoticon of a flag: 🇮🇱 Flag for Israel Emoji.

Lol, you can see from my comments that I did try to use some of these marks.
But you are right, my code needs to be improved with a few "if"s; I won't use any of these marks in this project anyway.
In addition, the code can't distinguish between English and Hebrew, so it reverses the English as well.