easy to understand examples of SafeString-function stoken

Hi everybody,
I have started to write demo-codes for the SafeString-library. My main goal is to write easy to understand demo-codes: each one comes as a short example stripped down to the essentials, plus a version with quite a lot of comments and additional serial output to explain what is going on.

So here are two demo-codes.
This one is stripped down to the bare minimum to show how to use the function .stoken

#include "SafeString.h"
// createSafeString(<name>, <max characters>) defines a SafeString variable at file scope.
createSafeString(MyLongStr,    64); // the full text to split (the demo text is 54 characters long)
createSafeString(MySmallerStr, 16); // receives one token at a time (longest demo token: 15 characters)
int  MyTokenNr = 0; // index handed to stoken() and overwritten with its return value; negative ends the loops
char MyDelimiter;   // character that separates the tokens; assigned in setup()

void setup() {
  Serial.begin(115200);
  Serial.println("Setup-Start");
  MyDelimiter = ','; 
   
  MyLongStr = "Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  "; 
  Serial.println(MyLongStr);
  Serial.println("Start extracting...");
  while (MyTokenNr >= 0) { 
    MyTokenNr = MyLongStr.stoken(MySmallerStr, MyTokenNr, MyDelimiter);
    Serial.print("extracted a token ");
    Serial.println(MySmallerStr);
  }  
  Serial.println("finished extracting");

  MyDelimiter = '@'; 
  Serial.println(MyLongStr);
  Serial.println("Start extracting...");
  while (MyTokenNr >= 0) { 
    MyTokenNr = MyLongStr.stoken(MySmallerStr, MyTokenNr, MyDelimiter);
    Serial.print("extracted a token ");
    Serial.println(MySmallerStr);
  }  
  Serial.println("finished extracting");
}

void loop() {
  // intentionally empty: the whole demo runs once inside setup()
}

serial output should look like this

Setup-Start
Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  
Start extracting...
extracted a token Part 1  
extracted a token This is Part 2
extracted a token that's Part 3 
extracted a token    and Part 4  
finished extracting
Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  
Start extracting...
finished extracting

and here is a richly commented version that explains a lot of details and has some additional functions

#include "SafeString.h"

// Defining a variable of type SafeString looks a little different from an
// ordinary variable definition: it is written like a function call.
// The basic structure is
// createSafeString( <Name-of-Variable>, <Number-of-max-characters>);

createSafeString(MyLongStr,    64); // the full text to split (the demo text is 54 characters long)
createSafeString(MySmallerStr, 16); // receives one token at a time (longest demo token: 15 characters)
int  MyTokenNr = 0; // index handed to stoken() and overwritten with its return value; negative ends the loops
char MyDelimiter;   // character that separates the tokens; set by the Tokenize... functions

// Prints the given SafeString framed by a leading and a trailing '#' and then
// ends the line, so that it is clearly visible which characters (including
// leading/trailing spaces) are inside the string.
// Code should be structured into functions that each do ONE thing; this is
// the one thing this function does.
// Note the '&' after the type name: SafeString parameters are declared as
// references.
void PrintlnDblX(SafeString& p_ChToPrint) {
  const char* const frame = "#";
  Serial.print(frame);
  Serial.print(p_ChToPrint);
  Serial.println(frame);
}

void setup() {
  Serial.begin(115200);            // open the serial port; the serial monitor must use 115200 too
  Serial.println("Setup-Start");   // first line of output, shows that the sketch has started
}

// Runs both demos once and then halts.
void loop() {
  TokenizeAtCommas();
  TryTo_TokenizeAtCommas();

  // Endless empty loop: execution stays here, so loop() never starts over
  // and the demos are not repeated.
  for (;;) {
  }
}

// Splits the demo text at every comma and prints each extracted token framed
// by '#' characters.
void TokenizeAtCommas() {
  // A "token" is simply a sequence of characters up to the next delimiting
  // character. With ',' as the delimiter, the text below has these tokens:
  //   "Part 1  "
  //   "This is Part 2"
  //   "that's Part 3 "
  //   "   and Part 4  "
  // Pay attention to the leading and trailing spaces: they are part of the token.
  MyLongStr = "Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  ";
  MyDelimiter = ',';

  Serial.print("Tokenizing string at delimiting character  ");
  Serial.println(MyDelimiter);

  Serial.println();
  Serial.println("String to tokenize is");
  PrintlnDblX(MyLongStr);

  // Now "tokenize" the string. Each stoken() call puts the next token into
  // MySmallerStr and its return value is stored in MyTokenNr, which is fed
  // back in on the next call. The loop stops once that value is negative.
  // Extraction starts at whatever value MyTokenNr holds on entry.
  Serial.println("start extracting...");
  for (;;) {
    if (MyTokenNr < 0) {
      break;
    }
    MyTokenNr = MyLongStr.stoken(MySmallerStr, MyTokenNr, MyDelimiter);
    Serial.print("extracted a token ");
    PrintlnDblX(MySmallerStr);
  }
  Serial.println("extracting finished");

  // three empty lines to separate this demo from the next output
  for (int blankLine = 0; blankLine < 3; ++blankLine) {
    Serial.println();
  }
}

// Repeats the tokenizing demo with '@' as the delimiter, although the text
// only contains commas ',' as separators.
void TryTo_TokenizeAtCommas() {
  MyLongStr = "Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  ";
  MyDelimiter = '@';

  Serial.print("Tokenizing string at delimiting character '");
  Serial.print(MyDelimiter);
  Serial.println("'");
  Serial.println();
  Serial.println("String to tokenize is");
  PrintlnDblX(MyLongStr);

  // Same extraction loop as for the commas: it runs while MyTokenNr is not
  // negative. MyTokenNr is NOT reset in this function, so if it is already
  // negative on entry (as it is after TokenizeAtCommas() has finished) the
  // loop body is skipped and no token is printed.
  Serial.println("start extracting...");
  for (;;) {
    if (MyTokenNr < 0) {
      break;
    }
    MyTokenNr = MyLongStr.stoken(MySmallerStr, MyTokenNr, MyDelimiter);
    Serial.print("extracted a token ");
    PrintlnDblX(MySmallerStr);
  }
  Serial.println("extracting finished");

  // three empty lines to close off the output of this demo
  for (int blankLine = 0; blankLine < 3; ++blankLine) {
    Serial.println();
  }
}

serial output should look like this

Setup-Start
Tokenizing string at delimiting character  ,

String to tokenize is
#Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  #
start extracting...
extracted a token #Part 1  #
extracted a token #This is Part 2#
extracted a token #that's Part 3 #
extracted a token #   and Part 4  #
extracting finished



Tokenizing string at delimiting character '@'

String to tokenize is
#Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  #
start extracting...
extracting finished

As I'm learning how to use the SafeString-library myself from time to time I will add more democodes that explain other functions of SafeString.
best regards Stefan

I have started to write demo-codes for the SafeString-library

You are brave to venture into the String/string debate

Personally I prefer to use C style strings, but everyone to their own

An example of extracting parts of a string using C style strings would be

// Text to split. This is a writable char array (initialised from the literal),
// not a pointer to the literal itself: strtok() overwrites every delimiter it
// finds with '\0', and writing into a string literal is undefined behaviour
// (it crashes on targets that keep literals in read-only memory).
char MyLongStr[] = "Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  ";
// Token most recently returned by strtok(); NULL before the first call and after the last token.
char * strPart = NULL;
// Delimiter characters handed to strtok(); only read, hence const.
const char * delimiter = ",";

void setup()
{
  Serial.begin(115200);
  strPart = strtok(MyLongStr, delimiter);
  while (strPart != NULL)
  {
    Serial.println(strPart);
    strPart = strtok(NULL, delimiter);
  }
}

void loop()
{
  // intentionally empty: all work is done once in setup()
}

Thanks for that Stefan. I am building a page on SoftwareSolutions for Arduino that covers both Arduino Strings and SafeString demos. I have not got as far as parsing yet...
Each 'solution' has its pros: and cons:

There are no solutions using c-string methods, like #2, though, as they have just too many cons: and the resulting code is too fragile. It is incredibly easy to write a very simple example that appears to run correctly on UNO but in fact has a buffer overflow which blows up as soon as you run the same code on an ESP32.
So you really don't want to be using c-string methods if you want a reliable program.

Each 'solution' has its pros: and cons:

I agree

you really don't want to be using c-string methods if you want a reliable program.

What you do want to be using is a method that you understand and suits the needs of your project

In order to do a comparison between the 2 methods I loaded the first sketch from the original post and ran it, only to find that it goes into an infinite loop reporting that it has

extracted a token

What was it I said about understanding the method that you used ?

Are there any direct comparisons of the amount of memory that SafeString uses versus c strings ?

The example in the first post uses, on a Pro Mini:

Sketch uses 9812 bytes (30%) of program storage space.
Global variables use 582 bytes (28%) of dynamic memory.

How much space would a similar program take using a c string approach ?

Taking a stripped version of the first example (left out the second part) in the OP and a modified version of reply #1 to create similar output, SafeString seems expensive. Obviously some things come at a price. Compiled for a Nano using IDE 1.8.5 with boards package 1.8.3

#include "SafeString.h"
// createSafeString(<name>, <max characters>) defines a SafeString variable at file scope.
createSafeString(MyLongStr,    64); // the full text to split (the demo text is 54 characters long)
createSafeString(MySmallerStr, 16); // receives one token at a time (longest demo token: 15 characters)
int  MyTokenNr = 0; // index handed to stoken() and overwritten with its return value; negative ends the loop
char MyDelimiter;   // character that separates the tokens; assigned in setup()

void setup() {
  Serial.begin(115200);
  Serial.println("Setup-Start");
  MyDelimiter = ',';

  MyLongStr = "Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  ";
  Serial.println(MyLongStr);
  Serial.println("Start extracting...");
  while (MyTokenNr >= 0) {
    MyTokenNr = MyLongStr.stoken(MySmallerStr, MyTokenNr, MyDelimiter);
    Serial.print("extracted a token ");
    Serial.println(MySmallerStr);
  }
  Serial.println("finished extracting");
}

void loop()
{
  // intentionally empty: the demo runs once inside setup()
}

Sketch uses 4044 bytes (13%) of program storage space. Maximum is 30720 bytes.
Global variables use 593 bytes (28%) of dynamic memory, leaving 1455 bytes for local variables. Maximum is 2048 bytes.

// Text to split. This is a writable char array (initialised from the literal),
// not a pointer to the literal itself: strtok() overwrites every delimiter it
// finds with '\0', and writing into a string literal is undefined behaviour
// (it crashes on targets that keep literals in read-only memory).
char MyLongStr[] = "Part 1  ,This is Part 2,that's Part 3 ,   and Part 4  ";
// Token most recently returned by strtok(); NULL before the first call and after the last token.
char * strPart = NULL;
// Delimiter characters handed to strtok(); only read, hence const.
const char * delimiter = ",";

void setup()
{
  Serial.begin(115200);
  Serial.println("Setup-Start");

  Serial.println(MyLongStr);
  Serial.println("Start extracting...");

  strPart = strtok(MyLongStr, delimiter);
  while (strPart != NULL)
  {
    Serial.print("extracted a token ");
    Serial.println(strPart);

    strPart = strtok(NULL, delimiter);
  }
  Serial.println("finished extracting");
}

void loop()
{
  // intentionally empty: all work is done once in setup()
}

Sketch uses 1814 bytes (5%) of program storage space. Maximum is 30720 bytes.
Global variables use 320 bytes (15%) of dynamic memory, leaving 1728 bytes for local variables. Maximum is 2048 bytes.

In defence of SafeString, stoken() can return empty fields in a comma delimited string, strtok() can't. You will have to write a strtok() that will return empty fields (not complicated).

@StefanL18:
I think that that feature is worth adding to a next iteration of an example.

// NOTE(review): the extra fourth argument 'true' presumably makes stoken() return
// empty fields as tokens instead of skipping them - confirm against the SafeString docs
MyTokenNr = MyLongStr.stoken(MySmallerStr, MyTokenNr, MyDelimiter, true);

I suspect many - including myself would be happy to see a fully implemented String library on any processor for ease and convenience.

The issue I see is simply that small micro controllers just weren’t designed to be ‘computers’.

Those that put chunky libraries ‘down’, are (like myself), old time real-time developers that expect to do a lot with little, as tightly as possible.
The requirement is in the result. Wasting any resources (program memory, RAM, performance) is looked upon badly.

The growth of bigger, faster processors that can do the same job in less time is great, but there will always be the argument between making it ‘easier’, or getting the best from your hardware. The goal is - more work in less time and memory - simplicity.

There was a term often tossed around in the 70s and 80s of ’elegance’ in code.
That usually referred to getting the most out of a processor with the tightest, most readable code running in the smallest space. This was when RAM was often measured in bytes!

Some of that argument disappears with the growth in processor power, but the real-time, and efficiency aspects haven’t gone away.

Less baggage = less to carry.

Taking a stripped version of the first example

The stripped down version gets into the same endless loop as I pointed out before and never exits the while loop. It appears that MyTokenNr is never updated, so it never becomes negative

@UKHeliBob

I did observe that behaviour earlier on, but did not check for the compile results above. Checked it now and you're right.

Had to upgrade from library version 3.0.4 to 4.0.2 and it now gives the expected output (compared to OP); compile results changed

Sketch uses 4468 bytes (14%) of program storage space. Maximum is 30720 bytes.
Global variables use 557 bytes (27%) of dynamic memory, leaving 1491 bytes for local variables. Maximum is 2048 bytes.

Thanks - I have upgraded and it is working now

I have been using several different Arduino cores for some years, MiniCore, MegaCore and MightyCore; they all need to be updated to compile with SafeString.

The exception, so far appears to be the DUE, that will not compile Safestrings code.

Just curious...
Are those mods a weakness in the cores, or to accommodate a specific oddity needed by the library ?

The exception, so far appears to be the DUE, that will not compile Safestrings code.

V4.0.3 of SafeStrings fixes DUE (board core version 1.6.12) available now from
https://www.forward.com.au/pfod/ArduinoProgramming/SafeString/SafeString.zip
(and later from Arduino Library Manager)

It appears that DUE thinks C++ false is not a bool but an int == 1

Welcome to the bane of writing Arduino libraries
"Write once and test on every board"

stoken() can return empty fields in a comma delimited string, strtok() can't.

Another feature of stoken() is that you can delimit on the complement of the specified delimiters by passing false for the final (optional) argument.
so

    nextIdx = sfLine.stoken(field, nextIdx, delimiters, false, false); 
// false, false, => skip multiple delimiters, delimit on complement of delimiters

for example

#include "SafeString.h"
void setup() {
  Serial.begin(9600);
  for (int i = 10; i > 0; i--) {
    Serial.print(' '); Serial.print(i);
    delay(500);
  }
  Serial.println();
  SafeString::setOutput(Serial); // enable full debugging error msgs

  char line[] = " ,23.5,some text,335, some more text, ";
  cSFP(sfLine, line); // wrap the line in a SafeString for processing 
  Serial.print(F("Input line is '")); Serial.print(sfLine); Serial.println('\'');
  createSafeString(field, 10); // for the field strings. Should have capacity > largest field length
  char delimiters[] = "01234567890."; // numbers + decimal point
  Serial.print(F("Delimiters '"));Serial.print(delimiters);Serial.println("'");
  Serial.println(F("Using  nextIdx = sfLine.stoken(field, nextIdx, delimiters, false,false);"));
  Serial.println(F("    // false, false, => skip multiple delimiters, delimit on complement of delimiters"));
  Serial.println(F(" to extract number fields :-"));
  int nextIdx = 0; // start from beginning of sfLine
  while (nextIdx >= 0) {
    nextIdx = sfLine.stoken(field, nextIdx, delimiters, false, false); // false, false, => skip multiple delimiters, delimit on complement of delimiters
    if (field.isEmpty()) {
      break; // no more digits found
    }
    Serial.print(F("  Digit Field ")); Serial.println(field);
  }
}
void loop() {
  // intentionally empty: the demo runs once inside setup()
}

returns

Input line is ' ,23.5,some text,335, some more text, '
Delimiters '01234567890.'
Using  nextIdx = sfLine.stoken(field, nextIdx, delimiters, false,false);
    // false, false, => skip multiple delimiters, delimit on complement of delimiters
 to extract number fields :-
  Digit Field 23.5
  Digit Field 335

This topic was automatically closed 120 days after the last reply. New replies are no longer allowed.