Extracting characters from a string

Hi there

I am reading data from a device that returns strings of characters, both letters and numbers.

Here is an example of a string:

x: 5674 y: 846 z: 23 qf: 89

I want to be able to extract each of the numbers from the string and turn it into its own variable so I can then create my own comma separated string. Effectively I want to remove the letters and colons and separate the numbers with commas.

Any suggestions on the best way of going about this? I have tried reading some other forum posts but got a bit lost; I understand that I may need to use functions such as indexOf()?

Any help would be much appreciated, cheers.

If you use the String object then here is a reference.
Warning: On the UNO and similar boards this object can cause memory fragmentation:
String

Use
http://www.cplusplus.com/reference/cstring/strtok/

Try this reference.

Some time ago I wrote a simple parser that extracts an identifier and a value from text
structured like ini-file parameters.
A simple example: MyId=234

This code uses the PString library, which can be downloaded via the Library Manager of the Arduino IDE.
The PString variable type does NOT cause memory problems.
You have to define the maximum number of characters that can be stored inside the variable.
PString brings back part of the comfort that String has.
The demo code shows some, but not all, of the possibilities for working with PStrings.

#include <PString.h>
// Each PString below is constructed over the fixed-size char array declared
// directly above it (buffer + sizeof(buffer)).
// NOTE(review): the suffixes presumably mean "_AoC" = array of char and
// "_PS" = PString - confirm with the author.

// Complete text that is searched (256 bytes of storage).
char    XML_Str_AoC[256] = " "; 
PString XML_Str_PS(XML_Str_AoC, sizeof(XML_Str_AoC));

// Receives the substrings that are cut out of XML_Str (256 bytes of storage).
char    XML_SubStr_AoC[256] = " ";
PString XML_SubStr_PS(XML_SubStr_AoC, sizeof(XML_SubStr_AoC));

// 32-byte buffer; not referenced by the functions shown below.
char    Value_AoC[32] = " ";
PString Value_PS(Value_AoC, sizeof(Value_AoC));

// Identifier text to search for (32 bytes of storage).
char    IDStr_AoC[32] = "=";
PString IDStr_PS(IDStr_AoC, sizeof(IDStr_AoC));

// Separator text to search for (32 bytes of storage).
char    Separator_AoC[32] = "=";
PString Separator_PS(Separator_AoC, sizeof(Separator_AoC));

// Positions and lengths (in characters) computed while parsing in setup().
unsigned long IDPos;
unsigned long ValuePos;
unsigned long EndOfValuePos;
unsigned long StrLen;

// Returns the zero-based index at which the first occurrence of
// p_PointerToSubStr starts inside p_PointerToSource.
// If the substring does not occur at all, the length of the source string is
// returned (the index of its terminating '\0'), so that using the result as an
// offset into the source never leaves the string.
unsigned long BeginOfSubStr (char* p_PointerToSource, char* p_PointerToSubStr) {
  const char* match = strstr(p_PointerToSource, p_PointerToSubStr);
  if (match == NULL) {
    // strstr() reports "not found" with NULL; subtracting pointers from it is undefined
    return strlen(p_PointerToSource);
  }
  return static_cast<unsigned long>(match - p_PointerToSource);
}

// Returns the zero-based index of the first character that follows the first
// occurrence of p_PointerToSubStr inside p_PointerToSource.
// If the substring does not occur at all, the length of the source string is
// returned (the index of its terminating '\0'), so that using the result as an
// offset into the source never leaves the string.
unsigned long EndOfSubStr (char* p_PointerToSource, char* p_PointerToSubStr) {
  const char* match = strstr(p_PointerToSource, p_PointerToSubStr);
  if (match == NULL) {
    // strstr() reports "not found" with NULL; subtracting pointers from it is undefined
    return strlen(p_PointerToSource);
  }
  return static_cast<unsigned long>(match - p_PointerToSource) + strlen(p_PointerToSubStr);
}

// Copies the part of p_PointerToSource that precedes the first occurrence of
// p_PointerToSeparator into p_PointerToTarget (always null-terminated).
// If the separator does not occur, the whole source string is copied.
// The caller has to make sure that the target buffer is large enough for the
// copied text plus the terminating '\0'.
// Debug output of all three strings is written to Serial.
// My personal naming convention: parameters of functions start with the prefix "p_".
void ExtractUntilSeparator(char* p_PointerToTarget, char* p_PointerToSeparator, char* p_PointerToSource)
{
  Serial.println("entering ExtractUntilSeparator");

  const char* separatorStart = strstr(p_PointerToSource, p_PointerToSeparator);

  // strlcpy copies at most (size - 1) characters and then appends '\0',
  // therefore the size passed is "number of wanted characters + 1".
  size_t LengthUntilDelimiter;
  if (separatorStart != NULL)
  {
    LengthUntilDelimiter = static_cast<size_t>(separatorStart - p_PointerToSource) + 1;
  }
  else
  {
    // strstr() returned NULL: no separator, so take everything
    LengthUntilDelimiter = strlen(p_PointerToSource) + 1;
  }

  // Attention: use strlcpy with care. strlcpy writes directly into RAM at the given address;
  // the compiler has no chance to check whether that address makes sense.
  // A wrong pointer address therefore leads to unpredictable bugs.
  strlcpy(p_PointerToTarget,p_PointerToSource,LengthUntilDelimiter);

  Serial.print("p_PointerToSource:>#");
  Serial.print(p_PointerToSource);
  Serial.println("#");

  Serial.print("p_PointerToTarget:>#");
  Serial.print(p_PointerToTarget);
  Serial.println("#");

  Serial.print("p_PointerToSeparator:>#");
  Serial.print(p_PointerToSeparator);
  Serial.println("#");

  Serial.println("leaving ExtractUntilSeparator");
}

// Copies everything in p_PointerToSource that follows the first occurrence of
// p_PointerToSeparator into p_PointerToTarget (always null-terminated).
// If the separator does not occur, or nothing follows it, the target buffer is
// set to the empty string.
// The caller has to make sure that the target buffer is large enough for the
// copied text plus the terminating '\0'.
// Debug output of all three strings is written to Serial.
// My personal naming convention: parameters of functions start with "p_".
void ExtractValueBehindSeparator(char* p_PointerToTarget, char* p_PointerToSeparator, char* p_PointerToSource)
{
  Serial.println("entering ExtractValueBehindSeparator");

  const char* separatorStart = strstr(p_PointerToSource, p_PointerToSeparator);
  const size_t sourceLength = strlen(p_PointerToSource);

  // Index of the first character behind the separator; only valid if the separator was found
  unsigned int PosOfSeparatorEnd = 0;
  if (separatorStart != NULL)
  {
    PosOfSeparatorEnd = (separatorStart - p_PointerToSource) + strlen(p_PointerToSeparator);
  }

  // if the separator string was found and at least one character follows it
  if (separatorStart != NULL && PosOfSeparatorEnd < sourceLength)
  {
    Serial.print("PosOfSeparatorEnd:>#");
    Serial.print(PosOfSeparatorEnd);
    Serial.println("#");
    // remaining characters plus one byte for the terminating '\0'
    unsigned int NoOfBytesUntilEoString = sourceLength - PosOfSeparatorEnd + 1;

    // Attention: use strlcpy with care. strlcpy writes directly into RAM at the given address;
    // the compiler has no chance to check whether that address makes sense.
    // A wrong pointer address therefore leads to unpredictable bugs.
    strlcpy(p_PointerToTarget, p_PointerToSource + PosOfSeparatorEnd, NoOfBytesUntilEoString);
  }
  else
  {
    // Nothing behind the separator: empty the caller's buffer.
    // (Assigning "" to the parameter would only change the local pointer.)
    p_PointerToTarget[0] = '\0';
  }

  Serial.print("p_PointerToSource:>#");
  Serial.print(p_PointerToSource);
  Serial.println("#");

  Serial.print("p_PointerToTarget:>#");
  Serial.print(p_PointerToTarget);
  Serial.println("#");

  Serial.print("p_PointerToSeparator:>#");
  Serial.print(p_PointerToSeparator);
  Serial.println("#");

  Serial.println("leaving ExtractValueBehindSeparator");
}

// Demo: fills XML_Str_PS with a sample HTTP response that carries an XML
// payload and then cuts text out of it in several steps. Every intermediate
// result is printed on Serial, framed by '#' characters.
void setup() 
{
  Serial.begin(115200);
  Serial.println();
  Serial.println("setup-Start");

  // Build the sample text piece by piece.
  XML_Str_PS += "HTTP/1.0 200 OK ";
  XML_Str_PS += "Content-Type: text/xml ";
  XML_Str_PS += "Access-Control-Allow-Origin: * ";
/*
  XML_Str_PS += "X-Frame-Options: SAMEORIGIN ";
  XML_Str_PS += "X-Content-Type-Options: nosniff ";
  XML_Str_PS += "X-XSS-Protection: 1; mode=block ";
  XML_Str_PS += "X-Robots-Tag: none ";
  XML_Str_PS += "X-Download-Options: noopen ";
  XML_Str_PS += "X-Permitted-Cross-Domain-Policies: none ";
  XML_Str_PS += "Referrer-Policy: no-referrer ";
  XML_Str_PS += "Content-Length: 300 ";
  XML_Str_PS += "Connection: close ";
  XML_Str_PS += "Date: Fri, 19 Jun 2020 20:20:38 GMT ";
  */
  XML_Str_PS += "<?xml version='1.0' encoding='ISO-8859-1' ?><systemVariables><systemVariable name='Grube Stand' variable='3.000000' value='3.000000' value_list='' value_text='' ise_id='2632' min='0' max='65000' unit='cm' type='4' subtype='0' timestamp='1592598038' value_name_0='' value_name_1=''/></systemVariables>"; 

  Serial.print("XML_Str_PS#");
  Serial.print(XML_Str_PS);
  Serial.print("#");
  Serial.println();

  // Step 1: position at which the identifier 'Grube Stand' starts.
  IDStr_PS = "'Grube Stand'";

  Serial.print("1: IDStr_PS#");
  Serial.print(IDStr_PS);
  Serial.print("#");
  Serial.println();

  IDPos = BeginOfSubStr(XML_Str_PS,IDStr_PS);
  Serial.print("IDPos:");
  Serial.print(IDPos);
  Serial.print("#");
  Serial.println();
  
  // Step 2: position at which "value_list=" starts.
  IDStr_PS = "value_list=";

  Serial.print("2:IDStr_PS#");
  Serial.print(IDStr_PS);
  Serial.print("#");
  Serial.println();
  
  // This call passes the char arrays themselves instead of the PString objects.
  ValuePos = BeginOfSubStr(XML_Str_AoC,IDStr_AoC);

  Serial.print("XML_Str_PS#");
  Serial.print(XML_Str_PS);
  Serial.print("#");
  Serial.println();

  Serial.print("ValuePos:");
  Serial.print(ValuePos);
  Serial.print("#");
  Serial.println();

  // Number of characters between the two positions found above.
  StrLen = ValuePos - IDPos; 
  Serial.print("StrLen:");
  Serial.print(StrLen);
  Serial.print("#");
  Serial.println();

  // Expected substring: 'Grube Stand' variable='3.000000' value='3.000000'
  // Attention: use strncpy with care. strncpy writes directly into RAM at the given address;
  // the compiler has no chance to check whether that address makes sense.
  // A wrong pointer address therefore leads to unpredictable bugs.
  // NOTE(review): strncpy does not append '\0' when the source is at least StrLen
  // characters long - confirm that the target is terminated some other way.
  strncpy(XML_SubStr_PS,  XML_Str_PS + IDPos, StrLen);

  Serial.print("SubStr#");
  Serial.print(XML_SubStr_PS);
  Serial.print("#");
  Serial.println();

  // Step 3: continue with the substring and find the position right behind "value='".
  XML_Str_PS = XML_SubStr_PS;
  Separator_PS = "value='";

  IDPos = EndOfSubStr(XML_SubStr_PS,Separator_PS);

  Serial.print("3: IDPos:");
  Serial.print(IDPos);
  Serial.print("#");
  Serial.println();

  // Characters from IDPos up to the end of the string, plus one.
  StrLen = strlen(XML_Str_PS) - IDPos + 1;

  Serial.print("StrLen:");
  Serial.print(StrLen);
  Serial.print("#");
  Serial.println();

  strncpy(XML_SubStr_PS,  XML_Str_PS + IDPos, StrLen);

  Serial.print("SubStr#");
  Serial.print(XML_SubStr_PS);
  Serial.print("#");
  Serial.println();

  // Step 4: keep only the text in front of the next single quote.
  XML_Str_PS = XML_SubStr_PS;
  Separator_PS = "'";

  ExtractUntilSeparator(XML_SubStr_PS, Separator_PS, XML_Str_PS);
  Serial.print("value SubStr#");
  Serial.print(XML_SubStr_PS);
  Serial.print("#");
  Serial.println();
}

// Intentionally empty: all of the demo code is in setup().
void loop() {
}

There is a lot of debug output in the code to make visible what is going on.

best regards Stefan

Maybe reading serial input basics will give you some ideas.

using strtok() to split string into tokens then sscanf() to look for an integer number

// Splits a sample string at ':' and ' ' and reports, for every token,
// whether sscanf() can read an integer from it.
int main ()
{
  char input[] = "x: 5674 y: 846 z: 23 qf: 89";
  const char delimiters[] = ": ";

  printf ("Splitting string \"%s\" into tokens:\n", input);

  // strtok() modifies `input` in place and returns NULL when no token is left
  for (char * token = strtok (input, delimiters);
       token != NULL;
       token = strtok (NULL, delimiters))
  {
    int value = 999;
    printf ("string found %s\n", token);

    const int fieldsRead = sscanf (token, "%d", &value);
    if (fieldsRead == 1)
    {
      printf ("integer = %d\n", value);
    }
    else
    {
      printf ("not integer\n");
    }
  }
  return 0;
}

gives

Splitting string "x: 5674 y: 846 z: 23 qf: 89" into tokens:
string found x
not integer
string found 5674
integer = 5674
string found y
not integer
string found 846
integer = 846
string found z
not integer
string found 23
integer = 23
string found qf
not integer
string found 89
integer = 89

@horace: very good solution. I didn't read the TO's text carefully enough. Your solution matches the needs of the TO.

best regards Stefan

The extraction of the numeric fields and building the comma separated list can easily be done using Nick Gammon's regular expressions library port. Example :-

#include <Regexp.h>

/*
 * Used to build the comma separated list of values
 * found by the regex.
 */
char output[128];
uint16_t outputLength = 0;

// called for each match
void match_callback  (const char * match,          // matching string (not null-terminated)
                      const unsigned int length,   // length of matching string
                      const MatchState & ms)       // MatchState in use (to get captures)
{
  // Build the comma separated list
  if (outputLength != 0)
    output[outputLength++] = ',';
  for (int i = 0; i < length; i++)
    output[outputLength++] = match[i];
  output[outputLength] = '\0';
}  // end of match_callback


void setup ()
{
  Serial.begin (115200);
  Serial.println ();

  // what we are searching (the target)
  const char buf[] = "x: 5674 y: 846 z: 23 qf: 89";

  // original buffer
  Serial.println (buf);

  // match state object
  MatchState ms (buf);
  ms.GlobalMatch ("%d+", match_callback);

  Serial.println(output);
  

}  // end of setup

// Intentionally empty: all of the work is done in setup().
void loop () {}

Many people make the sign of the cross when regular expressions are proposed. I expect this to happen here.

The only thing against regexp in Arduinos might be memory usage and machine cycles when one compares it with solutions based on other approaches.

Note that I don't have anything against regexp. I used to write Windows applications where the configuration to extract information out of textfiles was purely regexp based so if input files changed, it could easily be adjusted to new needs. Neither me nor the users have ever regretted that.

But writing a proper regexp is also an art; I've seen one to validate dates including leapdays; from memory, it was about 400 characters long. Good luck with that in an Arduino that also has to do other things :wink:

Thank you for all the replies :).
Here is my solution. It's pretty simple and will probably be prone to errors, especially when reading actual strings that may contain unexpected characters. However, it works for this string and continues to work for any length and combination of numbers as long as the string starts with x:.

// stringOne: the text that is parsed; posString: the comma separated result that is built from it
String stringOne, posString;
// index of the character of stringOne that is examined next (loop() resets it to 2)
byte charPos = 2;
// the character that was copied most recently
char pos;

void setup() {
  // put your setup code here, to run once:
  Serial.begin(115200);
}

void loop() {
  // put your main code here, to run repeatedly:

  charPos = 2;
  stringOne = "x:100 y:120 z:2500 qf:100";
  posString = "";
  if (stringOne.startsWith("x:")) {
    Serial.println("This is the correct string");

    while (stringOne.charAt(charPos) != 'y') {
      pos = (stringOne.charAt(charPos));
      posString = posString + pos;
      Serial.println(posString);
      charPos ++;
    }
    if (stringOne.charAt(charPos) == 'y') {
      posString = posString + ", ";
      charPos ++; charPos ++;

    } else {
      Serial.println("There has been an error...");
    }

    while (stringOne.charAt(charPos) != 'z') {
      pos = (stringOne.charAt(charPos));
      posString = posString + pos;
      Serial.println(posString);
      charPos ++;
    }
    if (stringOne.charAt(charPos) == 'z') {
      posString = posString + ", ";
      charPos ++; charPos ++;

    } else {
      Serial.println("There has been an error...");
    }

    while (stringOne.charAt(charPos) != 'q') {
      pos = (stringOne.charAt(charPos));
      posString = posString + pos;
      Serial.println(posString);
      charPos ++;
    }
    if (stringOne.charAt(charPos) == 'q') {
      Serial.print("Here is your final string: "); Serial.println(posString);

    } else {
      Serial.println("There has been an error...");
    }
  }
  delay(1000);
}

Here is the output:

This is the correct string
1
10
100
100 
100 , 1
100 , 12
100 , 120
100 , 120 
100 , 120 , 2
100 , 120 , 25
100 , 120 , 250
100 , 120 , 2500
100 , 120 , 2500 
Here is your final string: 100 , 120 , 2500

If anyone has any ideas to make it better or shorter or more reliable they will be much appreciated.
Cheers

your result appears to be missing the qf:100 field

your original post stated the requirement was to extract each of the numbers from the string and turn it into its own variable - your posted code does not appear to assign values to variables, it just creates a comma separated string

if a comma separated list is all you require, use a string tokeniser to extract the numeric data, e.g.

// parse string extracting numeric values to comma seperated list

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

void setup()
{
  Serial.begin(115200);
  char str[] =  "x: 5674 y: 846 z: 23 qf: 89";
  char * pch;
  int firstToken=0;
  Serial.print ("\nextracting numeric values to comma seperated list: ");
  Serial.println(str);
  pch = strtok (str,": ");
  while (pch != NULL)
  {
    //Serial.print ("token found ");
    //Serial.println(pch);
    // check if last token starts with a digit if so print it
    if(isdigit(pch[0])) {
      // print comma seperate value - no comma on first token
      if(firstToken++) Serial.print(", "); Serial.print(pch);
    }
    pch = strtok (NULL, ": ");
  }
  
}

// Intentionally empty: all of the work is done in setup().
void loop() {}

gives

extracting numeric values to comma seperated list: x: 5674 y: 846 z: 23 qf: 89
5674, 846, 23, 89

If the input is from a serial port you can use features of Serial to do it:

// Example input: x: 5674 y: 846 z: 23 qf: 89
// Example output: 5674,846,23,89

// Opens the serial port at 115200 baud and then waits until `Serial`
// evaluates to true.
void setup()
{
  Serial.begin(115200);
  while (!Serial)
  {
    // busy-wait
  }
}


// Most recently parsed values; loop() stores the number read after 'x', 'y', 'z' and 'q' here.
int x, y, z, qf;


void loop()
{
  if (Serial.available())
  {
    char c = Serial.read();
    switch (c)
    {
      case 'x': x = Serial.parseInt(); break;
      case 'y': y = Serial.parseInt(); break;
      case 'z': z = Serial.parseInt(); break;
      case 'q': qf = Serial.parseInt();
        Serial.print(x);
        Serial.print(',');
        Serial.print(y);
        Serial.print(',');
        Serial.print(z);
        Serial.print(',');
        Serial.println(qf);
        break;
    }
  }
}

Note: Works with positive and negative values. Use .parseFloat() for floating-point values.

horace:
your result appears to be missing the qf:100 field

your original post stated the requirement was to extract each of the numbers from the string and turn it into its own variable - your posted code does not appear to assign values to variables, it just creates a comma separated string

if a comma separated list is all you require, use a string tokeniser to extract the numeric data, e.g.

// parse string extracting numeric values to comma seperated list

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

void setup()
{
 Serial.begin(115200);
 char str[] =  "x: 5674 y: 846 z: 23 qf: 89";
 char * pch;
 int firstToken=0;
 Serial.print ("\nextracting numeric values to comma seperated list: ");
 Serial.println(str);
 pch = strtok (str,": ");
 while (pch != NULL)
 {
   //Serial.print ("token found ");
   //Serial.println(pch);
   // check if last token starts with a digit if so print it
   if(isdigit(pch[0])) {
     // print comma seperate value - no comma on first token
     if(firstToken++) Serial.print(", "); Serial.print(pch);
   }
   pch = strtok (NULL, ": ");
 }
 
}

// Intentionally empty: all of the work is done in setup().
void loop() {}




gives


extracting numeric values to comma seperated list: x: 5674 y: 846 z: 23 qf: 89
5674, 846, 23, 89

Thanks for your reply. The numbers following qf are irrelevant for me so it doesn't matter whether I have them or not. This method seems much more reliable and outputs the correct numbers. I probably should have specified in my first post that in the end I want a comma separated string that I can then put onto an SD card in a csv file. I am still trying to muddle my way through this code trying to understand it but how could I modify it to create a new string?
Thanks so much

If you want to store three variables in a csv file, there is no need to combine them first; below to give you the idea

// Writes the three values as one line of the form number1,number2,number3;
// only the last call uses println.
myFile.print(number1);
myFile.print(",");
myFile.print(number2);
myFile.print(",");
myFile.println(number3);

macg_jones:
I am still trying to muddle my way through this code trying to understand it but how could I modify it to create a new string?

as you tokenise the input data concatenate the required tokens onto the result

// parse string extracting numeric values to comma seperated list

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Splits a sample string at ':' and ' ' and concatenates every token that
// starts with a digit into `result`, separated by commas. The finished list is
// printed on Serial.
void setup()
{
  Serial.begin(115200);
  char str[] =  "x: 5674 y: 846 z: 23 qf: 89";
  char result[100]={0};     // output string; tokens that no longer fit are not added
  size_t resultLength = 0;  // characters currently stored in result (without '\0')
  int firstToken=0;         // 0 until the first numeric token has been stored
  Serial.print ("\nextracting numeric values to comma seperated list: ");
  Serial.println(str);

  // strtok() modifies str in place and returns NULL when no token is left
  for (char * pch = strtok (str,": "); pch != NULL; pch = strtok (NULL, ": "))
  {
    // isdigit() must not be given a negative value, so convert the char to unsigned char first
    if(!isdigit(static_cast<unsigned char>(pch[0]))) {
      continue;
    }

    const size_t separatorLength = firstToken ? 1 : 0;   // no comma in front of the first token
    const size_t tokenLength = strlen(pch);

    // strcat() does not check the size of the target, so check it here:
    // stored text + optional comma + token + terminating '\0' must fit
    if(resultLength + separatorLength + tokenLength + 1 > sizeof(result)) {
      break;   // stop instead of writing past the end of result
    }

    if(firstToken++) {
      strcat(result,",");
    }
    strcat(result, pch);
    resultLength += separatorLength + tokenLength;
  }
  Serial.print("result = "); Serial.print(result);
}

// Intentionally empty: all of the work is done in setup().
void loop() {}

result is

extracting numeric values to comma seperated list: x: 5674 y: 846 z: 23 qf: 89
result = 5674,846,23,89

or following the advice of @sterretje in post #14 simply write the tokens to a SD file as you extract them, e.g. in the code of post #11 replace

      if(firstToken++) Serial.print(", "); Serial.print(pch);

with

      if(firstToken++) myFile.print(", "); myFile.print(pch);

Ah yes that makes sense, thank you very much.

One thing I probably should have mentioned, and which has caused me errors, is that although what I gave was an example of a string, its actual length is unknown as it comes as a global variable. At the start of my code I create a string called positionString which stores that value. When I tried replacing the:

char str[] = "x: 100 y: 200 z:1200";

With:

char str[] = positionString;

It told me it couldn't compile because it was an unknown length. How can I fix this?

Use strcpy to copy c-strings.

And str array needs to have a length !!

in the code of post #15 replace str with positionString

or make str a pointer to char and assign it the start address of positionString, e.g.

  char positionString[100] = "x: 100 y: 200 z:1200";
......
  char *str = positionString;