I am reading data from a device that returns strings of characters, both letters and numbers.
Here is an example of a string:
x: 5674 y: 846 z: 23 qf: 89
I want to be able to extract each of the numbers from the string and turn it into its own variable so I can then create my own comma separated string. Effectively I want to remove the letters and colons and separate the numbers with commas.
Any suggestions on the best way of going about this? I have tried reading some other forum posts but got a bit lost. I understand that I may need to use functions such as indexOf()?
Some time ago I wrote a simple parser to extract an identifier and a value that has the structure of
ini-file-parameters
simple example MyId=234
This code uses the PString-library which can be downloaded via library-manager of the Arduino-IDE
variable-type PString does NOT cause memory problems.
You have to define a maximum length how many characters should be storable inside the variable
PString brings back a part of the comfort that String has.
The demo code shows some, but not all, of the possibilities for working with PStrings.
#include <PString.h>
// Each PString below is constructed over the char array declared directly
// above it, together with that array's size.

// Text that is searched (filled with the HTTP/XML demo text in setup()).
char XML_Str_AoC[256] = " ";
PString XML_Str_PS(XML_Str_AoC, sizeof(XML_Str_AoC));
// Work buffer that receives the extracted pieces of XML_Str.
char XML_SubStr_AoC[256] = " ";
PString XML_SubStr_PS(XML_SubStr_AoC, sizeof(XML_SubStr_AoC));
// NOTE(review): Value_AoC / Value_PS are not referenced by the visible code.
char Value_AoC[32] = " ";
PString Value_PS(Value_AoC, sizeof(Value_AoC));
// Identifier text to search for; setup() assigns the real search strings.
char IDStr_AoC[32] = "=";
PString IDStr_PS(IDStr_AoC, sizeof(IDStr_AoC));
// Separator text used when cutting out a value; reassigned in setup().
char Separator_AoC[32] = "=";
PString Separator_PS(Separator_AoC, sizeof(Separator_AoC));
// Character offsets and lengths computed in setup().
unsigned long IDPos;
unsigned long ValuePos;
// NOTE(review): EndOfValuePos is not referenced by the visible code.
unsigned long EndOfValuePos;
unsigned long StrLen;
// Returns the zero-based offset of the first occurrence of p_PointerToSubStr
// inside p_PointerToSource.
// If the substring does not occur, strlen(p_PointerToSource) is returned, i.e.
// the offset of the terminating zero, so that "source + result" is always a
// valid (possibly empty) string. The original code subtracted from a NULL
// pointer in that case, which produced a meaningless offset.
unsigned long BeginOfSubStr (const char* p_PointerToSource, const char* p_PointerToSubStr) {
  const char* Found = strstr(p_PointerToSource, p_PointerToSubStr);
  if (Found == NULL) {
    return strlen(p_PointerToSource);
  }
  return static_cast<unsigned long>(Found - p_PointerToSource);
}
// Returns the zero-based offset of the first character AFTER the first
// occurrence of p_PointerToSubStr inside p_PointerToSource.
// If the substring does not occur, strlen(p_PointerToSource) is returned, i.e.
// the offset of the terminating zero, so that "source + result" is always a
// valid (possibly empty) string. The original code subtracted from a NULL
// pointer in that case, which produced a meaningless offset.
unsigned long EndOfSubStr (const char* p_PointerToSource, const char* p_PointerToSubStr) {
  const char* Found = strstr(p_PointerToSource, p_PointerToSubStr);
  if (Found == NULL) {
    return strlen(p_PointerToSource);
  }
  return static_cast<unsigned long>(Found - p_PointerToSource) + strlen(p_PointerToSubStr);
}
// my personal naming-convention parameter of functions start with prefix "p_"
//
// Copies the part of p_PointerToSource that precedes the first occurrence of
// p_PointerToSeparator into p_PointerToTarget and zero-terminates it.
// If the separator does not occur, the complete source string is copied.
//
// p_PointerToTarget    : buffer that receives the result. The function cannot
//                        know its size, so the caller must make sure it can
//                        hold strlen(p_PointerToSource) + 1 bytes.
// p_PointerToSeparator : zero-terminated separator text
// p_PointerToSource    : zero-terminated text to search (not modified)
void ExtractUntilSeparator(char* p_PointerToTarget, const char* p_PointerToSeparator, const char* p_PointerToSource)
{
  Serial.println("entering ExtractUntilSeparator");

  // strstr returns NULL when the separator is missing; never do pointer
  // arithmetic with that result.
  const char* SeparatorStart = strstr(p_PointerToSource, p_PointerToSeparator);
  size_t BytesIncludingTerminator;
  if (SeparatorStart != NULL)
  {
    BytesIncludingTerminator = static_cast<size_t>(SeparatorStart - p_PointerToSource) + 1;
  }
  else
  {
    BytesIncludingTerminator = strlen(p_PointerToSource) + 1;
  }

  // attention: strlcpy writes directly into RAM at the given address.
  // The compiler cannot check whether the address or the size makes sense,
  // so a wrong pointer or a too small target leads to unpredictable bugs.
  // The size argument counts the terminating zero, hence the "+ 1" above.
  strlcpy(p_PointerToTarget, p_PointerToSource, BytesIncludingTerminator);

  Serial.print("p_PointerToSource:>#");
  Serial.print(p_PointerToSource);
  Serial.println("#");
  Serial.print("p_PointerToTarget:>#");
  Serial.print(p_PointerToTarget);
  Serial.println("#");
  Serial.print("p_PointerToSeparator:>#");
  Serial.print(p_PointerToSeparator);
  Serial.println("#");
  Serial.println("leaving ExtractUntilSeparator");
}
// my personal naming-convention parameter of functions start with "p_"
//
// Copies everything that follows the first occurrence of p_PointerToSeparator
// in p_PointerToSource into p_PointerToTarget and zero-terminates it.
// If the separator does not occur, or nothing follows it, the target is set to
// the empty string.
//
// p_PointerToTarget    : buffer that receives the result. The function cannot
//                        know its size, so the caller must make sure it can
//                        hold strlen(p_PointerToSource) + 1 bytes.
// p_PointerToSeparator : zero-terminated separator text
// p_PointerToSource    : zero-terminated text to search (not modified)
void ExtractValueBehindSeparator(char* p_PointerToTarget, const char* p_PointerToSeparator, const char* p_PointerToSource)
{
  Serial.println("entering ExtractValueBehindSeparator");

  // strstr returns NULL when the separator is missing; never do pointer
  // arithmetic with that result.
  const char* SeparatorStart = strstr(p_PointerToSource, p_PointerToSeparator);
  const char* ValueStart = NULL;
  if (SeparatorStart != NULL)
  {
    ValueStart = SeparatorStart + strlen(p_PointerToSeparator);
  }

  // if separatorstring was found and at least one character follows it
  if (ValueStart != NULL && *ValueStart != '\0')
  {
    const unsigned int PosOfSeparatorEnd = static_cast<unsigned int>(ValueStart - p_PointerToSource);
    Serial.print("PosOfSeparatorEnd:>#");
    Serial.print(PosOfSeparatorEnd);
    Serial.println("#");
    // attention: strlcpy writes directly into RAM at the given address.
    // The compiler cannot check whether the address or the size makes sense,
    // so a wrong pointer or a too small target leads to unpredictable bugs.
    // The size argument counts the terminating zero, hence the "+ 1".
    strlcpy(p_PointerToTarget, ValueStart, strlen(ValueStart) + 1);
  }
  else
  {
    // Nothing behind the separator: clear the caller's buffer. Assigning ""
    // to the parameter would only change the local pointer and leave the
    // caller's buffer untouched.
    p_PointerToTarget[0] = '\0';
  }

  Serial.print("p_PointerToSource:>#");
  Serial.print(p_PointerToSource);
  Serial.println("#");
  Serial.print("p_PointerToTarget:>#");
  Serial.print(p_PointerToTarget);
  Serial.println("#");
  Serial.print("p_PointerToSeparator:>#");
  Serial.print(p_PointerToSeparator);
  Serial.println("#");
  Serial.println("leaving ExtractValueBehindSeparator");
}
// Demo: fills XML_Str_PS with a sample HTTP/XML response and then narrows it
// down step by step until only the text between value=' and the next ' is
// left in XML_SubStr_PS. Every intermediate result is printed to Serial.
// NOTE(review): none of the search results below is checked for "not found".
void setup()
{
Serial.begin(115200);
Serial.println();
Serial.println("setup-Start");
// Step 1: build the text to be searched by appending string literals.
XML_Str_PS += "HTTP/1.0 200 OK ";
XML_Str_PS += "Content-Type: text/xml ";
XML_Str_PS += "Access-Control-Allow-Origin: * ";
/*
XML_Str_PS += "X-Frame-Options: SAMEORIGIN ";
XML_Str_PS += "X-Content-Type-Options: nosniff ";
XML_Str_PS += "X-XSS-Protection: 1; mode=block ";
XML_Str_PS += "X-Robots-Tag: none ";
XML_Str_PS += "X-Download-Options: noopen ";
XML_Str_PS += "X-Permitted-Cross-Domain-Policies: none ";
XML_Str_PS += "Referrer-Policy: no-referrer ";
XML_Str_PS += "Content-Length: 300 ";
XML_Str_PS += "Connection: close ";
XML_Str_PS += "Date: Fri, 19 Jun 2020 20:20:38 GMT ";
*/
// NOTE(review): the appended text is longer than the 256-byte buffer behind
// XML_Str_PS; presumably PString cuts off what does not fit - confirm.
XML_Str_PS += "<?xml version='1.0' encoding='ISO-8859-1' ?><systemVariables><systemVariable name='Grube Stand' variable='3.000000' value='3.000000' value_list='' value_text='' ise_id='2632' min='0' max='65000' unit='cm' type='4' subtype='0' timestamp='1592598038' value_name_0='' value_name_1=''/></systemVariables>";
Serial.print("XML_Str_PS#");
Serial.print(XML_Str_PS);
Serial.print("#");
Serial.println();
// Step 2: offset at which the identifier 'Grube Stand' starts.
IDStr_PS = "'Grube Stand'";
Serial.print("1: IDStr_PS#");
Serial.print(IDStr_PS);
Serial.print("#");
Serial.println();
IDPos = BeginOfSubStr(XML_Str_PS,IDStr_PS);
Serial.print("IDPos:");
Serial.print(IDPos);
Serial.print("#");
Serial.println();
// Step 3: offset at which value_list= starts; this marks the end of the
// region of interest.
IDStr_PS = "value_list=";
Serial.print("2:IDStr_PS#");
Serial.print(IDStr_PS);
Serial.print("#");
Serial.println();
// This call passes the char arrays directly instead of the PString objects
// that were constructed over them.
ValuePos = BeginOfSubStr(XML_Str_AoC,IDStr_AoC);
Serial.print("XML_Str_PS#");
Serial.print(XML_Str_PS);
Serial.print("#");
Serial.println();
Serial.print("ValuePos:");
Serial.print(ValuePos);
Serial.print("#");
Serial.println();
// Number of characters between the identifier and value_list=.
StrLen = ValuePos - IDPos;
Serial.print("StrLen:");
Serial.print(StrLen);
Serial.print("#");
Serial.println();
// Step 4: copy that region, expected to be
//Grube Stand' variable='3.000000' value='3.000000'
// attention: use strncpy with care. strncpy writes directly into RAM at the given address;
// the compiler has no chance to check whether the address makes sense.
// A wrong pointer address therefore leads to unpredictable bugs.
// NOTE(review): strncpy adds no terminating zero when the source is longer
// than StrLen; this looks like it relies on the target buffer already
// containing zero bytes - confirm.
strncpy(XML_SubStr_PS, XML_Str_PS + IDPos, StrLen);
Serial.print("SubStr#");
Serial.print(XML_SubStr_PS);
Serial.print("#");
Serial.println();
// Continue working on the shortened text.
XML_Str_PS = XML_SubStr_PS;
// Step 5: offset of the first character behind value='
Separator_PS = "value='";
IDPos = EndOfSubStr(XML_SubStr_PS,Separator_PS);
Serial.print("3: IDPos:");
Serial.print(IDPos);
Serial.print("#");
Serial.println();
// Remaining length from that offset; the + 1 covers the terminating zero.
StrLen = strlen(XML_Str_PS) - IDPos + 1;
Serial.print("StrLen:");
Serial.print(StrLen);
Serial.print("#");
Serial.println();
strncpy(XML_SubStr_PS, XML_Str_PS + IDPos, StrLen);
Serial.print("SubStr#");
Serial.print(XML_SubStr_PS);
Serial.print("#");
Serial.println();
XML_Str_PS = XML_SubStr_PS;
// Step 6: keep only what stands in front of the closing quote.
Separator_PS = "'";
ExtractUntilSeparator(XML_SubStr_PS, Separator_PS, XML_Str_PS);
Serial.print("value SubStr#");
Serial.print(XML_SubStr_PS);
Serial.print("#");
Serial.println();
}
// Intentionally empty: this sketch does all of its work once in setup().
void loop() {
}
There is a lot of debug output in the code to make visible what is going on.
using strtok() to split string into tokens then sscanf() to look for an integer number
// Splits the sample string at ':' and ' ' and reports, for every token,
// whether sscanf can read an integer from it.
int main ()
{
  char input[] ="x: 5674 y: 846 z: 23 qf: 89";
  const char delimiters[] = ": ";

  printf ("Splitting string \"%s\" into tokens:\n", input);

  // strtok modifies input in place; passing NULL continues with the same string
  for (char * token = strtok (input, delimiters);
       token != NULL;
       token = strtok (NULL, delimiters))
  {
    int value = 999;
    printf ("string found %s\n", token);

    const int fieldsRead = sscanf (token, "%d", &value);
    if (fieldsRead == 1)
    {
      printf ("integer = %d\n", value);
    }
    else
    {
      printf ("not integer\n");
    }
  }
  return 0;
}
gives
Splitting string "x: 5674 y: 846 z: 23 qf: 89" into tokens:
string found x
not integer
string found 5674
integer = 5674
string found y
not integer
string found 846
integer = 846
string found z
not integer
string found 23
integer = 23
string found qf
not integer
string found 89
integer = 89
The extraction of the numeric fields and building the comma separated list can easily be done using Nick Gammon's regular expressions library port. Example :-
#include <Regexp.h>
/*
 * Used to build the comma separated list of values
 * found by the regex.
 *
 * output       : zero-terminated result text, written by match_callback
 * outputLength : number of characters currently stored in output
 *                (not counting the terminating zero)
 */
char output[128];
uint16_t outputLength = 0;
// called for each match
//
// Appends the matched text to the global comma separated list in output,
// preceded by a ',' unless it is the first entry. output is kept
// zero-terminated. A match that does not fit completely into output
// (separator + text + terminating zero) is dropped, so the list never
// contains a truncated number and the buffer is never overrun.
void match_callback (const char * match, // matching string (not null-terminated)
const unsigned int length, // length of matching string
const MatchState & ms) // MatchState in use (to get captures)
{
  const size_t separatorLength = (outputLength != 0) ? 1 : 0;
  const size_t usedAfterSeparator = outputLength + separatorLength;

  // One byte must stay free for the terminating zero. The comparison is
  // written as a subtraction so that a huge length cannot overflow the sum.
  if (usedAfterSeparator >= sizeof (output)
      || length > sizeof (output) - 1 - usedAfterSeparator)
    return;

  // Build the comma separated list
  if (separatorLength != 0)
    output[outputLength++] = ',';
  for (unsigned int i = 0; i < length; i++)
    output[outputLength++] = match[i];
  output[outputLength] = '\0';
} // end of match_callback
void setup ()
{
Serial.begin (115200);
Serial.println ();
// what we are searching (the target)
const char buf[] = "x: 5674 y: 846 z: 23 qf: 89";
// original buffer
Serial.println (buf);
// match state object
MatchState ms (buf);
ms.GlobalMatch ("%d+", match_callback);
Serial.println(output);
} // end of setup
// Intentionally empty: the demo runs once in setup().
void loop () {}
Many people make the sign of the cross when regular expressions are proposed. I expect this to happen here.
The only thing against regexp in Arduinos might be memory usage and machine cycles when one compares it with solutions based on other approaches.
Note that I don't have anything against regexp. I used to write Windows applications where the configuration to extract information out of textfiles was purely regexp based so if input files changed, it could easily be adjusted to new needs. Neither me nor the users have ever regretted that.
But writing a proper regexp is also an art; I've seen one to validate dates including leapdays; from memory, it was about 400 characters long. Good luck with that in an Arduino that also has to do other things
Thank you for all the replies :).
Here is my solution. It's pretty simple and will probably be prone to errors, especially when reading actual strings that may contain unexpected characters. However, it works for this string and continues to work for any length and combination of numbers as long as the string starts with x:.
// stringOne : the text to be parsed (assigned in loop())
// posString : the comma separated result that loop() builds up
String stringOne, posString;
// Index of the character in stringOne that is examined next; loop() resets it
// to 2 (the first character behind "x:") on every pass.
byte charPos = 2;
// The character most recently copied from stringOne into posString.
char pos;
// Opens the serial port at 115200 baud; all results are printed there by loop().
void setup() {
// runs once at start-up
Serial.begin(115200);
}
// Copies characters from stringOne to posString, starting at charPos, until
// p_StopChar is reached. The growing posString is printed after every
// character (debug output).
// Returns true when p_StopChar was found (charPos then is its index) and
// false when the end of stringOne was reached first. The original loops had
// no end-of-string check and never terminated in that case.
// Note: charPos is a byte, so stringOne must be shorter than 256 characters.
static bool copyFieldUntil(char p_StopChar) {
  while (charPos < stringOne.length() && stringOne.charAt(charPos) != p_StopChar) {
    pos = stringOne.charAt(charPos);
    posString = posString + pos;
    Serial.println(posString);
    charPos ++;
  }
  return charPos < stringOne.length();
}

// Builds "x-value, y-value, z-value" in posString from a text of the form
// "x:<n> y:<n> z:<n> q...". Expects charPos to index the first character
// behind "x:". Returns false as soon as one of the labels y, z or q is missing.
static bool buildPositionList() {
  if (!copyFieldUntil('y')) {
    return false;
  }
  posString = posString + ", ";
  charPos += 2;  // skip the label and its colon ("y:")

  if (!copyFieldUntil('z')) {
    return false;
  }
  posString = posString + ", ";
  charPos += 2;  // skip "z:"

  // the q of "qf:" only marks the end of the z value; qf itself is not copied
  return copyFieldUntil('q');
}

// Parses the sample text once per second and prints the resulting list, or an
// error message when the text does not have the expected layout.
void loop() {
  charPos = 2;
  stringOne = "x:100 y:120 z:2500 qf:100";
  posString = "";

  if (stringOne.startsWith("x:")) {
    Serial.println("This is the correct string");
    if (buildPositionList()) {
      Serial.print("Here is your final string: "); Serial.println(posString);
    } else {
      Serial.println("There has been an error...");
    }
  }
  delay(1000);
}
Here is the output:
This is the correct string
1
10
100
100
100 , 1
100 , 12
100 , 120
100 , 120
100 , 120 , 2
100 , 120 , 25
100 , 120 , 250
100 , 120 , 2500
100 , 120 , 2500
Here is your final string: 100 , 120 , 2500
If anyone has any ideas to make it better or shorter or more reliable they will be much appreciated.
Cheers
Your result appears to be missing the qf:100 field.
Your original post stated the requirement was to extract each of the numbers from the string and turn it into its own variable - your posted code does not appear to assign values to variables, it just creates a comma separated string.
If a comma separated list is all you require, use the string tokeniser (strtok) to extract the numeric data, e.g.
// parse string extracting numeric values to comma seperated list
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Splits the sample text at ':' and ' ' and prints every token that starts
// with a digit, separated by ", ", to Serial.
void setup()
{
  Serial.begin(115200);
  char str[] = "x: 5674 y: 846 z: 23 qf: 89";
  bool firstValue = true;
  Serial.print ("\nextracting numeric values to comma seperated list: ");
  Serial.println(str);

  // strtok modifies str in place; passing NULL continues with the same string
  for (char * pch = strtok (str, ": "); pch != NULL; pch = strtok (NULL, ": "))
  {
    // isdigit must not be given a negative value, so convert the plain char
    // to unsigned char first
    if (!isdigit(static_cast<unsigned char>(pch[0])))
      continue;

    // print comma separated value - no comma in front of the first value
    if (!firstValue)
      Serial.print(", ");
    Serial.print(pch);
    firstValue = false;
  }
}
// Intentionally empty: the list is produced once in setup().
void loop() {}
horace:
Your result appears to be missing the qf:100 field.
Your original post stated the requirement was to extract each of the numbers from the string and turn it into its own variable - your posted code does not appear to assign values to variables, it just creates a comma separated string.
If a comma separated list is all you require, use the string tokeniser (strtok) to extract the numeric data, e.g.
// parse string extracting numeric values to comma seperated list
Thanks for your reply. The numbers following qf are irrelevant for me so it doesn't matter whether I have them or not. This method seems much more reliable and outputs the correct numbers. I probably should have specified in my first post that in the end I want a comma separated string that I can then put onto an SD card in a csv file. I am still trying to muddle my way through this code trying to understand it but how could I modify it to create a new string?
Thanks so much
One thing I probably should have mentioned, and which has caused me errors, is that although what I gave was an example of a string, its actual length is unknown, as it comes from a global variable. At the start of my code I create a string called positionString which stores that value. When I tried replacing the:
char str[] = "x: 100 y: 200 z:1200";
With:
char str[] = positionString;
It told me it couldn't compile because it was an unknown length. How can I fix this?