Just thought I'd share a little routine with you all that I find very useful.
The idea of this function is to split words out of a C string (a char *) in the same way that the Unix shell does: everything between a pair of double quotes (") or a pair of single quotes (') is returned as one complete word, and a space can be made part of a word by escaping it with a backslash.
/*
 * Word Parsing Routine.
 * Majenko Technologies, 2013
 *
 * Routine to split words out of a string using standard
 * UNIX-style command-line escaping. If a word starts with a
 * single or double quote, everything up to the matching
 * closing quote is taken as one atomic unit and returned as
 * a word (without the quotes). A character preceded by a
 * backslash (\) is taken literally as part of the current
 * word, so an escaped space does not break the word and an
 * escaped quote does not close it. Outside quotes, spaces and
 * tabs are both word break characters.
 *
 * The first time you call the routine with a fresh string
 * you provide the pointer to the start of the string.
 * Subsequent calls with the same string should have NULL passed
 * as the string - it keeps an internal static pointer for its
 * progress through the string, so only one string can be
 * parsed at a time.
 *
 * Returns a pointer to the next word, or NULL if no more
 * words exist. NULL is also returned if the routine is called
 * with NULL before any string has been supplied.
 *
 * This routine modifies the string in place: word terminators
 * are overwritten with '\0' and escape backslashes are removed.
 */
char *getWord(char *buf) {
    static char *ptr = NULL;    /* read position, remembered between calls */
    char *start;                /* first character of the word being returned */
    char *out;                  /* write position used to squeeze out backslashes */
    char term = ' ';            /* character that ends the current word */

    if (buf != NULL) {
        ptr = buf;
    }

    /* Nothing to parse yet: called with NULL before any string was given. */
    if (ptr == NULL) {
        return NULL;
    }

    /* Skip leading word breaks. */
    while (*ptr == ' ' || *ptr == '\t') {
        ptr++;
    }
    if (*ptr == '\0') {
        return NULL;
    }

    /* A leading quote switches the terminator to the matching quote. */
    if (*ptr == '"' || *ptr == '\'') {
        term = *ptr;
        ptr++;
    }

    start = ptr;
    out = ptr;
    while (*ptr != '\0') {
        if (*ptr == '\\') {
            /* Drop the backslash and copy the following character verbatim. */
            ptr++;
            if (*ptr == '\0') {
                /* Trailing backslash: nothing left to escape. */
                break;
            }
            *out++ = *ptr++;
            continue;
        }
        if (*ptr == term || (term == ' ' && *ptr == '\t')) {
            /* End of word: terminate it and resume after the break next call. */
            *out = '\0';
            ptr++;
            return start;
        }
        /* out only lags behind ptr once an escape has been removed. */
        *out++ = *ptr++;
    }

    /* Reached the end of the string without seeing a terminator. */
    if (ptr == start) {
        /* Only an opening quote was left; there is no word to return. */
        return NULL;
    }
    *out = '\0';
    return start;
}
// Arduino start-up hook: opens the serial port at 9600 baud.
void setup() {
    const unsigned long baudRate = 9600;
    Serial.begin(baudRate);
}
// Arduino main loop: builds a demo command line, prints each word that
// getWord() extracts from it on its own line, then waits one second.
void loop() {
    char line[80];

    // The text to split. In a real sketch it might have arrived via serial.
    sprintf(line, "The following \"escaped section\" should not\\ be\\ separated");

    // The first call hands getWord() the buffer; every later call passes NULL
    // so that it continues from its internal static pointer.
    for (char *word = getWord(line); word != NULL; word = getWord(NULL)) {
        Serial.println(word);
    }

    delay(1000);
}
The example string
The following "escaped section" should not\ be\ separated
should result in this output:
The
following
escaped section
should
not be separated
Note how all the words between the quotes are kept together, and any spaces that are escaped form part of the word instead of breaking a new word.
Great for parsing simple serial command line interfaces.