XML Parser problem

Hey,

I’ve used the Arduino XML weather parser for more than two years to get data from this website: http://www.ivb.at/…

Until last week, the code worked just fine. Now it just connects but can’t retrieve any data. Obviously, there was a server-update and a slight change in the structure of the XML file, but the tags stayed the same.

So here is the code (I just replaced the mac/ip address):

//1/6/08 Bob S. - Created

#include <string.h>
#include <Ethernet.h>
#include <SPI.h>

// Size in bytes of each parser buffer below (including the terminating '\0').
#define MAX_STRING_LEN  20

// Most recently completed tag, stored with its angle brackets (e.g. "<route>").
char tagStr[MAX_STRING_LEN] = "";
// Text collected after the most recent '>' until the line is processed.
char dataStr[MAX_STRING_LEN] = "";
// Scratch buffer in which the tag currently being read is assembled.
char tmpStr[MAX_STRING_LEN] = "";
// The two characters "</" that introduce a closing tag.
char endTag[3] = {'<', '/', '\0'};
// NOTE(review): not referenced by any of the code shown here.
int len;

// True while the characters between '<' and '>' are being read.
boolean tagFlag = false;
// True while the characters following a '>' are being read.
boolean dataFlag = false;

// Placeholder MAC and IP address (the poster blanked out the real values).
byte mac[] = { 
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
byte ip[] = { 
  000, 00, 000, 000 };
// IPv4 address the client connects to -- presumably www.ivb.at; verify.
byte server[] = { 
  83, 175, 126, 90 };

EthernetClient client;

// Opens the serial console, brings up Ethernet with the static address and
// sends the first timetable request to the server.
void setup()
{
  Serial.begin(9600);
  Serial.println("connecting...");
  Ethernet.begin(mac, ip);
  delay(1000);

  if (client.connect(server, 80)) {
    Serial.println("connected");
    client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.1");
    // HTTP/1.1 requests must carry a Host header; without it a server that
    // hosts several sites cannot tell which one is meant.
    client.println("Host: www.ivb.at");
    // Have the server close the socket after the response so that loop()
    // notices the disconnect and schedules the next request.
    client.println("Connection: close");
    client.println();
    delay(2000);
  } else {
    Serial.println("connection failed");
  }
}

void loop() {

 while (client.available()) {
   serialEvent(); 
 }

 if (!client.connected()) {
   client.stop();

   for (int t = 1; t <= 15; t++) {
     delay(60000);
   }

   if (client.connect(server, 80)) {
     client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.1");    
     client.println();
     delay(2000);
   } else {
     Serial.println("Reconnect failed");
   }       
 }
}

void serialEvent() {

     char inChar = client.read();
 
  if (inChar == '<') {
     addChar(inChar, tmpStr);
     tagFlag = true;
     dataFlag = false;

  } else if (inChar == '>') {
     addChar(inChar, tmpStr);

     if (tagFlag) {      
        strncpy(tagStr, tmpStr, strlen(tmpStr)+1);
     }

     clearStr(tmpStr);

     tagFlag = false;
     dataFlag = true;      
     
  } else if (inChar != 10) {
     if (tagFlag) {
        addChar(inChar, tmpStr);

        if ( tagFlag && strcmp(tmpStr, endTag) == 0 ) {
           clearStr(tmpStr);
           tagFlag = false;
           dataFlag = false;
        }
     }
     
     if (dataFlag) {
        addChar(inChar, dataStr);
     }
  }  
 
  if (inChar == 10 ) {

/*
     Serial.print("tagStr: ");
     Serial.println(tagStr);
     Serial.print("dataStr: ");
     Serial.println(dataStr);
*/

     if (matchTag("<route>")) {
        Serial.print(dataStr);
     }
     if (matchTag("<direction>")) {
        Serial.print(dataStr);
     }
     if (matchTag("<time>")) {
        Serial.print(dataStr);
     }

     clearStr(tmpStr);
     clearStr(tagStr);
     clearStr(dataStr);

     tagFlag = false;
     dataFlag = false;
  }
}

// Empties a C string in place by overwriting every character up to (but not
// including) its current terminator with '\0'. Bytes beyond the terminator
// are left untouched.
void clearStr (char* str) {
  char* cursor = str;
  while (*cursor != 0) {
    *cursor = 0;
    ++cursor;
  }
}

void addChar (char ch, char* str) {
  char *tagMsg  = "<TRUNCATED_TAG>";
  char *dataMsg = "-TRUNCATED_DATA-";

  if (strlen(str) > MAX_STRING_LEN - 2) {
     if (tagFlag) {
        clearStr(tagStr);
        strcpy(tagStr,tagMsg);
     }
     if (dataFlag) {
        clearStr(dataStr);
        strcpy(dataStr,dataMsg);
     }

     clearStr(tmpStr);
     tagFlag = false;
     dataFlag = false;

  } else {
     str[strlen(str)] = ch;
  }
}

// Returns true when the global tagStr is character-for-character equal to
// searchTag, false otherwise.
boolean matchTag (char* searchTag) {
  return strcmp(tagStr, searchTag) == 0;
}

There is no problem with the internet connection or with the server. The Arduino Ethernet Example (WebClient) retrieves the whole XML file. Probably it’s a problem with the tags or with the inChar…

Any help is appreciated :slight_smile:
Max

thmmax:
There is no problem with the internet connection or with the server. The Arduino Ethernet Example (WebClient) retrieves the whole XML file. Probably it's a problem with the tags or with the inChar...

To me it looks like you would NOT have to use the server IP address:

byte server[] = {  83, 175, 126, 90 };

to retrieve the file, but you would have to use the server name.

I’ve used the Arduino XML weather parser for more than two years to get data from this website

That link did not successfully load in firefox. You may need to get an updated link to the site.

The link doesn’t load in Chrome either.

I've used the Arduino XML weather parser

The Arduino IDE does NOT come with an XML weather parser.

Why the hell is the function to get data from the client object called serialEvent? It is not triggered by some event and it does not get data from the serial port.

Thank you all for your replies!

@jurs: I just tried to use the server name, it makes no difference…

@zoomkat/Isaac96: that’s very interesting - the link works for me in Safari and Firefox!

I just tried to use the WebClient example, it retrieves the whole XML file:

#include <SPI.h>
#include <Ethernet.h>

// Placeholder MAC address (the poster blanked out the real value).
byte mac[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
// Host name passed to client.connect() and sent in the Host header.
char server[] = "www.ivb.at";

// Static address used only when DHCP configuration fails in setup().
IPAddress ip(000, 00, 000, 000);

EthernetClient client;

void setup() {
  Serial.begin(9600);
   while (!Serial) {
  }

  if (Ethernet.begin(mac) == 0) {
    Serial.println("Failed to configure Ethernet using DHCP");
    Ethernet.begin(mac, ip);
  }
  delay(1000);
  Serial.println("connecting...");

  if (client.connect(server, 80)) {
    Serial.println("connected");
    client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=all&stopId=Absam%20Dorf&optDir=-1&nRows=1&showArrivals=n&optTime=now&time=&allLines=n&app=sionline HTTP/1.1");
    client.println("Host: www.ivb.at");
    client.println("Connection: close");
    client.println();
  } 
  else {
    Serial.println("connection failed");
  }
}

void loop()
{
  if (client.available()) {
    char c = client.read();
    Serial.print(c);
  }

  if (!client.connected()) {
    Serial.println();
    Serial.println("disconnecting.");
    client.stop();

    while(true);
  }
}

@PaulS: I know it is not THE XML weather parser :slight_smile:

With the WebClient working, I assume that the problem is somewhere in the serialEvent() function. It is not my code and to be honest: I had some problems understanding the char and flag functions…

thmmax:
@jurs: I just tried to use the server name, it makes no difference…

I just tried to use the WebClient example, it retrieves the whole XML file:

But in that example you use the server name instead of the IP address?

Anyway.
Retrieving the contents from the server by doing a GET request is absolutely no problem.
You just did that yourself.

The response is just two lines long:

<?xml version="1.0" encoding="utf-8"?>
<ivbsmartinfo output="timetable" version="Compatibity Version 1.0"><smartinfo><route>D</route><direction>Hauptbahnhof</direction><time>7 min</time></smartinfo><smartinfo><route>E</route><direction>Hauptbahnhof</direction><time>18:55</time></smartinfo><smartinfo><route>D</route><direction>Kurhaus</direction><time>18:57</time></smartinfo><smartinfo><route>D</route><direction>Hauptbahnhof</direction><time>19:10</time></smartinfo><linedirections route="D" direction="Hauptbahnhof" dirforrequest="Hauptbahnhof"/><linedirections route="D" direction="Kurhaus" dirforrequest="Kurhaus"/><linedirections route="E" direction="Hauptbahnhof" dirforrequest="Hauptbahnhof"/><stopidname>Absam Dorf</stopidname></ivbsmartinfo>

Unfortunately your “tag matching code” has one big problem:
You seem to expect line-separating characters (ASCII 10, line feed) while parsing the data:

    if (inChar == 10 )

But those ASCII-10 characters seem not to be in the XML response code.
So your “tag matching code” doesn’t work.

I’ve posted the two lines that you get from the server.
Please let me know which data you want to extract from that, I can perhaps then write a working parser for your server.

Hey jurs,

wow, thank you so much for your help! :slight_smile:
That is probably what was changed during the server update…

I am looking for the data for <route>, <direction> and <time> (in this case: D, Hauptbahnhof, 7 min and so on)…

thmmax:
I am looking for the data for <route>, <direction> and <time> (in this case: D, Hauptbahnhof, 7 min and so on)…

So perhaps try this rewritten code:

// code for http://forum.arduino.cc/index.php?topic=327574.0
#include <string.h>
#include <Ethernet.h>
#include <SPI.h>

// Element names (without angle brackets) whose content readTagData() reports.
char* tags[]= {"route", "direction", "time"};
// Number of entries in tags[].
#define NUMTAGS (sizeof(tags)/sizeof(tags[0]))

// MAC and static IP address handed to Ethernet.begin() in setup().
byte mac[] = { 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F };
byte ip[] = { 192,168,2,251};

// Host name used both for client.connect() and for the Host header.
char server[] = "www.ivb.at";
EthernetClient client;

// Maximum number of characters kept for a tag name or for element data.
#define MAX_STRING_LEN 20
// Filled by readTagData(): name of the tag read last / text of the element.
char tagStr[MAX_STRING_LEN+1];  // max. length and terminating '\0'
char dataStr[MAX_STRING_LEN+1]; // max. length and terminating '\0'

boolean readTagData(char c)
{
  static byte dataCount=0; // counting chars in data
  static byte tagCount=0;  // counting chars in tag
  static boolean inTag=false;
  static int currentTag=-1;
  if (c=='<') 
  {
    inTag=true;
    tagCount=0;
    dataCount=0;
    if (currentTag>=0)
    {
      currentTag=-1;
      return true;
    }
    else return false;
  }
  else if (c=='>')
  {
    inTag=false;
    currentTag=-1;
    tagCount=0;
    dataCount=0;
    for (int i=0;i<NUMTAGS;i++)
    {
      if (strcmp(tags[i],tagStr)==0)
      {
        currentTag=i;
        break;
      }
    }
  }
  else if (c>=32 && inTag && tagCount<MAX_STRING_LEN)
  {
    if (tagCount==0) memset(tagStr,0,sizeof(tagStr)); // clear string
    tagStr[tagCount]=c;
    tagCount++;
  }
  else if (c>=32 && dataCount<MAX_STRING_LEN)
  {
    if (dataCount==0) memset(dataStr,0,sizeof(dataStr)); // clear string
    dataStr[dataCount]=c;
    dataCount++;
  }
  return false;
}

// Reads and discards the HTTP response header: consumes bytes until a line
// feed arrives on a line that contained no printable character, or until the
// client is no longer connected.
void skipHeader(EthernetClient &client)
{
  boolean lineHasText=false;
  while (client.connected())
  {
    if (!client.available()) continue;

    char ch = client.read();
    if (ch=='\n')
    {
      if (!lineHasText) return; // blank line: header is finished
      lineHasText=false;        // a new line starts
    }
    else if (ch>=' ')
    {
      lineHasText=true;
    }
  }
}


void doOneServerRequest()
{
 if (client.connect(server, 80))
 {
   Serial.println(F("connected"));
   client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.1");
   client.print("Host: ");client.println(server);
   client.println("Connection: close");
   client.println();
   skipHeader(client);
   while (client.connected())
   {
    if (client.available())
    {
      boolean found=readTagData(client.read());
      if (found)
      {
        Serial.print("Tag:\t");
        Serial.println(tagStr);
        Serial.print("Data:\t");
        Serial.println(dataStr);
      }
    }
  }
  client.stop();
  Serial.println();
 } 
 else Serial.println(F("connection failed"));
}


void setup()
{
 Serial.begin(9600);
 Ethernet.begin(mac, ip);
 Serial.print(F("Local IP: "));
 Serial.println(Ethernet.localIP());

}

#define MILLIS_PER_MINUTE 60000L
void loop() 
{
  doOneServerRequest();
  delay(15*MILLIS_PER_MINUTE);
}

P.S.: Just edited the initial posting and added a “client.stop();”, so that repeated connections will work.

OK?

Hey jurs,

oh wow, thank you so much! It works! :slight_smile:
I will have a detailed look at the code and try to understand it!

Thank you!
Max

thmmax:
oh wow, thank you so much! It works! :slight_smile:

Please add the missing "client.stop();" (just edited my last posting)!

BTW: Your main loop() is blocking the program execution with delay for 15 minutes. If you did it differently, the same sketch could do many other things between the HTTP requests.

Hey jurs,

just added the client.stop!
Thanks, I have just seen the millis function :slight_smile:

Max

@ jurs: just wanted to say thank you again, your code works like a charm!

However, I have one more question: I’ve tried to use the “matchTag” function of the “old” code

// Returns true when the global tagStr is character-for-character equal to
// searchTag, false otherwise.
boolean matchTag (char* searchTag) {
  return strcmp(tagStr, searchTag) == 0;
}

and

boolean found=readTagData(client.read());
     if (matchTag("<route>")) {
        Serial.print(dataStr);
     }

instead of this function

 boolean found=readTagData(client.read());
      if (found)
      {
        Serial.print("Tag:\t");
        Serial.println(tagStr);
        Serial.print("Data:\t");
        Serial.println(dataStr);
      }

to analyse single tags.
However, it doesn’t work and I can’t figure out why…

Max

However, it doesn't work and I can't figure out why...

"it" does something. You didn't say what.

You expect "it" to do something. You didn't say what.

All that we can conclude is that what it does is not what you want. But, I'm sure you already knew that.

thmmax:
to analyse single tags.
However, it doesn't work and I can't figure out why...

The condition " if ( strcmp(tagStr, searchTag) == 0 )" is only true, if the contents of both char arrays, 'tagStr' and 'searchTag', are absolutely identical. From the first char to the last char.

Normally mixing 'non working code' with 'working code' is not a good idea, because that will result in a 'non working code' after merging it.

If you have any problems, try to explain what you want to do as detailed as possible. And post the full code instead of just code fragments.

Here is the full code:

#include <string.h>
#include <Ethernet.h>
#include <SPI.h>

// Element names (without angle brackets) whose content readTagData() reports.
char* tags[]= {
  "route", "direction", "time"};
// Number of entries in tags[].
#define NUMTAGS (sizeof(tags)/sizeof(tags[0]))

// MAC and static IP address handed to Ethernet.begin() in setup().
byte mac[] = { 
  0x90, 0xA2, 0xDA, 0x0D, 0x52, 0xBD };
byte ip[] = { 
  172, 16, 254, 103};

// Host name used both for client.connect() and for the Host header.
char server[] = "www.ivb.at";
EthernetClient client;

// Maximum number of characters kept for a tag name or for element data.
#define MAX_STRING_LEN 20
// Filled by readTagData(): name of the tag read last / text of the element.
char tagStr[MAX_STRING_LEN+1];  // max. length and terminating '\0'
char dataStr[MAX_STRING_LEN+1]; // max. length and terminating '\0'

boolean readTagData(char c)
{
  static byte dataCount=0; // counting chars in data
  static byte tagCount=0;  // counting chars in tag
  static boolean inTag=false;
  static int currentTag=-1;
  if (c=='<') 
  {
    inTag=true;
    tagCount=0;
    dataCount=0;
    if (currentTag>=0)
    {
      currentTag=-1;
      return true;
    }
    else return false;
  }
  else if (c=='>')
  {
    inTag=false;
    currentTag=-1;
    tagCount=0;
    dataCount=0;
    for (int i=0;i<NUMTAGS;i++)
    {
      if (strcmp(tags[i],tagStr)==0)
      {
        currentTag=i;
        break;
      }
    }
  }
  else if (c>=32 && inTag && tagCount<MAX_STRING_LEN)
  {
    if (tagCount==0) memset(tagStr,0,sizeof(tagStr)); // clear string
    tagStr[tagCount]=c;
    tagCount++;
  }
  else if (c>=32 && dataCount<MAX_STRING_LEN)
  {
    if (dataCount==0) memset(dataStr,0,sizeof(dataStr)); // clear string
    dataStr[dataCount]=c;
    dataCount++;
  }
  return false;
}

// Reads and discards the HTTP response header: consumes bytes until a line
// feed arrives on a line that contained no printable character, or until the
// client is no longer connected.
void skipHeader(EthernetClient &client)
{
  boolean lineHasText=false;
  while (client.connected())
  {
    if (!client.available()) continue;

    char ch = client.read();
    if (ch=='\n')
    {
      if (!lineHasText) return; // blank line: header is finished
      lineHasText=false;        // a new line starts
    }
    else if (ch>=' ')
    {
      lineHasText=true;
    }
  }
}


void doOneServerRequest()
{
  if (client.connect(server, 80))
  {
    Serial.println(F("connected"));
    client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=1&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.1");
    client.print("Host: ");
    client.println(server);
    client.println("Connection: close");
    client.println();
    skipHeader(client);
    while (client.connected())
    {
      if (client.available())
      {
        boolean found=readTagData(client.read());

        if (matchTag("<route>")) {
          Serial.print(F(" Route: "));
          Serial.println(dataStr); 
        }
      }
    }
    client.stop();
  }
  else Serial.println(F("connection failed"));
}

void setup()
{
  Serial.begin(9600);
  Ethernet.begin(mac, ip);
  Serial.print(F("Local IP: "));
  Serial.println(Ethernet.localIP());

}

#define MILLIS_PER_MINUTE 600
void loop() 
{
  doOneServerRequest();
  delay(15*MILLIS_PER_MINUTE);
}

// Returns true when the global tagStr is character-for-character equal to
// searchTag, false otherwise.
boolean matchTag (char* searchTag) {
  return strcmp(tagStr, searchTag) == 0;
}

I would like to display the data on an LED-matrix, therefore I need the single tags to set the cursor (there are different positions for every tag)
For example (using “matchTag”):

    if (matchTag("<route>")) {
      Serial.print(F(" route: "));
      Serial.print(dataStr); 
      matrix1.setCursor(0, 10);
      matrix1.print(dataStr);
      matrix1.writeScreen();
      }
    }

Thanks :slight_smile:

Max

Edit: I really tried to understand the code, but I had some problems in the “boolean readTagData(char c)” function…

The code you posted does something. You want the code to do something. In general, if what the code does is what is wanted, there is no reason to post here. So, it is probably safe to assume that the code does not do what you want.

What it actually does, and how that differs from what you want, is a real mystery, though.

@ PaulS: it's not too much of a mystery, actually :slight_smile:

Alright, the code jurs posted a little above basically gets XML data from a website and displays the results in the serial monitor. What I wanted to do now is search the results for certain tags, that's why I wanted to use the "matchTag" function...

What I wanted to do now is search the results for certain tags, that's why I wanted to use the "matchTag" function...

And? It works great? The Arduino exploded into a million pieces?

thmmax:
Edit: I really tried to understand the code, but I had some problems in the "boolean readTagData(char c)" function...

I suppose the same: Your understanding is the problem.
You don't understand the non-working code that you used initially.
You don't understand the working code that I posted for you.

Now you are mixing non-working code you don't understand with working code you don't understand and the result is non-working code. That really doesn't surprise me.

I'll fix it for you, so that you can test for single tags and handle them separately. Just replace the whole function:

void doOneServerRequest()
{
 if (client.connect(server, 80))
 {
   Serial.println(F("connected"));
   client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.1");
   client.print("Host: ");client.println(server);
   client.println("Connection: close");
   client.println();
   skipHeader(client);
   while (client.connected())
   {
    if (client.available())
    {
      boolean found=readTagData(client.read());
      if (found)
      {
        if (strcmp(tagStr,"route")==0) // test for special tag
        {
          Serial.print("Special handling tag: ");
          Serial.print(tagStr);
          Serial.print("\tData:\t");
          Serial.println(dataStr);
        }
        else // normal handling for other tags
        {
          Serial.print("Normal handling tag: ");
          Serial.print(tagStr);
          Serial.print("\tData:\t");
          Serial.println(dataStr);
        }
      } //  if (found)
    }
  }
  client.stop();
  Serial.println();
 } 
 else Serial.println(F("connection failed"));
}

I try to explain how it works:
The main work is done by the readTagData() function, which handles each incoming char: If no complete tag and its data is received, the function will return with a 'false' value. But if the readTagData() function returns with a 'true' value, then you have (at that moment of data processing) two strings in two global char arrays:
tagStr and dataStr

You then can deal with them using AVR LIBC string functions like 'strcmp'.

So if this code snippet:

     boolean found=readTagData(client.read());
      if (found)

creates a (found==true), you then can handle the 'tagStr' and the 'dataStr' as you like.

I.e. you could compare the 'tagStr' against a fixed tag literal using 'strcmp()' function and if the compare returns that they are equal you can do different handling of this 'tagStr'.