XML Parser

Hey,

I want to use the Arduino with an Ethernet Shield to parse XML data from a website and display it on the serial monitor.
I found an XML parser for weather data and changed the settings a little bit.

It should retrieve data from this website: http://www.ivb.at/smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Breitweg&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline

The connection to the server works, but I just don't get the data!
So the code looks like this:

//////////////////////////////////////////////
// Get XML formatted data from the web.
// 1/6/08 Bob S. - Created
//
//  Assumptions: single XML line looks like: 
//    <tag>data</tag> or <tag>data 
//
//////////////////////////////////////////////
#include <SPI.h>
#include <string.h>
#include <Ethernet.h>

// Define Constants
// Size in bytes of each parsing buffer below (terminating NUL included).
// Max string length may have to be adjusted depending on data to be extracted
#define MAX_STRING_LEN  20

// Setup vars
// tagStr  - last completed tag, e.g. "<time>"; copied from tmpStr when '>' arrives
// dataStr - text collected after a tag on the current line
// tmpStr  - tag that is currently being assembled, starting with '<'
char tagStr[MAX_STRING_LEN] = "";
char dataStr[MAX_STRING_LEN] = "";
char tmpStr[MAX_STRING_LEN] = "";
// The two characters "</" that open a closing tag; serialEvent() compares
// tmpStr against this to detect and skip closing tags.
char endTag[3] = {'<', '/', '\0'};
// Not referenced by any function in this sketch (clearStr declares its own local len).
int len;

// Flags to differentiate XML tags from document elements (ie. data)
// tagFlag  - true while the characters between '<' and '>' are being read
// dataFlag - true while the characters following a '>' are being read
boolean tagFlag = false;
boolean dataFlag = false;

// Ethernet vars
// mac and ip are handed to Ethernet.begin(); server is the address passed
// to client.connect().
byte mac[] = { 0x90, 0xA2, 0xDA, 0x0D, 0x52, 0xBD };
byte ip[] = { 192, 168, 2, 3 };
byte server[] = { 83, 175, 126, 90 }; // www.ivb.at

// Start ethernet client
EthernetClient client;

void setup()
{
  Serial.begin(9600);
  Serial.println("Starting IVB.at");
  Serial.println("connecting...");
  Ethernet.begin(mac, ip);
  delay(1000);

  if (client.connect(server, 80)) {
    Serial.println("connected");
    client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Breitweg&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.0");    
    client.println();
    delay(2000);
  } else {
    Serial.println("connection failed");
  }  
}

void loop() {

  // Read serial data in from web:
  while (client.available()) {
    serialEvent(); 
  }

  if (!client.connected()) {
    //Serial.println();
    //Serial.println("Disconnected");
    client.stop();

    // Time until next update
    //Serial.println("Waiting");
    for (int t = 1; t <= 15; t++) {
      delay(60000); // 1 minute
    }

    if (client.connect(server, 80)) {
      //Serial.println("Reconnected");
      client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Breitweg&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.0");    
      client.println();
      delay(2000);
    } else {
      Serial.println("Reconnect failed");
    }       
  }
}

// Process each char from web
//
// Reads one byte from `client` and advances a small line-oriented parser
// that uses the global buffers tmpStr/tagStr/dataStr and the two flags:
//   '<'    starts a tag: the char is appended to tmpStr, tagFlag is set
//   '>'    ends a tag: if a tag was open, tmpStr is copied to tagStr; tmpStr
//          is cleared and data collection (dataFlag) begins
//   LF     ends the line: tagStr is compared against the wanted tags, the
//          matching data is printed, then all buffers and flags are reset
//   other  appended to tmpStr while inside a tag, or to dataStr while
//          collecting data (a carriage return, 13, is not filtered out)
// NOTE(review): Arduino cores also treat a function named serialEvent() as
// a hook that runs automatically when Serial input is pending - confirm
// that is harmless here, or rename the function together with its caller.
void serialEvent() {

   // Read a char
      char inChar = client.read();
   //Serial.print(".");
  
   if (inChar == '<') {
      // Start of a tag; the flag is set after addChar so that an overflow
      // inside addChar is not attributed to a tag that has not started yet.
      addChar(inChar, tmpStr);
      tagFlag = true;
      dataFlag = false;

   } else if (inChar == '>') {
      addChar(inChar, tmpStr);

      // Only an opening tag still has tagFlag set here (closing tags drop
      // the flag below), so tagStr keeps the opening tag of the element.
      if (tagFlag) {      
         strncpy(tagStr, tmpStr, strlen(tmpStr)+1);
      }

      // Clear tmp
      clearStr(tmpStr);

      tagFlag = false;
      dataFlag = true;      
      
   } else if (inChar != 10) {
      if (tagFlag) {
         // Add tag char to string
         addChar(inChar, tmpStr);

         // Check for </XML> end tag, ignore it
         // tmpStr equals "</" exactly when the second char of the tag is '/';
         // both flags are dropped so the rest of the closing tag is skipped.
         if ( tagFlag && strcmp(tmpStr, endTag) == 0 ) {
            clearStr(tmpStr);
            tagFlag = false;
            dataFlag = false;
         }
      }
      
      if (dataFlag) {
         // Add data char to string
         addChar(inChar, dataStr);
      }
   }  
  
   // If a LF, process the line
   if (inChar == 10 ) {

/*
      Serial.print("tagStr: ");
      Serial.println(tagStr);
      Serial.print("dataStr: ");
      Serial.println(dataStr);
*/

      // Find specific tags and print data
      // The three fields are printed on one serial line; only <route>
      // finishes the line with a newline.
      if (matchTag("<time>")) {
            Serial.print("Time: ");
         Serial.print(dataStr);
      }
      if (matchTag("<direction>")) {
            Serial.print(", Direction: ");
         Serial.print(dataStr);
      }
      if (matchTag("<route>")) {
            Serial.print(", Route: ");
         Serial.print(dataStr);
         Serial.println("");
      }

      // Clear all strings
      clearStr(tmpStr);
      clearStr(tagStr);
      clearStr(dataStr);

      // Clear Flags
      tagFlag = false;
      dataFlag = false;
   }
}

/////////////////////
// Other Functions //
/////////////////////

// Function to clear a string
// Zeroes every character of str up to its terminating NUL, leaving an
// empty string; bytes beyond the original terminator are not touched.
void clearStr (char* str) {
   memset(str, 0, strlen(str));
}

//Function to add a char to a string and check its length
void addChar (char ch, char* str) {
   char *tagMsg  = "<TRUNCATED_TAG>";
   char *dataMsg = "-TRUNCATED_DATA-";

   // Check the max size of the string to make sure it doesn't grow too
   // big.  If string is beyond MAX_STRING_LEN assume it is unimportant
   // and replace it with a warning message.
   if (strlen(str) > MAX_STRING_LEN - 2) {
      if (tagFlag) {
         clearStr(tagStr);
         strcpy(tagStr,tagMsg);
      }
      if (dataFlag) {
         clearStr(dataStr);
         strcpy(dataStr,dataMsg);
      }

      // Clear the time buffer and flags to stop current processing 
      clearStr(tmpStr);
      tagFlag = false;
      dataFlag = false;

   } else {
      // Add char to string
      str[strlen(str)] = ch;
   }
}

// Function to check the current tag for a specific string
//
// searchTag - tag text including the angle brackets, e.g. "<time>"; taken as
//             const because callers pass string literals
// Returns true when the global tagStr is exactly equal to searchTag.
boolean matchTag (const char* searchTag) {
   return strcmp(tagStr, searchTag) == 0;
}

Can anyone help me please?

Max

I'm working on a similar project, but I'm just at the beginning. I hope you get it fixed soon.

Can anyone help me please?

The below code seems to work to get weather data, so maybe you should compare it to your code and identify differences.

// Include description files for other libraries used (if any)
#include <SPI.h>
#include <Ethernet.h>

// Define Constants
// Size in bytes of each parsing buffer below (terminating NUL included).
// Max string length may have to be adjusted depending on data to be extracted
#define MAX_STRING_LEN  20

// Setup vars
// tagStr  - last completed tag, e.g. "<temp_f>"; copied from tmpStr when '>' arrives
// dataStr - text collected after a tag on the current line
// tmpStr  - tag that is currently being assembled, starting with '<'
char tagStr[MAX_STRING_LEN] = "";
char dataStr[MAX_STRING_LEN] = "";
char tmpStr[MAX_STRING_LEN] = "";
// The two characters "</" that open a closing tag; serialEvent() compares
// tmpStr against this to detect and skip closing tags.
char endTag[3] = {'<', '/', '\0'};
// Not referenced by any function in this sketch (clearStr declares its own local len).
int len;

// Flags to differentiate XML tags from document elements (ie. data)
// tagFlag  - true while the characters between '<' and '>' are being read
// dataFlag - true while the characters following a '>' are being read
boolean tagFlag = false;
boolean dataFlag = false;

// Ethernet vars
// mac and ip are handed to Ethernet.begin(); server is the address passed
// to client.connect().
byte mac[] = { 0xDE, 0xAD, 0xBE, 0xEF, 0xFE, 0xED };
byte ip[] = { 192, 168, 1, 102 };
byte server[] = { 140, 90, 113, 200 }; // www.weather.gov

// Start ethernet client
EthernetClient client;

void setup()
{
  Serial.begin(9600);
  Serial.println("Starting WebWx");
  Serial.println("connecting...");
  Ethernet.begin(mac, ip);
  delay(1000);

  if (client.connect(server, 80)) {
    Serial.println("connected");
    client.println("GET /xml/current_obs/KRDU.xml HTTP/1.0");    
    client.println();
    delay(2000);
  } else {
    Serial.println("connection failed");
  }  
}

void loop() {

  // Read serial data in from web:
  while (client.available()) {
    serialEvent();
  }

  if (!client.connected()) {
    Serial.println();
    Serial.println("Disconnected");
    Serial.println("==================================");
    Serial.println("");
    client.stop();

    // Time until next update
    //Serial.println("Waiting");
    for (int t = 1; t <= 15; t++) {
      delay(60000); // 1 minute
    }

    if (client.connect(server, 80)) {
      //Serial.println("Reconnected");
      client.println("GET /xml/current_obs/KRDU.xml HTTP/1.0");    
      client.println();
      delay(2000);
    } else {
      Serial.println("Reconnect failed");
    }      
  }
}

// Process each char from web
//
// Reads one byte from `client` and advances a small line-oriented parser
// that uses the global buffers tmpStr/tagStr/dataStr and the two flags:
//   '<'    starts a tag: the char is appended to tmpStr, tagFlag is set
//   '>'    ends a tag: if a tag was open, tmpStr is copied to tagStr; tmpStr
//          is cleared and data collection (dataFlag) begins
//   LF     ends the line: tagStr is compared against the wanted tags, the
//          matching data is printed, then all buffers and flags are reset
//   other  appended to tmpStr while inside a tag, or to dataStr while
//          collecting data (a carriage return, 13, is not filtered out)
// NOTE(review): Arduino cores also treat a function named serialEvent() as
// a hook that runs automatically when Serial input is pending - confirm
// that is harmless here, or rename the function together with its caller.
void serialEvent() {

   // Read a char
	char inChar = client.read();
   //Serial.print(".");
  
   if (inChar == '<') {
      // Start of a tag; the flag is set after addChar so that an overflow
      // inside addChar is not attributed to a tag that has not started yet.
      addChar(inChar, tmpStr);
      tagFlag = true;
      dataFlag = false;

   } else if (inChar == '>') {
      addChar(inChar, tmpStr);

      // Only an opening tag still has tagFlag set here (closing tags drop
      // the flag below), so tagStr keeps the opening tag of the element.
      if (tagFlag) {      
         strncpy(tagStr, tmpStr, strlen(tmpStr)+1);
      }

      // Clear tmp
      clearStr(tmpStr);

      tagFlag = false;
      dataFlag = true;      
      
   } else if (inChar != 10) {
      if (tagFlag) {
         // Add tag char to string
         addChar(inChar, tmpStr);

         // Check for </XML> end tag, ignore it
         // tmpStr equals "</" exactly when the second char of the tag is '/';
         // both flags are dropped so the rest of the closing tag is skipped.
         if ( tagFlag && strcmp(tmpStr, endTag) == 0 ) {
            clearStr(tmpStr);
            tagFlag = false;
            dataFlag = false;
         }
      }
      
      if (dataFlag) {
         // Add data char to string
         addChar(inChar, dataStr);
      }
   }  
  
   // If a LF, process the line
   if (inChar == 10 ) {

/*
      Serial.print("tagStr: ");
      Serial.println(tagStr);
      Serial.print("dataStr: ");
      Serial.println(dataStr);
*/

      // Find specific tags and print data
      // The three readings are printed on one serial line; only
      // <pressure_in> finishes the line with a newline.
      if (matchTag("<temp_f>")) {
	      Serial.print("Temp: ");
         Serial.print(dataStr);
      }
      if (matchTag("<relative_humidity>")) {
	      Serial.print(", Humidity: ");
         Serial.print(dataStr);
      }
      if (matchTag("<pressure_in>")) {
	      Serial.print(", Pressure: ");
         Serial.print(dataStr);
         Serial.println("");
      }

      // Clear all strings
      clearStr(tmpStr);
      clearStr(tagStr);
      clearStr(dataStr);

      // Clear Flags
      tagFlag = false;
      dataFlag = false;
   }
}

/////////////////////
// Other Functions //
/////////////////////

// Function to clear a string
// Walks str from its first character to its terminating NUL and sets each
// character to 0; bytes after the original terminator are left alone.
void clearStr (char* str) {
   for (char *cursor = str; *cursor != 0; cursor++) {
      *cursor = 0;
   }
}

//Function to add a char to a string and check its length
void addChar (char ch, char* str) {
   char *tagMsg  = "<TRUNCATED_TAG>";
   char *dataMsg = "-TRUNCATED_DATA-";

   // Check the max size of the string to make sure it doesn't grow too
   // big.  If string is beyond MAX_STRING_LEN assume it is unimportant
   // and replace it with a warning message.
   if (strlen(str) > MAX_STRING_LEN - 2) {
      if (tagFlag) {
         clearStr(tagStr);
         strcpy(tagStr,tagMsg);
      }
      if (dataFlag) {
         clearStr(dataStr);
         strcpy(dataStr,dataMsg);
      }

      // Clear the temp buffer and flags to stop current processing
      clearStr(tmpStr);
      tagFlag = false;
      dataFlag = false;

   } else {
      // Add char to string
      str[strlen(str)] = ch;
   }
}

// Function to check the current tag for a specific string
//
// searchTag - tag text including the angle brackets, e.g. "<temp_f>"; taken
//             as const because callers pass string literals
// Returns true when the global tagStr is exactly equal to searchTag.
boolean matchTag (const char* searchTag) {
   return strcmp(tagStr, searchTag) == 0;
}

Thanks for your answer!
That's exactly the code I'm using. It works perfectly to parse weather data - but it just doesn't work with my website!
I've checked it over and over again but I can't find the error!

thmmax:
Thanks for your answer!
That's exactly the code I'm using. It works perfectly to parse weather data - but it just doesn't work with my website!
I've checked it over and over again but I can't find the error!

Could be an issue with your web site. Does your get string work with your web site when used with a browser? Does your arduino get anything returned from your web site? You might put a serial print function like below to see just what is being returned from your web site.

   // Read a char
      char inChar = client.read();
      Serial.print(inChar);
   //Serial.print(".");

I just tried your tip - that's what the serial monitor printed:

StStarting IVB.at
connecting...
connected
HTTP/1.1 404 Not Found
Content-Type: text/html
Server: Microsoft-IIS/7.5
X-Powered-By: ASP.NET
Date: Sat, 23 Mar 2013 18:18:35 GMT
Connection: close
Content-Length: 1245

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
<title>404 - File or directory not found.</title>
<style type="text/css">
<!--
body{margin:0;font-size:.7em;font-family:Verdana, Arial, Helvetica, sans-serif;background:#EEEEEE;}
fieldset{padding:0 15px 10px 15px;} 
h1{font-size:2.4em;margin:0;color:#FFF;}
h2{font-size:1.7em;margin:0;color:#CC0000;} 
h3{font-size:1.2em;margin:10px 0 0 0;color:#000000;} 
#header{width:96%;margin:0 0 0 0;padding:6px 2% 6px 2%;font-family:"trebuchet MS", Verdana, sans-serif;color:#FFF;
background-color:#555555;}
#content{margin:0 0 0 2%;position:relative;}
.content-container{background:#FFF;width:96%;margin-top:8px;padding:10px;position:relative;}
-->
</style>
</head>
<body>
<div id="header"><h1>Server Error</h1></div>
<div id="content">
 <div class="content-container"><fieldset>
  <h2>404 - File or directory not found.</h2>
  <h3>The resource you are looking for might have been removed, had its name changed, or is temporarily unavailable.</h3>
 </fieldset></div>
</div>
</body>
</html>

The problem is that I can't change the website (it's from our local transport authority). It provides real-time information about bus departures...
Maybe there is another solution?

The response shown below indicates your problem, possibly with your GET string or with the way the server is set up. Again, does your GET string work when used with a browser?

<h2>404 - File or directory not found.</h2>
  <h3>The resource you are looking for might have been removed, had its name changed, or is temporarily unavailable.</h3>
 </fieldset></div>

Ok, I modified some of my test code to include your server and url, and it appears to work to get the feed. In some server cases the domain name and the "host:" need to be used instead of the ip address. Try the below and see if you get the desired feed.

//zoomkat 9-22-12
//simple client test
//for use with IDE 1.0.1
//with DNS, DHCP, and Host
//open serial monitor and send an e to test
//for use with W5100 based ethernet shields
//remove SD card if inserted

#include <SPI.h>
#include <Ethernet.h>

byte mac[] = { 0xDE, 0xAD, 0xBE, 0xEF, 0xFE, 0xED }; //physical mac address

char serverName[] = "www.ivb.at"; // host name handed to client.connect() in sendGET()
EthernetClient client; // single client object reused for every request

//////////////////////

void setup(){

  if (Ethernet.begin(mac) == 0) {
    Serial.println("Failed to configure Ethernet using DHCP");
    // no point in carrying on, so do nothing forevermore:
    while(true);
  }

  Serial.begin(9600); 
  Serial.println("Better client test 9/22/12"); // so I can keep track of what is loaded
  Serial.println("Send an e in serial monitor to test"); // what to do to test
}

void loop(){
  // check for serial input
  if (Serial.available() > 0) //if something in serial buffer
  {
    byte inChar; // sets inChar as a byte
    inChar = Serial.read(); //gets byte from buffer
    if(inChar == 'e') // checks to see byte is an e
    {
      sendGET(); // call sendGET function below when byte is an e
    }
  }  
} 

//////////////////////////

void sendGET() //client function to send/receive GET request data.
{
  if (client.connect(serverName, 80)) {  //starts client connection, checks for connection
    Serial.println("connected");
    client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Breitweg&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.0"); //download text
    client.println("Host: www.ivb.at");
    client.println(); //end of get request
  } 
  else {
    Serial.println("connection failed"); //error message if no client connect
    Serial.println();
  }

  while(client.connected() && !client.available()) delay(1); //waits for data
  while (client.connected() || client.available()) { //connected or data available
    char c = client.read(); //gets byte from ethernet buffer
    Serial.print(c); //prints byte to serial monitor 
  }

  Serial.println();
  Serial.println("disconnecting.");
  Serial.println("==================");
  Serial.println();
  client.stop(); //stop client

}

Wow! It really works 8)
I will try to edit your code so I just get the information needed.
Thank you so much!

Edit: I just tried the 'original' code and added the 'host' information - and it works properly now!
You're a genius :slight_smile: I've worked on this script for weeks now and finally it works :slight_smile:

I've worked on the code and now I need your help once more :slight_smile:
The download of the data works perfectly, as does the output on the LED matrix (as you can see in the photos).

When I just download one row of data (http://www.ivb.at/smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=1&showArrivals=n&optTime=now&time=&allLines=y&app=sionline) the matrix displays everything correct (Photo 1).

But when I download 4 rows of data (as originally planned) (http://www.ivb.at/smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline), the data is printed in one row on the matrix and not in four rows (Photo2).

Does anyone have an idea how I could 'split' the four strings?

BTW the code:

#include <SPI.h>
#include <string.h>
#include <Ethernet.h>
#include "HT1632.h"

// Size in bytes of each parsing buffer below (terminating NUL included).
#define MAX_STRING_LEN  20
// Arguments for the HT1632LEDMatrix constructor below - presumably the
// Arduino pin numbers for data, write clock and four chip selects; verify
// against the HT1632 library.
#define DATA 2
#define WR   3
#define CS   4
#define CS2  5
#define CS3  6
#define CS4  7

// Display object used by setup(), loop() and serialEvent().
HT1632LEDMatrix matrix = HT1632LEDMatrix(DATA, WR, CS, CS2, CS3, CS4);

// tagStr  - last completed tag, e.g. "<time>"; copied from tmpStr when '>' arrives
// dataStr - text collected after a tag on the current line
// tmpStr  - tag that is currently being assembled, starting with '<'
char tagStr[MAX_STRING_LEN] = "";
char dataStr[MAX_STRING_LEN] = "";
char tmpStr[MAX_STRING_LEN] = "";
// The two characters "</" that open a closing tag; serialEvent() compares
// tmpStr against this to detect and skip closing tags.
char endTag[3] = {
  '<', '/', '\0'};
// Not referenced by any function in this sketch (clearStr declares its own local len).
int len;

// tagFlag  - true while the characters between '<' and '>' are being read
// dataFlag - true while the characters following a '>' are being read
boolean tagFlag = false;
boolean dataFlag = false;

// mac and ip are handed to Ethernet.begin(); server is the address passed
// to client.connect().
byte mac[] = { 
  0x90, 0xA2, 0xDA, 0x0D, 0x52, 0xBD };
byte ip[] = { 
  192, 168, 2, 3 };
byte server[] = { 
  83, 175, 126, 90 };

EthernetClient client;

void setup()
{
  Serial.begin(9600);
  matrix.begin(HT1632_COMMON_16NMOS);
  Serial.println("Starting IVB.at");
  Serial.println("connecting...");
  Ethernet.begin(mac, ip);
  delay(0);

  if (client.connect(server, 80)) {
    Serial.println("connected");
    client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.0");    
    client.println("Host: www.ivb.at");  
    client.println();
    delay(10);
  } 
  else {
    Serial.println("connection failed");
    matrix.clearScreen(); 
    matrix.setTextSize(1);
    matrix.setTextColor(1);
    {
      matrix.setCursor(0, 0);
      matrix.print("Bitte   beachten");
      matrix.setCursor(0, 8);
      matrix.print("Fahrplan");
      matrix.writeScreen();
    }
    delay(100);
  }  
}

void loop() {

  // Read serial data in from web:
  while (client.available()) {
    serialEvent(); 
  }

  if (!client.connected()) {
    //Serial.println();
    //Serial.println("Disconnected");
    client.stop();

    // Time until next update
    //Serial.println("Waiting");
    for (int t = 1; t <= 15; t++) {
      delay(1000); // 1 minute
    }

    if (client.connect(server, 80)) {
      //Serial.println("Reconnected");
      client.println("GET /smartinfo/ivb_smartinfo_kernel.php?olifServerId=84&autorefresh=20&default_autorefresh=20&routeId=&stopId=Absam%20Dorf&optDir=-1&nRows=4&showArrivals=n&optTime=now&time=&allLines=y&app=sionline HTTP/1.0");    
      client.println("Host: www.ivb.at");
      matrix.clearScreen();  
      client.println();
      delay(0);
    } 
    else {
      Serial.println("Reconnect failed");
    }       
  }
}

// Process each char from web
//
// Reads one byte from `client` and advances a small line-oriented parser
// that uses the global buffers tmpStr/tagStr/dataStr and the two flags:
//   '<'    starts a tag: the char is appended to tmpStr, tagFlag is set
//   '>'    ends a tag: if a tag was open, tmpStr is copied to tagStr; tmpStr
//          is cleared and data collection (dataFlag) begins
//   LF     ends the line: tagStr is compared against the wanted tags, the
//          matching data is printed and drawn on the matrix, then all
//          buffers and flags are reset
//   other  appended to tmpStr while inside a tag, or to dataStr while
//          collecting data (a carriage return, 13, is not filtered out)
// Every matrix.setCursor() call below uses y = 0, so each departure is drawn
// on the same display row as the previous one; showing several departures
// on separate rows would need a different y per departure.
// NOTE(review): Arduino cores also treat a function named serialEvent() as
// a hook that runs automatically when Serial input is pending - confirm
// that is harmless here, or rename the function together with its caller.
void serialEvent() {

  // Read a char
  char inChar = client.read();
  //Serial.print(".");

  if (inChar == '<') {
    // Start of a tag; the flag is set after addChar so that an overflow
    // inside addChar is not attributed to a tag that has not started yet.
    addChar(inChar, tmpStr);
    tagFlag = true;
    dataFlag = false;

  } 
  else if (inChar == '>') {
    addChar(inChar, tmpStr);

    // Only an opening tag still has tagFlag set here (closing tags drop
    // the flag below), so tagStr keeps the opening tag of the element.
    if (tagFlag) {      
      strncpy(tagStr, tmpStr, strlen(tmpStr)+1);
    }

    // Clear tmp
    clearStr(tmpStr);

    tagFlag = false;
    dataFlag = true;      

  } 
  else if (inChar != 10) {
    if (tagFlag) {
      // Add tag char to string
      addChar(inChar, tmpStr);

      // Check for </XML> end tag, ignore it
      // tmpStr equals "</" exactly when the second char of the tag is '/';
      // both flags are dropped so the rest of the closing tag is skipped.
      if ( tagFlag && strcmp(tmpStr, endTag) == 0 ) {
        clearStr(tmpStr);
        tagFlag = false;
        dataFlag = false;
      }
    }

    if (dataFlag) {
      // Add data char to string
      addChar(inChar, dataStr);
    }
  }  

  // If a LF, process the line
  if (inChar == 10 ) {

    /*
      Serial.print("routeStr: ");
     Serial.println(directionStr);
     Serial.print("directionStr: ");
     Serial.println(directionStr);
     */

    // Find specific tags and print data
    if (matchTag("<time>")) {
      // The departure time goes to the serial monitor twice (once via
      // dataStr, once via stringOne followed by a newline).
      Serial.print(" Abfahrt: ");
      Serial.print(dataStr);
      String stringOne = dataStr;
      Serial.println(stringOne);
      // Shorten the text for the display: "min" becomes "'" and every
      // ':' is removed.
      String stringTwo = stringOne;
      stringTwo.replace("min","'");
      stringTwo.replace(":","");
      matrix.setTextSize(1);
      matrix.setTextColor(1);
      {
        // Time is drawn starting at x = 24 on the top row.
        matrix.setCursor(24, 0);
        matrix.println(stringTwo);
        matrix.writeScreen();
      }

    }
    if (matchTag("<direction>")) {
      Serial.print(" Ziel: ");
      Serial.print(dataStr);
      String reportString = dataStr;
      // Despite its name this is simply the first character of the
      // direction text; only that one character is shown on the matrix.
      char mostSignificantDigit = reportString.charAt(0);
      matrix.setTextSize(1);
      matrix.setTextColor(1);
      {
        // Direction initial is drawn at x = 16 on the top row.
        matrix.setCursor(16, 0);
        matrix.println(mostSignificantDigit);
        matrix.writeScreen();
      }
    }
    if (matchTag("<route>")) {
      Serial.print(" Linie: ");
      Serial.print(dataStr); 
      matrix.setTextSize(1);
      matrix.setTextColor(1);
      {
        // Route is drawn at the left edge of the top row.
        matrix.setCursor(0, 0);
        matrix.print(dataStr);
        matrix.writeScreen();
      }
      }
      
    // Clear all strings
    clearStr(tmpStr);
    clearStr(tagStr);
    clearStr(dataStr);

    // Clear Flags
    tagFlag = false;
    dataFlag = false;
  }
}

/////////////////////
// Other Functions //
/////////////////////

// Function to clear a string
// Sets every character of str that precedes its terminating NUL to 0,
// working from the end of the string back to the start; bytes after the
// original terminator are not touched.
void clearStr (char* str) {
  size_t remaining = strlen(str);
  while (remaining > 0) {
    remaining--;
    str[remaining] = 0;
  }
}

//Function to add a char to a string and check its length
void addChar (char ch, char* str) {
  char *tagMsg  = "<TRUNCATED_TAG>";
  char *dataMsg = "-TRUNCATED_DATA-";

  // Check the max size of the string to make sure it doesn't grow too
  // big.  If string is beyond MAX_STRING_LEN assume it is unimportant
  // and replace it with a warning message.
  if (strlen(str) > MAX_STRING_LEN - 2) {
    if (tagFlag) {
      clearStr(tagStr);
      strcpy(tagStr,tagMsg);
    }
    if (dataFlag) {
      clearStr(dataStr);
      strcpy(dataStr,dataMsg);
    }

    // Clear the time buffer and flags to stop current processing 
    clearStr(tmpStr);
    tagFlag = false;
    dataFlag = false;

  } 
  else {
    // Add char to string
    str[strlen(str)] = ch;
  }
}

// Function to check the current tag for a specific string
//
// searchTag - tag text including the angle brackets, e.g. "<route>"; taken
//             as const because callers pass string literals
// Returns true when the global tagStr is exactly equal to searchTag.
boolean matchTag (const char* searchTag) {
  return strcmp(tagStr, searchTag) == 0;
}

Thanks!

Edit: as result, it should look like in Photo 3!