Parse XML to Extract Weather Data from Web

I couldn't find anything like this posted so I thought I'd throw it out here. I wanted the Arduino to get XML formatted weather data from a NOAA web site.

This kind of thing definitely isn't Arduino's strong suit, but it seems to work great. As I'm no C language guru, any hints for improvement are appreciated.

//////////////////////////////////////////////
// Get XML formatted data from the web.
// 1/6/08 Bob S. - Created
//
//  Assumptions: single XML line looks like: 
//    <tag>data</tag> or <tag>data 
//
// Get current weather observation for Raleigh from weather.gov in XML format
//
//////////////////////////////////////////////

// Include description files for other libraries used (if any)
#include <string.h>
#include <Ethernet.h>

// Define Constants
// Max string length may have to be adjusted depending on data to be extracted
// (this is the size in bytes of each parser buffer below; addChar() stops
// appending once a buffer holds MAX_STRING_LEN - 1 characters)
#define MAX_STRING_LEN  20

// Setup vars
// tagStr  - last complete tag seen on the current line, e.g. "<temp_f>"
// dataStr - characters collected after that tag (the element's data)
// tmpStr  - scratch buffer in which a tag is assembled char by char
char tagStr[MAX_STRING_LEN] = "";
char dataStr[MAX_STRING_LEN] = "";
char tmpStr[MAX_STRING_LEN] = "";
// Prefix "</" that marks a closing tag; serialEvent() compares tmpStr to it
char endTag[3] = {'<', '/', '\0'};
// NOTE(review): not referenced by the code shown here (clearStr() declares
// its own local 'len') - confirm before removing
int len;

// Flags to differentiate XML tags from document elements (ie. data)
// tagFlag  - true while the chars being read belong to a tag ('<' seen)
// dataFlag - true while the chars being read are element data ('>' seen)
boolean tagFlag = false;
boolean dataFlag = false;

// Ethernet vars
// mac/ip are passed to Ethernet.begin() in setup(); server is the
// address the client below connects to
byte mac[] = { 0xDE, 0xAD, 0xBE, 0xEF, 0xFE, 0xED };
byte ip[] = { 192, 168, 0, 41 };
byte server[] = { 140, 90, 113, 200 }; // www.weather.gov

// Start ethernet client
// (client object targeting 'server' on port 80; the connection itself is
// opened later by client.connect())
Client client(server, 80);

void setup()
{
  Serial.begin(9600);
  Serial.println("Starting WebWx");
  Serial.println("connecting...");
  Ethernet.begin(mac, ip);
  delay(1000);

  if (client.connect()) {
    Serial.println("connected");
    client.println("GET /xml/current_obs/KRDU.xml HTTP/1.0");    
    client.println();
    delay(2000);
  } else {
    Serial.println("connection failed");
  }  
}

void loop() {

  // Read serial data in from web:
  while (client.available()) {
    serialEvent(); 
  }

  if (!client.connected()) {
    //Serial.println();
    //Serial.println("Disconnected");
    client.stop();

    // Time until next update
    //Serial.println("Waiting");
    for (int t = 1; t <= 15; t++) {
      delay(60000); // 1 minute
    }

    if (client.connect()) {
      //Serial.println("Reconnected");
      client.println("GET /xml/current_obs/KRDU.xml HTTP/1.0");    
      client.println();
      delay(2000);
    } else {
      Serial.println("Reconnect failed");
    }       
  }
}

// Process each char from web
void serialEvent() {

   // Read a char
      char inChar = client.read();
   //Serial.print(".");
  
   if (inChar == '<') {
      addChar(inChar, tmpStr);
      tagFlag = true;
      dataFlag = false;

   } else if (inChar == '>') {
      addChar(inChar, tmpStr);

      if (tagFlag) {      
         strncpy(tagStr, tmpStr, strlen(tmpStr)+1);
      }

      // Clear tmp
      clearStr(tmpStr);

      tagFlag = false;
      dataFlag = true;      
      
   } else if (inChar != 10) {
      if (tagFlag) {
         // Add tag char to string
         addChar(inChar, tmpStr);

         // Check for </XML> end tag, ignore it
         if ( tagFlag && strcmp(tmpStr, endTag) == 0 ) {
            clearStr(tmpStr);
            tagFlag = false;
            dataFlag = false;
         }
      }
      
      if (dataFlag) {
         // Add data char to string
         addChar(inChar, dataStr);
      }
   }  
  
   // If a LF, process the line
   if (inChar == 10 ) {

/*
      Serial.print("tagStr: ");
      Serial.println(tagStr);
      Serial.print("dataStr: ");
      Serial.println(dataStr);
*/

      // Find specific tags and print data
      if (matchTag("<temp_f>")) {
            Serial.print("Temp: ");
         Serial.print(dataStr);
      }
      if (matchTag("<relative_humidity>")) {
            Serial.print(", Humidity: ");
         Serial.print(dataStr);
      }
      if (matchTag("<pressure_in>")) {
            Serial.print(", Pressure: ");
         Serial.print(dataStr);
         Serial.println("");
      }

      // Clear all strings
      clearStr(tmpStr);
      clearStr(tagStr);
      clearStr(dataStr);

      // Clear Flags
      tagFlag = false;
      dataFlag = false;
   }
}

/////////////////////
// Other Functions //
/////////////////////

// Function to clear a string
// Overwrites every character up to (not including) the existing
// terminator with 0; bytes beyond the terminator are left untouched.
void clearStr (char* str) {
   memset(str, 0, strlen(str));
}

//Function to add a char to a string and check its length
void addChar (char ch, char* str) {
   char *tagMsg  = "<TRUNCATED_TAG>";
   char *dataMsg = "-TRUNCATED_DATA-";

   // Check the max size of the string to make sure it doesn't grow too
   // big.  If string is beyond MAX_STRING_LEN assume it is unimportant
   // and replace it with a warning message.
   if (strlen(str) > MAX_STRING_LEN - 2) {
      if (tagFlag) {
         clearStr(tagStr);
         strcpy(tagStr,tagMsg);
      }
      if (dataFlag) {
         clearStr(dataStr);
         strcpy(dataStr,dataMsg);
      }

      // Clear the temp buffer and flags to stop current processing 
      clearStr(tmpStr);
      tagFlag = false;
      dataFlag = false;

   } else {
      // Add char to string
      str[strlen(str)] = ch;
   }
}

// Function to check the current tag for a specific string
//
//  searchTag - tag text to look for, including the angle brackets
//              (e.g. "<temp_f>"); taken as const because callers pass
//              string literals and the function never modifies it
//
// Returns true when tagStr is exactly equal to searchTag.
boolean matchTag (const char* searchTag) {
   return strcmp(tagStr, searchTag) == 0;
}

Thanks for this. I was going to be looking into this myself soon to display weather data on an LED Message Board.

I take it the ethernet shield is required for this?

Yes, I used this code with the "official" Arduino ethernet shield. I'm sure it would probably work with some of the others as well.

I wanted the Arduino to get XML formatted weather data from a NOAA web site.

I too wish to get weather data for an Irrigation system project I have in mind. My early plans include using a Linux host to gather the weather forecast and send adjusted watering plan to an AVR using a MRMP Bridge<>Controller network.
MRMP? http://www.arduino.cc/cgi-bin/yabb2/YaBB.pl?num=1232140631

However your use of an Ethernet shield may be feasible.
How much code space remains after you upload your sketch?

My concern is that there may be too little space for all the 'necessary' irrigation functions.

Why do you think it's necessary to do that kind of work within the Arduino?

Create a script (I'd use PHP or Perl) within your own webspace that will connect to the NOAA site, scrape the data you need, present it as a string in the exact format you want within the Arduino, and serve it as the web page.

Then point your Arduino at your script instead of the NOAA page and it will receive the data as a string already in the correct format.

Why do you think it's necessary to do that kind of work within the Arduino?

I think George wants to avoid having a PC in the loop. It will be interesting to see if the functionality can be achieved in the memory space of an arduino, particularly now that ATmega328 is just around the corner.

My way doesn't include a PC in the loop (as part of the project), just a web server somewhere in the world that he can put a little script on. If he doesn't have access to a webserver, I'd be surprised if nobody here will offer to host the file for him. Up until about a year ago, I could have done it easily for him but I'm not set up for that right now.

Conceptually, to me at least, calling that a PC in the loop is no different from calling the NOAA server a PC in the loop.

Thanks for the clarification, I do now see what you mean. I still think it's worth exploring to see if it can be done without the need for that extra server.

Well, I don't have much to say about your approach, but I think you should try Python for the web server script. It can do wonders with XML in a simple fashion.

thank you for sharing! I bought my arduino for a very similar application... nice to see your code!

I am a NOOB but I have a simple understanding of python and html...
I would be interested in putting up a script on a server...
Could someone describe briefly how is this achieved? Just a few lines to get me started? Just so that I can start googling for sources?
Any help would be appreciated

When I google "python xml parsing", my first hit is:
http://oreilly.com/catalog/pythonxml/chapter/ch01.html

It looks useful, but very basic (it seems to assume you don't know what xml is or why you'd use it).

thank you!

Hi,

I was just wondering whether you can get this to work without looking for line feeds? I've tried tweaking it but can't seem to get it to read XML without line feeds.

Cheers

Boppyer

I think George wants to avoid having a PC in the loop. It will be interesting to see if the functionality can be achieved in the memory space of an arduino, particularly now that ATmega328 is just around the corner.

I went the route of using a PC to parse the XML file using Perl. I already use the PC to push the UNIX time and other tasks to a MRMP Bridge. I suspect it would be practical to use an Ethernet shield to do this as well.

This is the forecast flow...
At 05:00...
Environment Canada forecast XML > Linux Perl parse > MRMP* Bridge <> MRMP Irrigation Controller > Zone

The Perl script parses the XML for Probability of Precipitation (POP) and forecast mm of rain (mm) early in the morning. POP and mm are sent over serial using MRMP to the Irrigation controller. The Irrigation controller then has a threshold set for mm or POP. If the threshold is reached, the watering of the selected zone (of 4) is delayed until 23:00. At 23:00, the controller adjusts the planned mm applied to the zone, deducting the mm measured by a tipping bucket rain gauge over the last 24 hours.

Has worked well for the past three months.

*MRMP http://www.arduino.cc/cgi-bin/yabb2/YaBB.pl?num=1232140631