Hi there,
I tried scraping Google Maps from my ESP8266, without success.
I tried it with Python on a PC and it works, but on my NodeMCU there doesn't seem to be enough buffer space to hold such a big string.
Here is my code:
#include <ESP8266WiFi.h>
// Wi-Fi credentials handed to WiFi.begin() in setup(). Placeholder values.
const char* ssid = "mySSID";
const char* password = "myPassword";
// Server name: used both for the TLS connection and for the HTTP "Host:" header in loop().
const char* host = "www.google.com";
// Request target (path + query, no scheme or host) sent after "GET " in loop().
String url = "/maps/dir/Holsten's+Ice+Cream,+Chocolate+%26+Restaurant/Hilton+Meadowlands,+Two+Meadowlands+Plaza,+East+Rutherford,+NJ+07073,+United+States/@40.7980681,-74.1696393,14z/data=!4m14!4m13!1m5!1m1!1s0x89c2ff8eccccef2b:0x92a0fedfb95721e5!2m2!1d-74.1867349!2d40.8283166!1m5!1m1!1s0x89c257d534f587a9:0xc1913bca390e84f9!2m2!1d-74.0781257!2d40.8052015!3e3?entry=ttu";
// Polls WiFi.status() until it reports WL_CONNECTED, printing one "." to the
// serial port after every 500 ms wait. Returns immediately if already connected.
static void awaitWiFiLink() {
  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      return;
    }
    delay(500);
    Serial.print(".");
  }
}

// One-time initialisation: opens the serial port at 115200 baud, starts the
// Wi-Fi connection with `ssid` / `password`, blocks until it is up and then
// prints the IP address that was assigned.
void setup() {
  Serial.begin(115200);
  delay(10);

  // Two empty lines to separate our output from whatever is already on the port.
  for (int blank = 0; blank < 2; ++blank) {
    Serial.println();
  }

  Serial.print("Connecting to ");
  Serial.println(ssid);

  WiFi.begin(ssid, password);
  awaitWiFiLink();

  Serial.println("");
  Serial.println("WiFi connected");
  Serial.println("IP address: ");
  Serial.println(WiFi.localIP());
}
int value = 0;
void loop() {
if(value == 0){
int linecounter=0;
String vindchillString;
float vindchill;
String line="";
++value;
Serial.print("connecting to ");
Serial.println(host);
// Use WiFiClient class to create TCP connections
WiFiClientSecure client;
client.setInsecure();
const int httpPort = 443;
if (!client.connect(host, httpPort)) {
Serial.println("connection failed");
delay(5000);
return;
}
// give the esp a breather
yield();
Serial.print("Requesting URL: ");
Serial.println(url);
// This will send the request to the server
// The header should look like this:
/*
> GET /data/stenomuseet.htm HTTP/1.1
> Host: vejret.stenomuseet.dk
> Connection: close
>
*/
client.print(
String("GET ") + url + " HTTP/1.1\r\n" + //Server + protocol
"Host: " + host + "\r\n" + // Host
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0"+ "\r\n" + // User agent
"Connection: close\r\n" + // I don't want to talk after you sent me the data.
"\r\n" // Blank line at end of header.
);
delay(5000); //wait a while to let the server respond (increase on shitty connections)
// if (httpCode == HTTP_CODE_OK || httpCode == HTTP_CODE_MOVED_PERMANENTLY)
// Read all the lines of the reply from server
while(client.available()){
line = client.readStringUntil('\r'); //cariage return as delimiter
Serial.println(line.substring(0, 200)); //print out every received line.
Serial.println(line.length());
linecounter++; //count up for every line we read.
yield(); //Avoid a crash by letting the ESP core handle its wifi stuff.
}
Serial.println();
Serial.println("closing connection");
delay(10000); //run every 10 secound
}
}
Serial log:
HTTP/1.1 200 OK
03:26:04.935 -> 15
03:26:04.935 ->
03:26:04.935 -> Date: Sun, 24 Dec 2023 10:26:00 GMT
03:26:04.935 -> 36
...
03:26:28.684 -> 5
03:26:28.844 ->
03:26:28.844 -> line-height:29px;min-width:54px;padding:0 8px;text-align:center;text-decoration:none!important;border-radius:2px;-moz-user-select:-moz-none}.gbqfba:focus{border:1px solid #4d90fe;outline:none;box-sha
03:26:28.844 -> 6175
03:26:28.844 ->
03:26:28.844 -> 1413
03:26:28.844 -> 5
03:26:29.005 ->
03:26:29.005 -> tant;padding-top:0;position:relative;top:310px}.gbqfh #gbqf{margin:auto;min-width:534px;padding:0 !important}.gbqfh #gbqfbw{display:none}.gbqfh #gbqfbwa{display:block}.gbqfh #gbqf{max-width:${elastic
03:26:29.005 -> 5140
03:26:29.005 ->
03:26:29.005 -> 0
03:26:29.005 -> 2
03:26:29.005 ->
03:26:29.005 ->
03:26:29.005 -> 1
03:26:33.995 ->
03:26:33.995 ->
03:26:33.995 -> 1
03:26:33.995 ->
03:26:33.995 -> closing connection
I'm just trying to extract the information about the next bus/train.
I'm sure there's an API for that, but in the past I've often received emails from Google saying I need to update this or that months or years after I've finished a project, and it's tiring, so I try to avoid Google APIs if possible.
Is it possible to scrape a heavy HTML page like Google Maps from my ESP?
Thanks