commit: 286ec040d3eb2df7cba1ebce86474cd73a241316
parent 8b5e1cfe18a4bca45216f750ea11ba462822f450
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Wed, 14 Dec 2022 11:58:10 +0100
monecowatt.pm: Scrapping de monecowatt.fr
Diffstat:
1 file changed, 46 insertions(+), 0 deletions(-)
diff --git a/monecowatt.pm b/monecowatt.pm
@@ -0,0 +1,46 @@
+#!/usr/bin/env perl
+# J'emmerde les données dites "publiques" avec une API verrouillée.
+# Copyright © 2022 Haelwenn (lanodan) Monnier <contact+monecowatt.fr@hacktivis.me>
+# SPDX-License-Identifier: AGPL-3.0-only
+use strict;
+use utf8;
+
+use HTML::TreeBuilder;
+use HTML::TreeBuilder::XPath;
+use LWP::UserAgent;
+use URI;
+
+# Single HTTP client used for the request below. The identifying
+# User-Agent string is handed directly to the constructor.
+my $ua = LWP::UserAgent->new(
+    agent => 'Monécowatt scrapper, HTML reste la seule API ouverte <contact+monecowatt.fr@hacktivis.me>',
+);
+
+# Print the entries found under the element with id "previsions" of an HTML
+# response, one per line, formatted as "<label>: <signal>".
+#
+# Parameters:
+#   $res - response object providing decoded_content() (the HTTP response
+#          obtained from the user agent in this script).
+# Output:
+#   UTF-8 encoded lines written to STDOUT.
+# Dies:
+#   when the body cannot be decoded or the HTML parser returns no tree.
+sub scrap_html_response {
+    my ($res) = @_;
+
+    # Parse decoded characters rather than raw bytes: otherwise literal
+    # UTF-8 text and decoded entities end up mixed in the same string.
+    my $html = $res->decoded_content;
+    defined $html or die "HTML decoding failed";
+
+    my $tree = HTML::TreeBuilder::XPath->new_from_content($html) or die "HTML parsing failed";
+
+    foreach my $node ($tree->findnodes('//*[@id="previsions"]/div[1]/div/div')) {
+        my $jour = $node->findvalue('div');
+        # Trim any whitespace (not only leading spaces) on both sides.
+        $jour =~ s{\A\s+|\s+\z}{}g;
+
+        my $status = $node->findvalue('div/img/@src');
+        # Reduce the image path to the signal name embedded in its file name.
+        $status =~ s{/courbes-signaux/courbe-signal-([^.\-]*)\.png}{$1};
+
+        # STDOUT carries no encoding layer in this script, so the decoded
+        # characters are encoded to UTF-8 explicitly before being written.
+        my $line = "$jour: $status\n";
+        utf8::encode($line);
+        print $line;
+    }
+
+    # Release the parsed tree explicitly once the data has been extracted.
+    $tree->delete;
+}
+
+# Fetch the monecowatt.fr home page and hand it to the scraper when the
+# server answered successfully with an HTML or XHTML document.
+my $req = HTTP::Request->new(GET => "https://www.monecowatt.fr/");
+
+my $res = $ua->request($req);
+
+if ($res->is_success) {
+    # content_type() yields the bare, lower-cased media type without
+    # parameters such as "; charset=UTF-8", which the raw header may carry.
+    my $content_type = $res->content_type;
+
+    # String comparison is required here: with "==" both operands are
+    # evaluated as numbers (0), which made the test succeed for any type.
+    if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+        scrap_html_response($res);
+    } else {
+        # Under "use utf8" this literal holds characters; STDOUT has no
+        # encoding layer, so encode to UTF-8 explicitly before printing.
+        my $message = "La réponse reçue n'est pas de l'HTML\n";
+        utf8::encode($message);
+        print $message;
+    }
+} else {
+    print "Erreur ", $res->status_line, " obtenue au lieu de 2xx\n";
+}