logo

scripts

A bunch of scripts, some to be moved to their own repository.
Clone with: git clone https://hacktivis.me/git/scripts.git
commit: 286ec040d3eb2df7cba1ebce86474cd73a241316
parent 8b5e1cfe18a4bca45216f750ea11ba462822f450
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Wed, 14 Dec 2022 11:58:10 +0100

monecowatt.pm: Scrapping de monecowatt.fr

Diffstat:

A monecowatt.pm | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+), 0 deletions(-)

#!/usr/bin/env perl
# To hell with so-called "public" data that sits behind a locked-down API.
# Copyright © 2022 Haelwenn (lanodan) Monnier <contact+monecowatt.fr@hacktivis.me>
# SPDX-License-Identifier: AGPL-3-only
#
# Fetches https://www.monecowatt.fr/ and prints one "<day>: <signal>" line on
# standard output for every forecast entry found under the element whose id is
# "previsions". Failures (non-2xx status, non-HTML body) are reported as a
# single line on standard output as well.
use strict;
use warnings;
use utf8;

use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;
use LWP::UserAgent;
use URI;

# This file contains non-ASCII literals (see `use utf8`) and the page body is
# decoded to characters below, so output has to be encoded explicitly.
binmode(STDOUT, ':encoding(UTF-8)');

my $ua = LWP::UserAgent->new;

$ua->agent('Monécowatt scrapper, HTML reste la seule API ouverte <contact+monecowatt.fr@hacktivis.me>');

# scrap_html_response($res)
#
# Parses the body of $res (an HTTP response object as returned by
# LWP::UserAgent) as HTML and prints "<day>: <signal>\n" for each node matched
# by the forecast XPath expression.
#
# Dies with "HTML parsing failed" when no parse tree could be built.
# Returns nothing.
sub scrap_html_response {
    my ($res) = @_;

    # Prefer the charset-decoded body; fall back to the raw bytes when the
    # declared encoding could not be applied.
    my $html = $res->decoded_content;
    $html = $res->content unless defined $html;

    my $tree = HTML::TreeBuilder::XPath->new_from_content($html) or die "HTML parsing failed";

    foreach my $node ($tree->findnodes('//*[@id="previsions"]/div[1]/div/div')) {
        my $jour = $node->findvalue('div');
        $jour =~ s/^ *//;    # drop leading padding spaces

        my $status = $node->findvalue('div/img/@src');
        # Reduce the image path to the signal name embedded in the file name.
        $status =~ s{/courbes-signaux/courbe-signal-([^.\-]*)\.png}{$1};

        print $jour, ": ", $status, "\n";
    }

    # Release the parse tree explicitly once it is no longer needed.
    $tree->delete;

    return;
}

my $res = $ua->get("https://www.monecowatt.fr/");

if ($res->is_success) {
    # content_type in scalar context yields the lowercased media type with any
    # parameters (e.g. "; charset=UTF-8") removed, so it can be compared with
    # `eq`. A numeric `==` here would treat every string as 0 and always match.
    my $content_type = $res->content_type;

    if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
        scrap_html_response($res);
    } else {
        print "La réponse reçue n'est pas de l'HTML\n";
    }
} else {
    print "Erreur ", $res->status_line, " obtenue au lieu de 2xx\n";
}