logo

multimedia-dl

Unnamed repository; edit this file 'description' to name the repository.
commit: 0154fe62819ac4ccf7f4466e5c9ed4751a2d4f73
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Sat,  5 Feb 2022 18:48:32 +0100

Initial Commit

Diffstat:

A extract-links.pl | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A generic.pl | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A openings-moe.pl | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 228 insertions(+), 0 deletions(-)

diff --git a/extract-links.pl b/extract-links.pl
@@ -0,0 +1,66 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3-only
+#
+# Fetches the URL given as the only argument and prints, one per line, the
+# absolute form of every link-like attribute found in the HTML response.
+use strict;
+use warnings;
+use utf8;
+
+use HTML::TreeBuilder;
+use HTML::TreeBuilder::XPath;
+use HTTP::Request;
+use LWP::UserAgent;
+use URI;
+
+my $ua = LWP::UserAgent->new;
+
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+
+# Product tokens in a User-Agent string are separated by whitespace.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+if (@ARGV != 1) {
+    print STDERR "usage: multimedia-dl <url>\n";
+    exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+
+my $res = $ua->request($req);
+
+# Prints every href of <a> and <link> elements, then every src attribute of
+# any element, each resolved against the base URL of the response.
+#
+# $res: the HTTP::Response whose body is parsed as HTML.
+# Dies when the parser does not return a tree.
+sub scrap_html_response {
+    my ($res) = @_;
+
+    my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content) or die "HTML parsing failed";
+
+    # Anchors first, then <link> elements, then every @src.
+    foreach my $xpath ('//a/@href', '//link/@href', '//@src') {
+        foreach my $link ($tree->findvalues($xpath)) {
+            print URI->new_abs($link, $res->base), "\n";
+        }
+    }
+}
+
+if ($res->is_success) {
+    # content_type() returns only the lower-cased media type, without
+    # parameters such as "; charset=utf-8", so it can be compared with eq.
+    my $content_type = $res->content_type;
+
+    if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+        scrap_html_response($res);
+    } else {
+        print STDERR "Doesn't seems to be HTML\n";
+        exit 1;
+    }
+} else {
+    print STDERR "Got ", $res->status_line, " instead of 2xx\n";
+    exit 1;
+}
diff --git a/generic.pl b/generic.pl
@@ -0,0 +1,70 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3-only
+#
+# Fetches the URL given as the only argument and prints the page title
+# followed by the absolute URL of every <video> source found in it.
+use strict;
+use warnings;
+use utf8;
+use HTML::TreeBuilder::XPath;
+use HTTP::Request;
+use URI;
+
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new;
+
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+
+# Product tokens in a User-Agent string are separated by whitespace.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+if (@ARGV != 1) {
+    print STDERR "usage: multimedia-dl <url>\n";
+    exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+
+my $res = $ua->request($req);
+
+# Prints "Title: ..." when the document has a non-empty <title>, then one
+# line per src attribute of <video> and <video><source> elements.
+#
+# $res: the HTTP::Response whose body is parsed as HTML.
+# Dies when the parser does not return a tree.
+sub scrap_html_response {
+    my ($res) = @_;
+
+    my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content) or die "HTML parsing failed";
+
+    my $title = $tree->findvalue('//title');
+    if ($title) {
+        print "Title: ", $title, "\n";
+    }
+
+    # A relative src cannot be fetched on its own, so resolve each value
+    # against the base URL of the response.
+    foreach my $xpath ('//video/@src', '//video/source/@src') {
+        foreach my $src ($tree->findvalues($xpath)) {
+            print URI->new_abs($src, $res->base), "\n";
+        }
+    }
+
+    # TODO: meta og:video
+}
+
+if ($res->is_success) {
+    # content_type() returns only the lower-cased media type, without
+    # parameters such as "; charset=utf-8", so it can be compared with eq.
+    my $content_type = $res->content_type;
+
+    if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+        scrap_html_response($res);
+    }
+} else {
+    print STDERR "Got ", $res->status_line, " instead of 2xx\n";
+    exit 1;
+}
diff --git a/openings-moe.pl b/openings-moe.pl
@@ -0,0 +1,92 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3-only
+#
+# Fetches the URL given as the only argument and prints, on a single line,
+# the download links of the page as single-quoted arguments (subtitle files
+# as --sub-file=...), followed by --title=... with the page title.
+use strict;
+use warnings;
+use utf8;
+use HTML::TreeBuilder::XPath;
+use URI;
+use LWP::UserAgent;
+require HTTP::Request;
+
+my $ua = LWP::UserAgent->new;
+
+# Picked this one for its stability
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+
+# Product tokens in a User-Agent string are separated by whitespace.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+if (@ARGV != 1) {
+    print STDERR "usage: multimedia-dl <url>\n";
+    exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+
+my $res = $ua->request($req);
+
+# Wraps a value in single quotes for a POSIX shell, escaping every embedded
+# single quote as '\'' so that a title or URL cannot break out of the
+# quoting.
+sub shell_quote {
+    my ($value) = @_;
+
+    my $text = "$value";    # stringify URI objects before substituting
+    $text =~ s/'/'\\''/g;
+
+    return "'" . $text . "'";
+}
+
+# Prints the title on STDERR, then on STDOUT one line made of the quoted
+# absolute href of every <a download> element (prefixed with --sub-file= when
+# the href starts with "subtitles/") followed by --title=<title>.
+#
+# NOTE(review): the quoting suggests the line is meant to be expanded by a
+# shell into player arguments -- confirm against the caller.
+#
+# $res: the HTTP::Response whose body is parsed as HTML.
+# Dies when the parser does not return a tree.
+sub scrap_html_response {
+    my ($res) = @_;
+
+    my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content) or die "HTML parsing failed";
+
+    my $title = $tree->findvalue('//title');
+    if ($title) {
+        print STDERR "Title: ", $title, "\n";
+    }
+
+    foreach my $href ($tree->findvalues('//a[@download]/@href')) {
+        my $href_abs = URI->new_abs($href, $res->base);
+
+        if ($href =~ m{\Asubtitles/}) {
+            print "--sub-file=", shell_quote($href_abs), " ";
+        } else {
+            print shell_quote($href_abs), " ";
+        }
+    }
+
+    print "--title=", shell_quote($title), " ";
+
+    print "\n";
+}
+
+if ($res->is_success) {
+    # content_type() returns only the lower-cased media type, without
+    # parameters such as "; charset=utf-8", so it can be compared with eq.
+    my $content_type = $res->content_type;
+
+    if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+        scrap_html_response($res);
+    }
+} else {
+    # STDOUT carries the argument line, so diagnostics go to STDERR.
+    print STDERR "Got ", $res->status_line, " instead of 2xx\n";
+    exit 1;
+}