commit: 0154fe62819ac4ccf7f4466e5c9ed4751a2d4f73
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Sat, 5 Feb 2022 18:48:32 +0100
Initial Commit
Diffstat:
A | extract-links.pl | 58 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | generic.pl | 56 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | openings-moe.pl | 62 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 176 insertions(+), 0 deletions(-)
diff --git a/extract-links.pl b/extract-links.pl
@@ -0,0 +1,58 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3.0-only
+use strict;
+use utf8;
+
+use HTML::TreeBuilder;
+use HTML::TreeBuilder::XPath;
+use LWP::UserAgent;
+use URI;
+
+# HTTP client used for the single request this script performs.
+my $ua = LWP::UserAgent->new;
+
+# Browser-like (Safari/WebKit) User-Agent string; our own product token is
+# appended to it below.
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+
+# Product tokens in a User-Agent header are whitespace-separated, so the
+# leading space is required; without it the two tokens fuse into one.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+# Exactly one command-line argument is accepted: the URL to fetch.
+if (@ARGV != 1) {
+    # Usage errors are diagnostics, so they go to STDERR rather than STDOUT.
+    print STDERR "usage: multimedia-dl <url>\n";
+    exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+
+# $res is the HTTP::Response examined by the code further down.
+my $res = $ua->request($req);
+
+# Print every link target found in an HTML response, one absolute URL per
+# line: first the href of each <a>, then the href of each <link>, then every
+# src attribute in the document.
+#
+# $res: the HTTP::Response whose content is parsed; its base() is used to
+#       resolve relative references.
+# Dies with "HTML parsing failed" when the parser returns a false value.
+sub scrap_html_response {
+    my ($res) = @_;
+
+    my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content) or die "HTML parsing failed";
+
+    # The queries run in this fixed order, so output stays grouped by kind.
+    my @queries = ('//a/@href', '//link/@href', '//@src');
+
+    for my $query (@queries) {
+        for my $target ($tree->findvalues($query)) {
+            my $absolute = URI->new_abs($target, $res->base);
+            print $absolute, "\n";
+        }
+    }
+}
+
+# Scrape the response only when it succeeded and is HTML or XHTML; anything
+# else is reported on STDERR and the script exits with a non-zero status.
+if ($res->is_success) {
+    # content_type() gives the bare, lower-cased media type without
+    # parameters such as "; charset=utf-8", so an exact string match works.
+    my $content_type = $res->content_type;
+
+    # String comparison (eq) is required here: with == both operands are
+    # converted to the number 0 and the test would always succeed.
+    if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+        scrap_html_response($res);
+    } else {
+        print STDERR "Doesn't seems to be HTML\n";
+        exit 1;
+    }
+} else {
+    print STDERR "Got ", $res->status_line, " instead of 2xx\n";
+    exit 1;
+}
diff --git a/generic.pl b/generic.pl
@@ -0,0 +1,56 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3.0-only
+use strict;
+use utf8;
+use HTML::TreeBuilder::XPath;
+
+use LWP::UserAgent;
+
+# HTTP client used for the single request this script performs.
+my $ua = LWP::UserAgent->new;
+
+# Browser-like (Safari/WebKit) User-Agent string; our own product token is
+# appended to it below.
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+
+# Product tokens in a User-Agent header are whitespace-separated, so the
+# leading space is required; without it the two tokens fuse into one.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+# Exactly one command-line argument is accepted: the URL to fetch.
+if (@ARGV != 1) {
+    # Usage errors are diagnostics, so they go to STDERR rather than STDOUT.
+    print STDERR "usage: multimedia-dl <url>\n";
+    exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+
+# $res is the HTTP::Response examined by the code further down.
+my $res = $ua->request($req);
+
+# Print the page title (if any) and the URL of every video source found in
+# an HTML response.
+#
+# $res: the HTTP::Response whose content is parsed; its base() is used to
+#       resolve relative src values.
+# Output on STDOUT: an optional "Title: ..." line, then one absolute URL per
+# line, <video src> values first and <video><source src> values after.
+# Dies with "HTML parsing failed" when the parser returns a false value.
+sub scrap_html_response {
+    my ($res) = @_;
+
+    # Loaded here because this script has no top-level `use URI`.
+    require URI;
+
+    my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content) or die "HTML parsing failed";
+
+    my $title = $tree->findvalue('//title');
+    if ($title) {
+        print "Title: ", $title, "\n";
+    }
+
+    # A src attribute may sit on <video> itself or on a nested <source>.
+    # Relative values are resolved against the response base so that every
+    # printed URL can be fetched without knowing the page it came from.
+    for my $query ('//video/@src', '//video/source/@src') {
+        for my $src ($tree->findvalues($query)) {
+            my $absolute = URI->new_abs($src, $res->base);
+            print $absolute, "\n";
+        }
+    }
+
+    # TODO: meta og:video
+}
+
+# Scrape the response only when it succeeded and is HTML or XHTML; anything
+# else is reported on STDERR and the script exits with a non-zero status.
+if ($res->is_success) {
+    # content_type() gives the bare, lower-cased media type without
+    # parameters such as "; charset=utf-8", so an exact string match works.
+    my $content_type = $res->content_type;
+
+    # String comparison (eq) is required here: with == both operands are
+    # converted to the number 0 and the test would always succeed.
+    if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+        scrap_html_response($res);
+    } else {
+        # This branch becomes reachable once the comparison is a real
+        # string test, so say why nothing was printed.
+        print STDERR "Doesn't seems to be HTML\n";
+        exit 1;
+    }
+} else {
+    print STDERR "Got ", $res->status_line, " instead of 2xx\n";
+    exit 1;
+}
diff --git a/openings-moe.pl b/openings-moe.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3.0-only
+use strict;
+use utf8;
+use HTML::TreeBuilder::XPath;
+use URI;
+use LWP::UserAgent;
+require HTTP::Request;
+
+# HTTP client used for the single request this script performs.
+my $ua = LWP::UserAgent->new;
+
+# Picked this one for its stability
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+
+# Product tokens in a User-Agent header are whitespace-separated, so the
+# leading space is required; without it the two tokens fuse into one.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+# Exactly one command-line argument is accepted: the URL to fetch.
+if (@ARGV != 1) {
+    # Usage errors are diagnostics, so they go to STDERR rather than STDOUT.
+    print STDERR "usage: multimedia-dl <url>\n";
+    exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+
+# $res is the HTTP::Response examined by the code further down.
+my $res = $ua->request($req);
+
+# Wrap a value in single quotes for a POSIX shell. Inside single quotes the
+# only character that needs care is the single quote itself, which is
+# emitted as '\'' (close the quote, escaped quote, reopen the quote).
+sub _shell_quote {
+    # Interpolate to force a plain string: callers may pass URI objects.
+    my $text = "$_[0]";
+    $text =~ s/'/'\\''/g;
+    return "'" . $text . "'";
+}
+
+# Turn an HTML response into one line of shell-quoted arguments on STDOUT:
+# one argument per <a download> link (prefixed with --sub-file= when the
+# href starts with "subtitles/"), followed by --title=<page title>.
+#
+# $res: the HTTP::Response whose content is parsed; its base() is used to
+#       resolve relative hrefs.
+# The page title, when non-empty, is also reported on STDERR.
+# Dies with "HTML parsing failed" when the parser returns a false value.
+sub scrap_html_response {
+    my ($res) = @_;
+
+    my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content) or die "HTML parsing failed";
+
+    my $title = $tree->findvalue('//title');
+    if ($title) {
+        print STDERR "Title: ", $title, "\n";
+    }
+
+    for my $href ($tree->findvalues('//a[@download]/@href')) {
+        my $href_abs = URI->new_abs($href, $res->base);
+
+        # Titles and URLs come from the remote page and may contain single
+        # quotes, so every value is quoted through _shell_quote.
+        my $quoted = _shell_quote($href_abs);
+
+        # The test is on the href as written in the page, not the absolute URL.
+        if ($href =~ m{^subtitles/}) {
+            print "--sub-file=", $quoted, " ";
+        } else {
+            print $quoted, " ";
+        }
+    }
+
+    my $quoted_title = _shell_quote($title);
+    print "--title=", $quoted_title, " ";
+
+    print "\n";
+}
+
+# Scrape the response only when it succeeded and is HTML or XHTML; anything
+# else is reported on STDERR and the script exits with a non-zero status.
+# STDOUT is kept for the generated arguments only.
+if ($res->is_success) {
+    # content_type() gives the bare, lower-cased media type without
+    # parameters such as "; charset=utf-8", so an exact string match works.
+    my $content_type = $res->content_type;
+
+    # String comparison (eq) is required here: with == both operands are
+    # converted to the number 0 and the test would always succeed.
+    if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+        scrap_html_response($res);
+    } else {
+        # This branch becomes reachable once the comparison is a real
+        # string test, so say why nothing was printed.
+        print STDERR "Doesn't seems to be HTML\n";
+        exit 1;
+    }
+} else {
+    print STDERR "Got ", $res->status_line, " instead of 2xx\n";
+    exit 1;
+}