Initial Commit - multimedia-dl - Unnamed repository; edit this file 'description' to name the repository.

commit: 0154fe62819ac4ccf7f4466e5c9ed4751a2d4f73
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Sat,  5 Feb 2022 18:48:32 +0100

Initial Commit

Diffstat:
A extract-links.pl 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A generic.pl 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A openings-moe.pl 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

3 files changed, 176 insertions(+), 0 deletions(-)
diff --git a/extract-links.pl b/extract-links.pl
@@ -0,0 +1,58 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3-only
+use strict;
+use utf8;
+
+use HTML::TreeBuilder;
+use HTML::TreeBuilder::XPath;
+use LWP::UserAgent;
+use URI;
+
+my $ua = LWP::UserAgent->new;
+
+# Browser-like User-Agent string; our own product token is appended below.
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+# Product tokens in a User-Agent are whitespace-separated, hence the leading space.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+# Exactly one argument is expected: the URL to fetch.
+if(@ARGV != 1) {
+	print "usage: multimedia-dl <url>\n";
+	exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+my $res = $ua->request($req);
+
+# Print every link target found in an HTML response, one absolute URL per line.
+# Takes the response object: its body is parsed and its base URL is used to
+# resolve relative references.
+sub scrap_html_response {
+	my ($response) = @_;
+
+	my $doc = HTML::TreeBuilder::XPath->new_from_content($response->content)
+		or die "HTML parsing failed";
+
+	# Queried in this order: anchors, <link> elements, then any src attribute.
+	my @queries = ('//a/@href', '//link/@href', '//@src');
+
+	for my $xpath (@queries) {
+		for my $target ($doc->findvalues($xpath)) {
+			print URI->new_abs($target, $response->base), "\n";
+		}
+	}
+}
+
+if($res->is_success) {
+	# Compare as strings (eq): with == both sides numify to 0 and always match.
+	my $content_type = $res->content_type; # media type without "; charset=..."
+	if(($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+		scrap_html_response($res);
+	} else {
+		print "Doesn't seems to be HTML\n";
+	}
+} else {
+	print "Got ", $res->status_line, " instead of 2xx\n";
+}
diff --git a/generic.pl b/generic.pl
@@ -0,0 +1,56 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3-only
+use strict;
+use utf8;
+use HTML::TreeBuilder::XPath;
+
+use LWP::UserAgent;
+
+my $ua = LWP::UserAgent->new;
+
+# Browser-like User-Agent string; our own product token is appended below.
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+# Product tokens in a User-Agent are whitespace-separated, hence the leading space.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+# Exactly one argument is expected: the URL to fetch.
+if(@ARGV != 1) {
+	print "usage: multimedia-dl <url>\n";
+	exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+my $res = $ua->request($req);
+
+# Report the page title, if any, then each <video> source URL, one per line.
+# Takes the response object; URLs are printed exactly as written in the page.
+sub scrap_html_response {
+	my ($response) = @_;
+
+	my $doc = HTML::TreeBuilder::XPath->new_from_content($response->content)
+		or die "HTML parsing failed";
+
+	my $title = $doc->findvalue('//title');
+	print "Title: ", $title, "\n" if $title;
+
+	# A <video> may carry src itself or delegate to child <source> elements.
+	for my $xpath ('//video/@src', '//video/source/@src') {
+		for my $src ($doc->findvalues($xpath)) {
+			print $src, "\n";
+		}
+	}
+
+	# TODO: meta og:video
+}
+
+if($res->is_success) {
+	# Compare as strings (eq): with == both sides numify to 0 and always match.
+	my $content_type = $res->content_type; # media type without "; charset=..."
+	if(($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+		scrap_html_response($res);
+	}
+} else {
+	print "Got ", $res->status_line, " instead of 2xx\n";
+}
diff --git a/openings-moe.pl b/openings-moe.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/env perl
+# Multimedia-DL: Youtube-DL inspired scraper
+# Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
+# SPDX-License-Identifier: AGPL-3-only
+use strict;
+use utf8;
+use HTML::TreeBuilder::XPath;
+use URI;
+use LWP::UserAgent;
+require HTTP::Request;
+
+my $ua = LWP::UserAgent->new;
+
+# Picked this one for its stability
+my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
+# Product tokens in a User-Agent are whitespace-separated, hence the leading space.
+$ua->agent($webkit_ua . " Multimedia-DL/1.0");
+
+# Exactly one argument is expected: the URL to fetch.
+if(@ARGV != 1) {
+	print "usage: multimedia-dl <url>\n";
+	exit 1;
+}
+
+my $req = HTTP::Request->new(GET => $ARGV[0]);
+my $res = $ua->request($req);
+
+# Wrap a string in shell single quotes, rewriting each embedded ' as '\'' so
+# that text taken from the page cannot terminate the quoting early.
+sub _shell_quote {
+	my $text = "$_[0]"; # stringify first: callers may pass URI objects
+	$text =~ s/'/'\\''/g;
+	return "'" . $text . "'";
+}
+
+# Print the title to STDERR, then a single stdout line of quoted arguments:
+# every a[@download] target (subtitles as --sub-file=...) and a --title=... option.
+sub scrap_html_response {
+	my ($res) = @_;
+
+	my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content) or die "HTML parsing failed";
+
+	my $title = $tree->findvalue('//title');
+	print STDERR "Title: ", $title, "\n" if $title;
+
+	foreach my $href ($tree->findvalues('//a[@download]/@href')) {
+		my $quoted = _shell_quote(URI->new_abs($href, $res->base));
+		# hrefs under subtitles/ are passed as subtitle files rather than media.
+		print(($href =~ /^subtitles\//) ? "--sub-file=$quoted " : "$quoted ");
+	}
+	print "--title=", _shell_quote($title), " \n";
+}
+
+if($res->is_success) {
+	# Compare as strings (eq): with == both sides numify to 0 and always match.
+	my $content_type = $res->content_type; # media type without "; charset=..."
+	if(($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
+		scrap_html_response($res);
+	}
+} else {
+	print "Got ", $res->status_line, " instead of 2xx\n";
+}

A	extract-links.pl	58	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	generic.pl	56	++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	openings-moe.pl	62	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++