logo

multimedia-dl

Unnamed repository; edit this file 'description' to name the repository.

openings-moe.pl (1474B)


  1. #!/usr/bin/env perl
  2. # Multimedia-DL: Youtube-DL inspired scraper
  3. # Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
  4. # SPDX-License-Identifier: AGPL-3-only
  5. use strict;
  6. use utf8;
  7. use HTML::TreeBuilder::XPath;
  8. use URI;
  9. use LWP::UserAgent;
  10. require HTTP::Request;
  # HTTP client used for the single page fetch below.
  my $ua = LWP::UserAgent->new;
  # Picked this one for its stability
  my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
  # Product tokens in a User-Agent value are separated by whitespace, so the
  # tool's own token must not be glued onto the preceding Safari token.
  $ua->agent($webkit_ua . " Multimedia-DL/1.0");

  # Exactly one command-line argument (the URL to fetch) is required.
  if (@ARGV != 1) {
      # Diagnostics go to STDERR; STDOUT only carries the generated arguments.
      print STDERR "usage: multimedia-dl <url>\n";
      exit 1;
  }

  # Fetch the page; the response is inspected further down in the script.
  my $req = HTTP::Request->new(GET => $ARGV[0]);
  my $res = $ua->request($req);
  # Make a value safe to embed between shell single quotes: every embedded
  # ' is rewritten as '\'' (close the quote, emit an escaped quote, reopen).
  # Objects with overloaded stringification (such as URI) are stringified first.
  sub _escape_single_quotes {
      my ($value) = @_;
      my $text = "$value";
      $text =~ s/'/'\\''/g;
      return $text;
  }

  # Scrape an HTML response and print what was found on one STDOUT line as
  # single-quoted words:
  #   - every <a download href=...> target, made absolute against the
  #     response base; hrefs starting with "subtitles/" are emitted as
  #     --sub-file='...', all others as a bare '...'
  #   - a trailing --title='...' taken from the document <title>
  # The title is additionally reported on STDERR when it is non-empty.
  #
  # Parameters:
  #   $res - response object providing ->content and ->base
  # Dies with "HTML parsing failed" if the parser returns a false value.
  sub scrap_html_response {
      my ($res) = @_;
      my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content)
          or die "HTML parsing failed";

      my $title = $tree->findvalue('//title');
      if ($title) {
          print STDERR "Title: ", $title, "\n";
      }

      foreach my $href ($tree->findvalues('//a[@download]/@href')) {
          # The page content is untrusted: escape quotes before wrapping the
          # URL in '...' so it cannot terminate the quoted word early.
          my $href_abs = _escape_single_quotes(URI->new_abs($href, $res->base));
          # The subtitle test is done on the href as written in the page,
          # not on the absolute URL.
          if ($href =~ /^subtitles\//) {
              print "--sub-file='", $href_abs, "' ";
          } else {
              print "'", $href_abs, "' ";
          }
      }

      print "--title='", _escape_single_quotes($title), "' ";
      print "\n";
  }
  # Dispatch on the outcome of the request: scrape HTML/XHTML responses,
  # report anything else on STDERR.
  if ($res->is_success) {
      # content_type() gives the bare, lower-cased media type with parameters
      # such as "; charset=utf-8" removed, so it can be matched exactly.
      my $content_type = $res->content_type;
      # String equality (eq) is required here: numeric == evaluates both
      # non-numeric strings as 0 and would accept every content type.
      if (($content_type eq "text/html") or ($content_type eq "application/xhtml+xml")) {
          scrap_html_response($res);
      } else {
          print STDERR "Unsupported Content-Type: ", $content_type, "\n";
      }
  } else {
      # Keep the error off STDOUT, which only carries generated arguments,
      # and signal the failure through the exit status.
      print STDERR "Got ", $res->status_line, " instead of 2xx\n";
      exit 1;
  }