logo

multimedia-dl

Unnamed repository; edit this file 'description' to name the repository. git clone https://anongit.hacktivis.me/git/multimedia-dl.git/

generic.pl (1274B)


  1. #!/usr/bin/env perl
  2. # Multimedia-DL: Youtube-DL inspired scraper
  3. # Copyright © 2021 Multimedia-DL Authors <https://hacktivis.me/git/multimedia-dl/>
  4. # SPDX-License-Identifier: AGPL-3-only
  5. use strict;
  6. use utf8;
  7. use HTML::TreeBuilder::XPath;
  8. use LWP::UserAgent;
  9. my $ua = LWP::UserAgent->new;
  10. my $webkit_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15";
  11. $ua->agent($webkit_ua . "Multimedia-DL/1.0");
  12. if($#ARGV != 0) {
  13. print "usage: multimedia-dl <url>\n";
  14. exit 1;
  15. }
  16. my $req = HTTP::Request->new(GET => $ARGV[0]);
  17. my $res = $ua->request($req);
  18. sub scrap_html_response {
  19. my ($res) = @_;
  20. my $tree = HTML::TreeBuilder::XPath->new_from_content($res->content) or die "HTML parsing failed";
  21. my $title = $tree->findvalue('//title');
  22. if($title) {
  23. print "Title: ", $title, "\n";
  24. }
  25. foreach($tree->findvalues('//video/@src')) {
  26. print $_, "\n";
  27. }
  28. foreach($tree->findvalues('//video/source/@src')) {
  29. print $_, "\n";
  30. }
  31. # TODO: meta og:video
  32. }
  33. if($res->is_success) {
  34. my $content_type = $res->header("Content-Type");
  35. if(($content_type == "text/html") or ($content_type == "application/xhtml+xml")) {
  36. scrap_html_response($res);
  37. }
  38. } else {
  39. print "Got ", $res->status_line, " instead of 2xx\n";
  40. }