logo

youtube-dl

[mirror] Download/Watch videos from video hosters
commit: 5a1fbbf8b7215aab0e6382e93eaa1561093352cf
parent e2bdf8bf4f3de7698d1d2844687e3acc760b34e7
Author: Sergey M․ <dstftw@gmail.com>
Date:   Wed,  9 Dec 2020 00:05:21 +0700

[extractor/common] Fix inline HTML5 media tags processing and add test (closes #27345)

Diffstat:

M test/test_InfoExtractor.py     | 12 ++++++++++++
M youtube_dl/extractor/common.py |  6 +++---
2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
@@ -108,6 +108,18 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
 
     def test_parse_html5_media_entries(self):
+        # inline video tag
+        expect_dict(
+            self,
+            self.ie._parse_html5_media_entries(
+                'https://127.0.0.1/video.html',
+                r'<html><video src="/vid.mp4" /></html>', None)[0],
+            {
+                'formats': [{
+                    'url': 'https://127.0.0.1/vid.mp4',
+                }],
+            })
+
         # from https://www.r18.com/
         # with kpbs in label
         expect_dict(
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -2515,9 +2515,9 @@ class InfoExtractor(object):
         # https://www.ampproject.org/docs/reference/components/amp-video)
         # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
         _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
-        media_tags = [(media_tag, media_type, '')
-                      for media_tag, media_type
-                      in re.findall(r'(?s)(<%s[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
+        media_tags = [(media_tag, media_tag_name, media_type, '')
+                      for media_tag, media_tag_name, media_type
+                      in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
         media_tags.extend(re.findall(
             # We only allow video|audio followed by a whitespace or '>'.
             # Allowing more characters may end up in significant slow down (see