commit: 3e4e338133463e57125ade82fe5dfd57bb42dc1a
parent be19ae11fde9eb8cb28ff967ded15b457b5f80ea
Author: Sergey M․ <dstftw@gmail.com>
Date: Tue, 1 Dec 2020 01:53:12 +0700
[cspan] Extract info from jwplayer data (closes #3672, closes #3734, closes #10638, closes #13030, closes #18806, closes #23148, closes #24461, closes #26171, closes #26800, closes #27263)
Diffstat:
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
@@ -10,6 +10,8 @@ from ..utils import (
find_xpath_attr,
get_element_by_class,
int_or_none,
+ js_to_json,
+ merge_dicts,
smuggle_url,
unescapeHTML,
)
@@ -98,6 +100,26 @@ class CSpanIE(InfoExtractor):
bc_attr['data-bcid'])
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
+ def add_referer(formats):  # set the Referer header on every entry of `formats`, mutating each in place
+ for f in formats:
+ f.setdefault('http_headers', {})['Referer'] = url  # `url` is not a parameter: it comes from the enclosing scope; an existing Referer is overwritten
+
+ # As of 2020-12-01 this path appears to cover all cases, making the rest
+ # of the code unnecessary
+ jwsetup = self._parse_json(
+ self._search_regex(
+ r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if jwsetup:
+ info = self._parse_jwplayer_data(
+ jwsetup, video_id, require_title=False, m3u8_id='hls',
+ base_url=url)
+ add_referer(info['formats'])
+ ld_info = self._search_json_ld(webpage, video_id, default={})
+ return merge_dicts(info, ld_info)
+
+ # Obsolete: only reached when no usable jwsetup data was extracted above
# We first look for clipid, because clipprog always appears before
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
@@ -165,8 +187,7 @@ class CSpanIE(InfoExtractor):
formats = self._extract_m3u8_formats(
path, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
- for f in formats:
- f.setdefault('http_headers', {})['Referer'] = url
+ add_referer(formats)
self._sort_formats(formats)
entries.append({
'id': '%s_%d' % (video_id, partnum + 1),