logo

youtube-dl

[mirror] Download/Watch videos from video hostersgit clone https://hacktivis.me/git/mirror/youtube-dl.git
commit: f66372403fd9e1661199fea100ba2600fa9697b2
parent 7216fa2ac4706e099ea2ad9a04fe7bf4300bc745
Author: dirkf <fieldhouse@gmx.net>
Date:   Wed, 21 Feb 2024 00:09:48 +0000

[InfoExtractor] Rework and improve JWPlayer extraction
* use traverse_obj() and _search_json()
* support playlist `.load({**video1},{**video2}, ...)`
* support transform_source=... for _extract_jwplayer_data()

Diffstat:

Myoutube_dl/extractor/common.py55++++++++++++++++++++++---------------------------------
1 file changed, 22 insertions(+), 33 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py @@ -3021,25 +3021,22 @@ class InfoExtractor(object): return formats def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): - mobj = re.search( - r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''', - webpage) - if mobj: - try: - jwplayer_data = self._parse_json(mobj.group('options'), - video_id=video_id, - transform_source=transform_source) - except ExtractorError: - pass - else: - if isinstance(jwplayer_data, dict): - return jwplayer_data + return self._search_json( + r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''', + webpage, 'JWPlayer data', video_id, + # must be a {...} or sequence, ending + contains_pattern=r'\{[\s\S]*}(?(load)(?:\s*,\s*\{[\s\S]*})*)', end_pattern=r'(?(load)\]|\))', + transform_source=transform_source, default=None) def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): - jwplayer_data = self._find_jwplayer_data( - webpage, video_id, transform_source=js_to_json) - return self._parse_jwplayer_data( - jwplayer_data, video_id, *args, **kwargs) + + # allow passing `transform_source` through to _find_jwplayer_data() + transform_source = kwargs.pop('transform_source', None) + kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {} + + jwplayer_data = self._find_jwplayer_data(webpage, video_id, **kwfind) + + return self._parse_jwplayer_data(jwplayer_data, video_id, *args, **kwargs) def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): @@ -3073,22 +3070,14 @@ class InfoExtractor(object): mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) subtitles = {} - tracks = video_data.get('tracks') - if tracks and isinstance(tracks, list): - for track in tracks: - if not isinstance(track, dict): - continue - track_kind = track.get('kind') - if not track_kind or not isinstance(track_kind, compat_str): - continue - if track_kind.lower() not in ('captions', 'subtitles'): - continue - track_url = urljoin(base_url, track.get('file')) - if not track_url: - continue - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track_url) - }) + for track in traverse_obj(video_data, ( + 'tracks', lambda _, t: t.get('kind').lower() in ('captions', 'subtitles'))): + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) entry = { 'id': this_video_id,