commit: 4ef1fc970727a1bbdd9f20bea54c37deb8794fb0
parent f9e6aa1dcfa5e5731f04b407ac00ef24009891a3
Author: Remita Amine <remitamine@gmail.com>
Date: Thu, 24 Dec 2020 16:05:03 +0100
[youtube] fix automatic captions extraction(closes #27162)(closes #27388)
Diffstat:
1 file changed, 14 insertions(+), 18 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -1322,17 +1322,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
- def _get_automatic_captions(self, video_id, webpage):
+ def _get_automatic_captions(self, video_id, player_response, player_config):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
self.to_screen('%s: Looking for automatic captions' % video_id)
- player_config = self._get_ytplayer_config(video_id, webpage)
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
- if not player_config:
+ if not (player_response or player_config):
self._downloader.report_warning(err_msg)
return {}
try:
- args = player_config['args']
+ args = player_config.get('args') if player_config else {}
caption_url = args.get('ttsurl')
if caption_url:
timestamp = args['timestamp']
@@ -1391,19 +1390,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return captions
# New captions format as of 22.06.2017
- player_response = args.get('player_response')
- if player_response and isinstance(player_response, compat_str):
- player_response = self._parse_json(
- player_response, video_id, fatal=False)
- if player_response:
- renderer = player_response['captions']['playerCaptionsTracklistRenderer']
- base_url = renderer['captionTracks'][0]['baseUrl']
- sub_lang_list = []
- for lang in renderer['translationLanguages']:
- lang_code = lang.get('languageCode')
- if lang_code:
- sub_lang_list.append(lang_code)
- return make_captions(base_url, sub_lang_list)
+ if player_response:
+ renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+ base_url = renderer['captionTracks'][0]['baseUrl']
+ sub_lang_list = []
+ for lang in renderer['translationLanguages']:
+ lang_code = lang.get('languageCode')
+ if lang_code:
+ sub_lang_list.append(lang_code)
+ return make_captions(base_url, sub_lang_list)
# Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA)
@@ -1652,6 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get video info
video_info = {}
embed_webpage = None
+ ytplayer_config = None
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
age_gate = True
@@ -2276,7 +2272,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
- automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
+ automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
video_duration = try_get(
video_info, lambda x: int_or_none(x['length_seconds'][0]))