commit: 526b3b071632bc3c840ae4dd3579e015f41df6f5
parent 61f92af1cfacb9a5a6e368d0093fb71dbac0af6b
Author: Sergey M․ <dstftw@gmail.com>
Date: Mon, 23 Nov 2015 21:14:03 +0600
[youtube] Clarify ytplayer.config extraction rationale
Diffstat:
1 file changed, 7 insertions(+), 0 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -898,6 +898,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_ytplayer_config(self, video_id, webpage):
patterns = (
+ # User data may contain arbitrary character sequences that can break
+ # regex-based JSON extraction, e.g. when it contains '};' the second
+ # regex won't capture the whole JSON. For now, work around this by
+ # trying the more specific regex first. Proper quoted string handling
+ # is to be implemented in the future and will replace this workaround
+ # (see https://github.com/rg3/youtube-dl/issues/7468,
+ # https://github.com/rg3/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});',
)