commit: 58988c1421b88875a33015b08e4d2ada43021e09
parent e19ec5232216fd801ded88728df5b50bfb05c1cc
Author: dirkf <fieldhouse@gmx.net>
Date: Mon, 31 Jan 2022 04:28:54 +0000
[YouTube] Bypass age-gating for certain restricted videos
* Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client
* Also add and fix tests
* Introduce and use new utility function `update_url()`
Diffstat:
2 files changed, 168 insertions(+), 45 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -42,6 +42,7 @@ from ..utils import (
unescapeHTML,
unified_strdate,
unsmuggle_url,
+ update_url,
update_url_query,
url_or_none,
urlencode_postdata,
@@ -286,15 +287,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
    """POST a JSON request to a `youtubei/v1` API endpoint.

    ep:       endpoint name appended to the API base URL (e.g. 'player')
    query:    dict merged over a copy of `self._DEFAULT_API_DATA`; its
              top-level keys replace the defaults of the same name
    video_id: passed through to `_download_json`
    fatal:    passed through to `_download_json`
    headers:  optional dict of extra HTTP headers; they are merged over
              the default JSON content-type header
    returns:  whatever `self._download_json` returns
    """
    payload = self._DEFAULT_API_DATA.copy()
    payload.update(query)

    # Start from the mandatory JSON content type, then let the caller
    # add to (or override) it
    request_headers = {'content-type': 'application/json'}
    if headers:
        request_headers.update(headers)

    api_url = 'https://www.youtube.com/youtubei/v1/%s' % ep
    body = json.dumps(payload).encode('utf8')
    return self._download_json(
        api_url, video_id=video_id,
        data=body, headers=request_headers, fatal=fatal,
        note='Downloading API JSON', errnote='Unable to download API page',
        query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
def _extract_yt_initial_data(self, video_id, webpage):
@@ -515,6 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'channel': 'Philipp Hagemeister',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002',
@@ -524,10 +529,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 10,
'view_count': int,
'like_count': int,
- 'dislike_count': int,
+ 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
'start_time': 1,
'end_time': 9,
- }
+ },
},
{
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
@@ -562,7 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 10,
'view_count': int,
'like_count': int,
- 'dislike_count': int,
},
'params': {
'skip_download': True,
@@ -621,8 +625,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
},
- # Normal age-gate video (No vevo, embed allowed), available via embed page
+ # Age-gated videos
{
+ 'note': 'Age-gated video (No vevo, embed allowed)',
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
'id': 'HtVdAasjOgU',
@@ -631,17 +636,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'duration': 142,
'uploader': 'The Witcher',
- 'uploader_id': 'WitcherGame',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
'upload_date': '20140605',
+ 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
+ 'age_limit': 18,
+ 'categories': ['Gaming'],
+ 'tags': 'count:17',
+ 'channel': 'The Witcher',
+ 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
+ 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ },
+ {
+ 'note': 'Age-gated video with embed allowed in public site',
+ 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
+ 'info_dict': {
+ 'id': 'HsUATh_Nc2U',
+ 'ext': 'mp4',
+ 'title': 'Godzilla 2 (Official Video)',
+ 'description': 'md5:bf77e03fcae5529475e500129b05668a',
+ 'duration': 177,
+ 'uploader': 'FlyingKitty',
+ 'upload_date': '20200408',
+ 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
'age_limit': 18,
+ 'categories': ['Entertainment'],
+ 'tags': ['Flyingkitty', 'godzilla 2'],
+ 'channel': 'FlyingKitty',
+ 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
+ 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
+ 'view_count': int,
+ 'like_count': int,
},
},
{
- # Age-gated video only available with authentication (unavailable
- # via embed page workaround)
+ 'note': 'Age-gated video embedable only with clientScreen=EMBED',
+ 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
+ 'info_dict': {
+ 'id': 'Tq92D6wQ1mg',
+ 'ext': 'mp4',
+ 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
+ 'description': 'md5:17eccca93a786d51bc67646756894066',
+ 'duration': 106,
+ 'uploader': 'Projekt Melody',
+ 'upload_date': '20191227',
+ 'age_limit': 18,
+ 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
+ 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
+ 'categories': ['Entertainment'],
+ 'channel': 'Projekt Melody',
+ 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ },
+ {
+ 'note': 'Non-Age-gated non-embeddable video',
+ 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
+ 'info_dict': {
+ 'id': 'MeJVWBSsPAY',
+ 'ext': 'mp4',
+ 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
+ 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
+ 'duration': 210,
+ 'uploader': 'Herr Lurik',
+ 'uploader_id': 'st3in234',
+ 'upload_date': '20130730',
+ 'uploader_url': 'http://www.youtube.com/user/st3in234',
+ 'age_limit': 0,
+ 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
+ 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
+ 'categories': ['Music'],
+ 'channel': 'Herr Lurik',
+ 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
+ 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
+ 'artist': 'OOMPH!',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ },
+ {
+ 'note': 'Non-bypassable age-gated video',
+ 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
+ 'only_matching': True,
+ },
+ {
+ 'note': 'Age-gated video only available with authentication (not via embed workaround)',
'url': 'XgnwCQzjau8',
'only_matching': True,
+ 'skip': '''This video has been removed for violating YouTube's Community Guidelines''',
},
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
@@ -670,17 +755,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': 'lqQg6PlCWgI',
'ext': 'mp4',
+ 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ 'description': r're:(?s)(?:.+\s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
'duration': 6085,
'upload_date': '20150827',
'uploader_id': 'olympic',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
- 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
- 'uploader': 'Olympic',
- 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ 'uploader': r're:Olympics?',
+ 'age_limit': 0,
+ 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
+ 'categories': ['Sports'],
+ 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
+ 'channel': 'Olympics',
+ 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
+ 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
+ 'view_count': int,
+ 'like_count': int,
},
- 'params': {
- 'skip_download': 'requires avconv',
- }
},
# Non-square pixels
{
@@ -1683,27 +1774,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response = self._call_api(
'player', {'videoId': video_id}, video_id)
- playability_status = player_response.get('playabilityStatus') or {}
- if playability_status.get('reason') == 'Sign in to confirm your age':
- video_info = self._download_webpage(
- base_url + 'get_video_info', video_id,
- 'Refetching age-gated info webpage',
- 'unable to download video info webpage', query={
- 'video_id': video_id,
- 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'html5': 1,
- # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
- 'c': 'TVHTML5',
- 'cver': '6.20180913',
- }, fatal=False)
- if video_info:
- pr = self._parse_json(
- try_get(
- compat_parse_qs(video_info),
- lambda x: x['player_response'][0], compat_str) or '{}',
- video_id, fatal=False)
- if pr and isinstance(pr, dict):
- player_response = pr
def is_agegated(playability):
    """Tell whether a playability-status dict reports an age gate.

    playability: the `playabilityStatus` object of a player response
    returns:     None if `playability` is not a dict; True if it has a
                 truthy 'desktopLegacyAgeGateReason' or if its 'status'
                 or 'reason' value contains one of the known age-gate
                 markers; False otherwise
    """
    if not isinstance(playability, dict):
        return None

    if playability.get('desktopLegacyAgeGateReason'):
        return True

    # Collect the values into a list: a lazy filter() object (Python 3)
    # is exhausted after one pass, so re-iterating it for every marker
    # would only ever test the first marker
    reasons = [
        value for value in (playability.get(key) for key in ('status', 'reason'))
        if value]
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason
        for reason in reasons
        for expected in AGE_GATE_REASONS)
+
def get_playability_status(response):
    """Return the 'playabilityStatus' dict of `response`, or {} if there is none.

    The lookup goes through `try_get` with `dict` as the expected type;
    any falsy result is replaced by a fresh empty dict.
    """
    status = try_get(response, lambda resp: resp['playabilityStatus'], dict)
    if not status:
        return {}
    return status
+
+ playability_status = get_playability_status(player_response)
+ if (is_agegated(playability_status)
+ and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):
+
+ self.report_age_confirmation()
+
+ # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
+ pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
+ query = {
+ 'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}},
+ 'contentCheckOk': True,
+ 'racyCheckOk': True,
+ 'context': {
+ 'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
+ 'thirdParty': {'embedUrl': 'https://google.com'},
+ },
+ 'videoId': video_id,
+ }
+ headers = {
+ 'X-YouTube-Client-Name': '85',
+ 'X-YouTube-Client-Version': '2.0',
+ 'Origin': 'https://www.youtube.com'
+ }
+
+ video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
+ age_gate_status = get_playability_status(video_info)
+ if age_gate_status.get('status') == 'OK':
+ player_response = video_info
+ playability_status = age_gate_status
trailer_video_id = try_get(
playability_status,
@@ -1932,12 +2048,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for thumbnail in (try_get(
container,
lambda x: x['thumbnail']['thumbnails'], list) or []):
- thumbnail_url = thumbnail.get('url')
+ thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
'height': int_or_none(thumbnail.get('height')),
- 'url': thumbnail_url,
+ 'url': update_url(thumbnail_url, query=None, fragment=None),
'width': int_or_none(thumbnail.get('width')),
})
if thumbnails:
@@ -2142,6 +2258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sbr_tooltip = try_get(
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
if sbr_tooltip:
+ # Note: YouTube no longer publishes dislike counts, so dislike_count may not be meaningful here
like_count, dislike_count = sbr_tooltip.split(' / ')
info.update({
'like_count': str_to_int(like_count),
@@ -2411,7 +2528,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'tags': list,
'view_count': int,
'like_count': int,
- 'dislike_count': int,
},
'params': {
'skip_download': True,
@@ -2438,7 +2554,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'categories': ['News & Politics'],
'tags': list,
'like_count': int,
- 'dislike_count': int,
},
'params': {
'skip_download': True,
@@ -2458,7 +2573,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'categories': ['News & Politics'],
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
'like_count': int,
- 'dislike_count': int,
},
'params': {
'skip_download': True,
@@ -3043,8 +3157,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
def _real_extract(self, url):
item_id = self._match_id(url)
- url = compat_urlparse.urlunparse(
- compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+ url = update_url(url, netloc='www.youtube.com')
# Handle both video/playlist URLs
qs = parse_qs(url)
video_id = qs.get('v', [None])[0]
@@ -3178,7 +3291,6 @@ class YoutubeYtBeIE(InfoExtractor):
'categories': ['Nonprofits & Activism'],
'tags': list,
'like_count': int,
- 'dislike_count': int,
},
'params': {
'noplaylist': True,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -4121,6 +4121,17 @@ def update_url_query(url, query):
query=compat_urllib_parse_urlencode(qs, True)))
def update_url(url, **kwargs):
    """Replace the URL components specified by kwargs

    url:     URL string, or an already parsed URL: either the named
             tuple produced by compat_urlparse.urlparse() or a plain
             6-item (scheme, netloc, path, params, query, fragment) tuple
    kwargs:  new values keyed by component name (scheme, netloc, path,
             params, query, fragment)
    returns: the re-assembled URL string; if there are no kwargs, a
             string `url` is returned unchanged
    """
    if not kwargs:
        return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url
    if not isinstance(url, tuple):
        url = compat_urlparse.urlparse(url)
    elif not hasattr(url, '_replace'):
        # A bare tuple has no _replace(): promote it to the named tuple
        # type so that it is handled like the no-kwargs case above
        url = compat_urlparse.ParseResult(*url)
    return compat_urlparse.urlunparse(url._replace(**kwargs))
+
+
def update_Request(req, url=None, data=None, headers={}, query={}):
req_headers = req.headers.copy()
req_headers.update(headers)