logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git
commit: 58988c1421b88875a33015b08e4d2ada43021e09
parent e19ec5232216fd801ded88728df5b50bfb05c1cc
Author: dirkf <fieldhouse@gmx.net>
Date:   Mon, 31 Jan 2022 04:28:54 +0000

[YouTube] Bypass age-gating for certain restricted videos

* Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client

* Also add and fix tests

* Introduce and use new utility function `update_url()`

Diffstat:

M youtube_dl/extractor/youtube.py | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M youtube_dl/utils.py | 11 +++++++++++
2 files changed, 168 insertions(+), 45 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py @@ -42,6 +42,7 @@ from ..utils import ( unescapeHTML, unified_strdate, unsmuggle_url, + update_url, update_url_query, url_or_none, urlencode_postdata, @@ -286,15 +287,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)' - def _call_api(self, ep, query, video_id, fatal=True): + def _call_api(self, ep, query, video_id, fatal=True, headers=None): data = self._DEFAULT_API_DATA.copy() data.update(query) + real_headers = {'content-type': 'application/json'} + if headers: + real_headers.update(headers) return self._download_json( 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id, note='Downloading API JSON', errnote='Unable to download API page', data=json.dumps(data).encode('utf8'), fatal=fatal, - headers={'content-type': 'application/json'}, + headers=real_headers, query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'}) def _extract_yt_initial_data(self, video_id, webpage): @@ -515,6 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', + 'channel': 'Philipp Hagemeister', 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'upload_date': '20121002', @@ -524,10 +529,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 10, 'view_count': int, 'like_count': int, - 'dislike_count': int, + 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg', 'start_time': 1, 'end_time': 9, - } + }, }, { 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', @@ -562,7 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 10, 'view_count': int, 'like_count': int, - 'dislike_count': int, }, 'params': { 'skip_download': True, @@ -621,8 +625,9 
@@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', } }, - # Normal age-gate video (No vevo, embed allowed), available via embed page + # Age-gated videos { + 'note': 'Age-gated video (No vevo, embed allowed)', 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 'info_dict': { 'id': 'HtVdAasjOgU', @@ -631,17 +636,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 'duration': 142, 'uploader': 'The Witcher', - 'uploader_id': 'WitcherGame', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 'upload_date': '20140605', + 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg', + 'age_limit': 18, + 'categories': ['Gaming'], + 'tags': 'count:17', + 'channel': 'The Witcher', + 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg', + 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg', + 'view_count': int, + 'like_count': int, + }, + }, + { + 'note': 'Age-gated video with embed allowed in public site', + 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U', + 'info_dict': { + 'id': 'HsUATh_Nc2U', + 'ext': 'mp4', + 'title': 'Godzilla 2 (Official Video)', + 'description': 'md5:bf77e03fcae5529475e500129b05668a', + 'duration': 177, + 'uploader': 'FlyingKitty', + 'upload_date': '20200408', + 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg', 'age_limit': 18, + 'categories': ['Entertainment'], + 'tags': ['Flyingkitty', 'godzilla 2'], + 'channel': 'FlyingKitty', + 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg', + 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg', + 'view_count': int, + 'like_count': int, }, }, { - # Age-gated video only available with authentication (unavailable - # via embed page workaround) + 'note': 
'Age-gated video embedable only with clientScreen=EMBED', + 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg', + 'info_dict': { + 'id': 'Tq92D6wQ1mg', + 'ext': 'mp4', + 'title': '[MMD] Adios - EVERGLOW [+Motion DL]', + 'description': 'md5:17eccca93a786d51bc67646756894066', + 'duration': 106, + 'uploader': 'Projekt Melody', + 'upload_date': '20191227', + 'age_limit': 18, + 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg', + 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'], + 'categories': ['Entertainment'], + 'channel': 'Projekt Melody', + 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ', + 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', + 'view_count': int, + 'like_count': int, + }, + }, + { + 'note': 'Non-Age-gated non-embeddable video', + 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY', + 'info_dict': { + 'id': 'MeJVWBSsPAY', + 'ext': 'mp4', + 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)', + 'description': 'Fan Video. Music & Lyrics by OOMPH!.', + 'duration': 210, + 'uploader': 'Herr Lurik', + 'uploader_id': 'st3in234', + 'upload_date': '20130730', + 'uploader_url': 'http://www.youtube.com/user/st3in234', + 'age_limit': 0, + 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg', + 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'], + 'categories': ['Music'], + 'channel': 'Herr Lurik', + 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA', + 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA', + 'artist': 'OOMPH!', + 'view_count': int, + 'like_count': int, + }, + }, + { + 'note': 'Non-bypassable age-gated video', + 'url': 'https://youtube.com/watch?v=Cr381pDsSsA', + 'only_matching': True, + }, + { + 'note': 'Age-gated video only available with authentication (not via embed workaround)', 'url': 'XgnwCQzjau8', 'only_matching': True, + 'skip': '''This video has been removed for violating YouTube's Community Guidelines''', }, # video_info is None 
(https://github.com/ytdl-org/youtube-dl/issues/4421) # YouTube Red ad is not captured for creator @@ -670,17 +755,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'lqQg6PlCWgI', 'ext': 'mp4', + 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', + 'description': r're:(?s)(?:.+\s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*', 'duration': 6085, 'upload_date': '20150827', 'uploader_id': 'olympic', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', - 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', - 'uploader': 'Olympic', - 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', + 'uploader': r're:Olympics?', + 'age_limit': 0, + 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg', + 'categories': ['Sports'], + 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'], + 'channel': 'Olympics', + 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q', + 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q', + 'view_count': int, + 'like_count': int, }, - 'params': { - 'skip_download': 'requires avconv', - } }, # Non-square pixels { @@ -1683,27 +1774,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_response = self._call_api( 'player', {'videoId': video_id}, video_id) - playability_status = player_response.get('playabilityStatus') or {} - if playability_status.get('reason') == 'Sign in to confirm your age': - video_info = self._download_webpage( - base_url + 'get_video_info', video_id, - 'Refetching age-gated info webpage', - 'unable to download video info webpage', query={ - 'video_id': video_id, - 'eurl': 'https://youtube.googleapis.com/v/' + video_id, - 'html5': 1, - # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544 - 'c': 'TVHTML5', - 'cver': '6.20180913', - }, 
fatal=False) - if video_info: - pr = self._parse_json( - try_get( - compat_parse_qs(video_info), - lambda x: x['player_response'][0], compat_str) or '{}', - video_id, fatal=False) - if pr and isinstance(pr, dict): - player_response = pr + def is_agegated(playability): + if not isinstance(playability, dict): + return + + if playability.get('desktopLegacyAgeGateReason'): + return True + + reasons = filter(None, (playability.get(r) for r in ('status', 'reason'))) + AGE_GATE_REASONS = ( + 'confirm your age', 'age-restricted', 'inappropriate', # reason + 'age_verification_required', 'age_check_required', # status + ) + return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons) + + def get_playability_status(response): + return try_get(response, lambda x: x['playabilityStatus'], dict) or {} + + playability_status = get_playability_status(player_response) + if (is_agegated(playability_status) + and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18): + + self.report_age_confirmation() + + # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233 + pb_context = {'html5Preference': 'HTML5_PREF_WANTS'} + query = { + 'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}}, + 'contentCheckOk': True, + 'racyCheckOk': True, + 'context': { + 'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'}, + 'thirdParty': {'embedUrl': 'https://google.com'}, + }, + 'videoId': video_id, + } + headers = { + 'X-YouTube-Client-Name': '85', + 'X-YouTube-Client-Version': '2.0', + 'Origin': 'https://www.youtube.com' + } + + video_info = self._call_api('player', query, video_id, fatal=False, headers=headers) + age_gate_status = get_playability_status(video_info) + if age_gate_status.get('status') == 'OK': + player_response = video_info + playability_status = age_gate_status trailer_video_id = try_get( playability_status, @@ -1932,12 +2048,12 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): for thumbnail in (try_get( container, lambda x: x['thumbnail']['thumbnails'], list) or []): - thumbnail_url = thumbnail.get('url') + thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue thumbnails.append({ 'height': int_or_none(thumbnail.get('height')), - 'url': thumbnail_url, + 'url': update_url(thumbnail_url, query=None, fragment=None), 'width': int_or_none(thumbnail.get('width')), }) if thumbnails: @@ -2142,6 +2258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sbr_tooltip = try_get( vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip']) if sbr_tooltip: + # however dislike_count was hidden by YT, as if there could ever be dislikable content on YT like_count, dislike_count = sbr_tooltip.split(' / ') info.update({ 'like_count': str_to_int(like_count), @@ -2411,7 +2528,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'tags': list, 'view_count': int, 'like_count': int, - 'dislike_count': int, }, 'params': { 'skip_download': True, @@ -2438,7 +2554,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'categories': ['News & Politics'], 'tags': list, 'like_count': int, - 'dislike_count': int, }, 'params': { 'skip_download': True, @@ -2458,7 +2573,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'categories': ['News & Politics'], 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], 'like_count': int, - 'dislike_count': int, }, 'params': { 'skip_download': True, @@ -3043,8 +3157,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): def _real_extract(self, url): item_id = self._match_id(url) - url = compat_urlparse.urlunparse( - compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com')) + url = update_url(url, netloc='www.youtube.com') # Handle both video/playlist URLs qs = parse_qs(url) video_id = qs.get('v', [None])[0] @@ -3178,7 +3291,6 @@ class YoutubeYtBeIE(InfoExtractor): 'categories': ['Nonprofits & Activism'], 'tags': list, 
'like_count': int, - 'dislike_count': int, }, 'params': { 'noplaylist': True, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -4121,6 +4121,17 @@ def update_url_query(url, query): query=compat_urllib_parse_urlencode(qs, True))) +def update_url(url, **kwargs): + """Replace URL components specified by kwargs + url: compat_str or parsed URL tuple + returns: compat_str""" + if not kwargs: + return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url + if not isinstance(url, tuple): + url = compat_urlparse.urlparse(url) + return compat_urlparse.urlunparse(url._replace(**kwargs)) + + def update_Request(req, url=None, data=None, headers={}, query={}): req_headers = req.headers.copy() req_headers.update(headers)