8b703800ecb-pull-16105-rooster-teeth.diff (9443B)
1 diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py 2 index 8b703800ecb..69a3c6d0164 100644 3 --- a/youtube_dl/extractor/roosterteeth.py 4 +++ b/youtube_dl/extractor/roosterteeth.py 5 @@ -1,35 +1,37 @@ 6 # coding: utf-8 7 from __future__ import unicode_literals 8 9 -import re 10 +import time 11 12 from .common import InfoExtractor 13 from ..utils import ( 14 ExtractorError, 15 - int_or_none, 16 - strip_or_none, 17 - unescapeHTML, 18 + compat_str, 19 + str_or_none, 20 + try_get, 21 + unified_timestamp, 22 urlencode_postdata, 23 ) 24 25 26 class RoosterTeethIE(InfoExtractor): 27 _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)' 28 - _LOGIN_URL = 'https://roosterteeth.com/login' 29 + _LOGIN_URL = 'https://auth.roosterteeth.com/oauth/token' 30 + _API_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/' 31 + _ACCESS_TOKEN = None 32 _NETRC_MACHINE = 'roosterteeth' 33 _TESTS = [{ 34 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 35 'md5': 'e2bd7764732d785ef797700a2489f212', 36 'info_dict': { 37 - 'id': '26576', 38 + 'id': '9156', 39 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 40 'ext': 'mp4', 41 - 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', 42 + 'title': 'Million Dollars, But... The Game Announcement', 43 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', 44 'thumbnail': r're:^https?://.*\.png$', 45 'series': 'Million Dollars, But...', 46 - 'episode': 'Million Dollars, But... The Game Announcement', 47 - 'comment_count': int, 48 + 'episode': 'S2:E10 - Million Dollars, But... The Game Announcement', 49 }, 50 }, { 51 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', 52 @@ -54,64 +56,94 @@ def _login(self): 53 if username is None: 54 return 55 56 - login_page = self._download_webpage( 57 - self._LOGIN_URL, None, 58 - note='Downloading login page', 59 - errnote='Unable to download login page') 60 - 61 - login_form = self._hidden_inputs(login_page) 62 - 63 - login_form.update({ 64 - 'username': username, 65 - 'password': password, 66 - }) 67 + cookie = self._get_cookie('rt_access_token') 68 + if cookie and not cookie.is_expired(): 69 + self._ACCESS_TOKEN = cookie.value 70 + return 71 72 - login_request = self._download_webpage( 73 + response = self._download_json( 74 self._LOGIN_URL, None, 75 note='Logging in', 76 - data=urlencode_postdata(login_form), 77 - headers={ 78 - 'Referer': self._LOGIN_URL, 79 + errnote='Unable to log in', 80 + data=urlencode_postdata({ 81 + 'username': username, 82 + 'password': password, 83 + 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5', 84 + 'grant_type': 'password', 85 }) 86 + ) 87 88 - if not any(re.search(p, login_request) for p in ( 89 - r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"', 90 - r'>Sign Out<')): 91 - error = self._html_search_regex( 92 - r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>', 93 - login_request, 'alert', default=None, group='error') 94 - if error: 95 - raise ExtractorError('Unable to login: %s' % error, expected=True) 96 + self._ACCESS_TOKEN = response.get('access_token') 97 + if not self._ACCESS_TOKEN: 98 raise ExtractorError('Unable to log in') 99 100 + created_at = response.get('created_at', 0) 101 + expires_in = response.get('expires_in', 0) 102 + 103 + self._set_cookie('.roosterteeth.com', 'rt_access_token', self._ACCESS_TOKEN, created_at + expires_in) 104 + 105 def _real_initialize(self): 106 self._login() 107 108 def _real_extract(self, url): 109 display_id = self._match_id(url) 110 111 - webpage = self._download_webpage(url, display_id) 112 - 113 - episode = strip_or_none(unescapeHTML(self._search_regex( 114 - (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', 115 - r'<title>(?P<title>[^<]+)</title>'), webpage, 'title', 116 - default=None, group='title'))) 117 - 118 - title = strip_or_none(self._og_search_title( 119 - webpage, default=None)) or episode 120 - 121 - m3u8_url = self._search_regex( 122 - r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1', 123 - webpage, 'm3u8 url', default=None, group='url') 124 - 125 + headers = {} 126 + if self._ACCESS_TOKEN: 127 + headers['Authorization'] = 'Bearer ' + self._ACCESS_TOKEN 128 + 129 + api_response = self._call_api( 130 + display_id, 131 + note='Downloading video information (1/2)', 132 + errnote='Unable to download video information (1/2)', 133 + headers=headers, 134 + ) 135 + 136 + data = api_response['data'][0] 137 + 138 + attributes = data['attributes'] 139 + episode = attributes.get('display_title') 140 + title = attributes['title'] 141 + description = attributes.get('caption') 142 + series = attributes.get('show_title') 143 + 144 + thumbnails = [] 145 + for i, size in enumerate(['thumb', 'small', 'medium', 'large']): 146 + thumbnail = try_get(data, lambda x: x['included']['images'][0]['attributes'][size], compat_str) 147 + if thumbnail: 148 + thumbnails.append({'url': thumbnail, 'id': i}) 149 + 150 + video_response = self._call_api( 151 + display_id, 152 + path='/videos', 153 + note='Downloading video information (2/2)', 154 + errnote='Unable to download video information (2/2)', 155 + headers=headers, 156 + ) 157 + 158 + if video_response.get('access') is not None: 159 + now = time.time() 160 + sponsor_golive = unified_timestamp(attributes.get('sponsor_golive_at')) 161 + member_golive = unified_timestamp(attributes.get('member_golive_at')) 162 + public_golive = unified_timestamp(attributes.get('public_golive_at')) 163 + 164 + if attributes.get('is_sponsors_only', False): 165 + if now < sponsor_golive: 166 + self._golive_error(display_id, 'FIRST members') 167 + else: 168 + self.raise_login_required('{0} is only available for FIRST members'.format(display_id)) 169 + else: 170 + if now < member_golive: 171 + self._golive_error(display_id, 'site members') 172 + elif now < public_golive: 173 + self._golive_error(display_id, 'the public') 174 + else: 175 + raise ExtractorError('Video is not available') 176 + 177 + video_attributes = try_get(video_response, lambda x: x['data'][0]['attributes']) 178 + 179 + m3u8_url = video_attributes.get('url') 180 if not m3u8_url: 181 - if re.search(r'<div[^>]+class=["\']non-sponsor', webpage): 182 - self.raise_login_required( 183 - '%s is only available for FIRST members' % display_id) 184 - 185 - if re.search(r'<div[^>]+class=["\']golive-gate', webpage): 186 - self.raise_login_required('%s is not available yet' % display_id) 187 - 188 raise ExtractorError('Unable to extract m3u8 URL') 189 190 formats = self._extract_m3u8_formats( 191 @@ -119,30 +151,31 @@ def _real_extract(self, url): 192 entry_protocol='m3u8_native', m3u8_id='hls') 193 self._sort_formats(formats) 194 195 - description = strip_or_none(self._og_search_description(webpage)) 196 - thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) 197 - 198 - series = self._search_regex( 199 - (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'), 200 - webpage, 'series', fatal=False) 201 - 202 - comment_count = int_or_none(self._search_regex( 203 - r'>Comments \((\d+)\)<', webpage, 204 - 'comment count', fatal=False)) 205 - 206 - video_id = self._search_regex( 207 - (r'containerId\s*=\s*["\']episode-(\d+)\1', 208 - r'<div[^<]+id=["\']episode-(\d+)'), webpage, 209 - 'video id', default=display_id) 210 + video_id = str_or_none(video_attributes.get('content_id')) 211 212 return { 213 'id': video_id, 214 'display_id': display_id, 215 'title': title, 216 'description': description, 217 - 'thumbnail': thumbnail, 218 + 'thumbnails': thumbnails, 219 'series': series, 220 'episode': episode, 221 - 'comment_count': comment_count, 222 'formats': formats, 223 } 224 + 225 + def _golive_error(self, video_id, member_level): 226 + raise ExtractorError('{0} is not yet live for {1}'.format(video_id, member_level)) 227 + 228 + def _call_api(self, video_id, path=None, **kwargs): 229 + url = self._API_URL + video_id 230 + if path: 231 + url = url + path 232 + 233 + return self._download_json(url, video_id, **kwargs) 234 + 235 + def _get_cookie(self, name): 236 + for cookie in self._downloader.cookiejar: 237 + if cookie.name == name: 238 + return cookie 239 + return None