8b703800ecb-pull-16105-rooster-teeth.diff - etc_portage - Unnamed repository; edit this file 'description' to name the repository.

8b703800ecb-pull-16105-rooster-teeth.diff (9443B)
      1 diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py
      2 index 8b703800ecb..69a3c6d0164 100644
      3 --- a/youtube_dl/extractor/roosterteeth.py
      4 +++ b/youtube_dl/extractor/roosterteeth.py
      5 @@ -1,35 +1,37 @@
      6  # coding: utf-8
      7  from __future__ import unicode_literals
      8  
      9 -import re
     10 +import time
     11  
     12  from .common import InfoExtractor
     13  from ..utils import (
     14      ExtractorError,
     15 -    int_or_none,
     16 -    strip_or_none,
     17 -    unescapeHTML,
     18 +    compat_str,
     19 +    str_or_none,
     20 +    try_get,
     21 +    unified_timestamp,
     22      urlencode_postdata,
     23  )
     24  
     25  
     26  class RoosterTeethIE(InfoExtractor):
     27      _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
     28 -    _LOGIN_URL = 'https://roosterteeth.com/login'
     29 +    _LOGIN_URL = 'https://auth.roosterteeth.com/oauth/token'
     30 +    _API_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/'
     31 +    _ACCESS_TOKEN = None
     32      _NETRC_MACHINE = 'roosterteeth'
     33      _TESTS = [{
     34          'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
     35          'md5': 'e2bd7764732d785ef797700a2489f212',
     36          'info_dict': {
     37 -            'id': '26576',
     38 +            'id': '9156',
     39              'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
     40              'ext': 'mp4',
     41 -            'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
     42 +            'title': 'Million Dollars, But... The Game Announcement',
     43              'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
     44              'thumbnail': r're:^https?://.*\.png$',
     45              'series': 'Million Dollars, But...',
     46 -            'episode': 'Million Dollars, But... The Game Announcement',
     47 -            'comment_count': int,
     48 +            'episode': 'S2:E10 - Million Dollars, But... The Game Announcement',
     49          },
     50      }, {
     51          'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
     52 @@ -54,64 +56,94 @@ def _login(self):
     53          if username is None:
     54              return
     55  
     56 -        login_page = self._download_webpage(
     57 -            self._LOGIN_URL, None,
     58 -            note='Downloading login page',
     59 -            errnote='Unable to download login page')
     60 -
     61 -        login_form = self._hidden_inputs(login_page)
     62 -
     63 -        login_form.update({
     64 -            'username': username,
     65 -            'password': password,
     66 -        })
     67 +        cookie = self._get_cookie('rt_access_token')
     68 +        if cookie and not cookie.is_expired():
     69 +            self._ACCESS_TOKEN = cookie.value
     70 +            return
     71  
     72 -        login_request = self._download_webpage(
     73 +        response = self._download_json(
     74              self._LOGIN_URL, None,
     75              note='Logging in',
     76 -            data=urlencode_postdata(login_form),
     77 -            headers={
     78 -                'Referer': self._LOGIN_URL,
     79 +            errnote='Unable to log in',
     80 +            data=urlencode_postdata({
     81 +                'username': username,
     82 +                'password': password,
     83 +                'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
     84 +                'grant_type': 'password',
     85              })
     86 +        )
     87  
     88 -        if not any(re.search(p, login_request) for p in (
     89 -                r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
     90 -                r'>Sign Out<')):
     91 -            error = self._html_search_regex(
     92 -                r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
     93 -                login_request, 'alert', default=None, group='error')
     94 -            if error:
     95 -                raise ExtractorError('Unable to login: %s' % error, expected=True)
     96 +        self._ACCESS_TOKEN = response.get('access_token')
     97 +        if not self._ACCESS_TOKEN:
     98              raise ExtractorError('Unable to log in')
     99  
    100 +        created_at = response.get('created_at', 0)
    101 +        expires_in = response.get('expires_in', 0)
    102 +
    103 +        self._set_cookie('.roosterteeth.com', 'rt_access_token', self._ACCESS_TOKEN, created_at + expires_in)
    104 +
    105      def _real_initialize(self):
    106          self._login()
    107  
    108      def _real_extract(self, url):
    109          display_id = self._match_id(url)
    110  
    111 -        webpage = self._download_webpage(url, display_id)
    112 -
    113 -        episode = strip_or_none(unescapeHTML(self._search_regex(
    114 -            (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
    115 -             r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
    116 -            default=None, group='title')))
    117 -
    118 -        title = strip_or_none(self._og_search_title(
    119 -            webpage, default=None)) or episode
    120 -
    121 -        m3u8_url = self._search_regex(
    122 -            r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
    123 -            webpage, 'm3u8 url', default=None, group='url')
    124 -
    125 +        headers = {}
    126 +        if self._ACCESS_TOKEN:
    127 +            headers['Authorization'] = 'Bearer ' + self._ACCESS_TOKEN
    128 +
    129 +        api_response = self._call_api(
    130 +            display_id,
    131 +            note='Downloading video information (1/2)',
    132 +            errnote='Unable to download video information (1/2)',
    133 +            headers=headers,
    134 +        )
    135 +
    136 +        data = api_response['data'][0]
    137 +
    138 +        attributes = data['attributes']
    139 +        episode = attributes.get('display_title')
    140 +        title = attributes['title']
    141 +        description = attributes.get('caption')
    142 +        series = attributes.get('show_title')
    143 +
    144 +        thumbnails = []
    145 +        for i, size in enumerate(['thumb', 'small', 'medium', 'large']):
    146 +            thumbnail = try_get(data, lambda x: x['included']['images'][0]['attributes'][size], compat_str)
    147 +            if thumbnail:
    148 +                thumbnails.append({'url': thumbnail, 'id': i})
    149 +
    150 +        video_response = self._call_api(
    151 +            display_id,
    152 +            path='/videos',
    153 +            note='Downloading video information (2/2)',
    154 +            errnote='Unable to download video information (2/2)',
    155 +            headers=headers,
    156 +        )
    157 +
    158 +        if video_response.get('access') is not None:
    159 +            now = time.time()
    160 +            sponsor_golive = unified_timestamp(attributes.get('sponsor_golive_at'))
    161 +            member_golive = unified_timestamp(attributes.get('member_golive_at'))
    162 +            public_golive = unified_timestamp(attributes.get('public_golive_at'))
    163 +
    164 +            if attributes.get('is_sponsors_only', False):
    165 +                if now < sponsor_golive:
    166 +                    self._golive_error(display_id, 'FIRST members')
    167 +                else:
    168 +                    self.raise_login_required('{0} is only available for FIRST members'.format(display_id))
    169 +            else:
    170 +                if now < member_golive:
    171 +                    self._golive_error(display_id, 'site members')
    172 +                elif now < public_golive:
    173 +                    self._golive_error(display_id, 'the public')
    174 +                else:
    175 +                    raise ExtractorError('Video is not available')
    176 +
    177 +        video_attributes = try_get(video_response, lambda x: x['data'][0]['attributes'])
    178 +
    179 +        m3u8_url = video_attributes.get('url')
    180          if not m3u8_url:
    181 -            if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
    182 -                self.raise_login_required(
    183 -                    '%s is only available for FIRST members' % display_id)
    184 -
    185 -            if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
    186 -                self.raise_login_required('%s is not available yet' % display_id)
    187 -
    188              raise ExtractorError('Unable to extract m3u8 URL')
    189  
    190          formats = self._extract_m3u8_formats(
    191 @@ -119,30 +151,31 @@ def _real_extract(self, url):
    192              entry_protocol='m3u8_native', m3u8_id='hls')
    193          self._sort_formats(formats)
    194  
    195 -        description = strip_or_none(self._og_search_description(webpage))
    196 -        thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
    197 -
    198 -        series = self._search_regex(
    199 -            (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
    200 -            webpage, 'series', fatal=False)
    201 -
    202 -        comment_count = int_or_none(self._search_regex(
    203 -            r'>Comments \((\d+)\)<', webpage,
    204 -            'comment count', fatal=False))
    205 -
    206 -        video_id = self._search_regex(
    207 -            (r'containerId\s*=\s*["\']episode-(\d+)\1',
    208 -             r'<div[^<]+id=["\']episode-(\d+)'), webpage,
    209 -            'video id', default=display_id)
    210 +        video_id = str_or_none(video_attributes.get('content_id'))
    211  
    212          return {
    213              'id': video_id,
    214              'display_id': display_id,
    215              'title': title,
    216              'description': description,
    217 -            'thumbnail': thumbnail,
    218 +            'thumbnails': thumbnails,
    219              'series': series,
    220              'episode': episode,
    221 -            'comment_count': comment_count,
    222              'formats': formats,
    223          }
    224 +
    225 +    def _golive_error(self, video_id, member_level):
    226 +        raise ExtractorError('{0} is not yet live for {1}'.format(video_id, member_level))
    227 +
    228 +    def _call_api(self, video_id, path=None, **kwargs):
    229 +        url = self._API_URL + video_id
    230 +        if path:
    231 +            url = url + path
    232 +
    233 +        return self._download_json(url, video_id, **kwargs)
    234 +
    235 +    def _get_cookie(self, name):
    236 +        for cookie in self._downloader.cookiejar:
    237 +            if cookie.name == name:
    238 +                return cookie
    239 +        return None