logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

piksel.py (7045B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. dict_get,
  8. ExtractorError,
  9. int_or_none,
  10. parse_iso8601,
  11. try_get,
  12. unescapeHTML,
  13. )
  14. class PikselIE(InfoExtractor):
  15. _VALID_URL = r'''(?x)https?://
  16. (?:
  17. (?:
  18. player\.
  19. (?:
  20. olympusattelecom|
  21. vibebyvista
  22. )|
  23. (?:api|player)\.multicastmedia|
  24. (?:api-ovp|player)\.piksel
  25. )\.com|
  26. (?:
  27. mz-edge\.stream\.co|
  28. movie-s\.nhk\.or
  29. )\.jp|
  30. vidego\.baltimorecity\.gov
  31. )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
  32. _TESTS = [
  33. {
  34. 'url': 'http://player.piksel.com/v/ums2867l',
  35. 'md5': '34e34c8d89dc2559976a6079db531e85',
  36. 'info_dict': {
  37. 'id': 'ums2867l',
  38. 'ext': 'mp4',
  39. 'title': 'GX-005 with Caption',
  40. 'timestamp': 1481335659,
  41. 'upload_date': '20161210'
  42. }
  43. },
  44. {
  45. # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
  46. 'url': 'https://player.piksel.com/v/v80kqp41',
  47. 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
  48. 'info_dict': {
  49. 'id': 'v80kqp41',
  50. 'ext': 'mp4',
  51. 'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
  52. 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
  53. 'timestamp': 1486171129,
  54. 'upload_date': '20170204'
  55. }
  56. },
  57. {
  58. # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
  59. 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
  60. 'only_matching': True,
  61. }
  62. ]
  63. @staticmethod
  64. def _extract_url(webpage):
  65. mobj = re.search(
  66. r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
  67. webpage)
  68. if mobj:
  69. return mobj.group('url')
  70. def _call_api(self, app_token, resource, display_id, query, fatal=True):
  71. response = (self._download_json(
  72. 'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
  73. display_id, query=query, fatal=fatal) or {}).get('response')
  74. failure = try_get(response, lambda x: x['failure']['reason'])
  75. if failure:
  76. if fatal:
  77. raise ExtractorError(failure, expected=True)
  78. self.report_warning(failure)
  79. return response
  80. def _real_extract(self, url):
  81. ref_id, display_id = re.match(self._VALID_URL, url).groups()
  82. webpage = self._download_webpage(url, display_id)
  83. app_token = self._search_regex([
  84. r'clientAPI\s*:\s*"([^"]+)"',
  85. r'data-de-api-key\s*=\s*"([^"]+)"'
  86. ], webpage, 'app token')
  87. query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
  88. program = self._call_api(
  89. app_token, 'program', display_id, query)['WsProgramResponse']['program']
  90. video_id = program['uuid']
  91. video_data = program['asset']
  92. title = video_data['title']
  93. asset_type = dict_get(video_data, ['assetType', 'asset_type'])
  94. formats = []
  95. def process_asset_file(asset_file):
  96. if not asset_file:
  97. return
  98. # TODO: extract rtmp formats
  99. http_url = asset_file.get('http_url')
  100. if not http_url:
  101. return
  102. tbr = None
  103. vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
  104. abr = int_or_none(asset_file.get('audioBitrate'), 1024)
  105. if asset_type == 'video':
  106. tbr = vbr + abr
  107. elif asset_type == 'audio':
  108. tbr = abr
  109. format_id = ['http']
  110. if tbr:
  111. format_id.append(compat_str(tbr))
  112. formats.append({
  113. 'format_id': '-'.join(format_id),
  114. 'url': unescapeHTML(http_url),
  115. 'vbr': vbr,
  116. 'abr': abr,
  117. 'width': int_or_none(asset_file.get('videoWidth')),
  118. 'height': int_or_none(asset_file.get('videoHeight')),
  119. 'filesize': int_or_none(asset_file.get('filesize')),
  120. 'tbr': tbr,
  121. })
  122. def process_asset_files(asset_files):
  123. for asset_file in (asset_files or []):
  124. process_asset_file(asset_file)
  125. process_asset_files(video_data.get('assetFiles'))
  126. process_asset_file(video_data.get('referenceFile'))
  127. if not formats:
  128. asset_id = video_data.get('assetid') or program.get('assetid')
  129. if asset_id:
  130. process_asset_files(try_get(self._call_api(
  131. app_token, 'asset_file', display_id, {
  132. 'assetid': asset_id,
  133. }, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
  134. m3u8_url = dict_get(video_data, [
  135. 'm3u8iPadURL',
  136. 'ipadM3u8Url',
  137. 'm3u8AndroidURL',
  138. 'm3u8iPhoneURL',
  139. 'iphoneM3u8Url'])
  140. if m3u8_url:
  141. formats.extend(self._extract_m3u8_formats(
  142. m3u8_url, video_id, 'mp4', 'm3u8_native',
  143. m3u8_id='hls', fatal=False))
  144. smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
  145. if smil_url:
  146. transform_source = None
  147. if ref_id == 'nhkworld':
  148. # TODO: figure out if this is something to be fixed in urljoin,
  149. # _parse_smil_formats or keep it here
  150. transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
  151. formats.extend(self._extract_smil_formats(
  152. re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
  153. transform_source=transform_source, fatal=False))
  154. self._sort_formats(formats)
  155. subtitles = {}
  156. for caption in video_data.get('captions', []):
  157. caption_url = caption.get('url')
  158. if caption_url:
  159. subtitles.setdefault(caption.get('locale', 'en'), []).append({
  160. 'url': caption_url})
  161. return {
  162. 'id': video_id,
  163. 'title': title,
  164. 'description': video_data.get('description'),
  165. 'thumbnail': video_data.get('thumbnailUrl'),
  166. 'timestamp': parse_iso8601(video_data.get('dateadd')),
  167. 'formats': formats,
  168. 'subtitles': subtitles,
  169. }