logo

oasis-root

Compiled tree of Oasis Linux based on own branch at <https://hacktivis.me/git/oasis/> git clone https://anongit.hacktivis.me/git/oasis-root.git

globo.py (10066B)


  1. import base64
  2. import hashlib
  3. import json
  4. import random
  5. import re
  6. from .common import InfoExtractor
  7. from ..networking import HEADRequest
  8. from ..utils import (
  9. ExtractorError,
  10. float_or_none,
  11. orderedSet,
  12. str_or_none,
  13. try_get,
  14. )
  15. class GloboIE(InfoExtractor):
  16. _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
  17. _NETRC_MACHINE = 'globo'
  18. _TESTS = [{
  19. 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
  20. 'info_dict': {
  21. 'id': '3607726',
  22. 'ext': 'mp4',
  23. 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
  24. 'duration': 103.204,
  25. 'uploader': 'G1',
  26. 'uploader_id': '2015',
  27. },
  28. 'params': {
  29. 'skip_download': True,
  30. },
  31. }, {
  32. 'url': 'http://globoplay.globo.com/v/4581987/',
  33. 'info_dict': {
  34. 'id': '4581987',
  35. 'ext': 'mp4',
  36. 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
  37. 'duration': 137.973,
  38. 'uploader': 'Rede Globo',
  39. 'uploader_id': '196',
  40. },
  41. 'params': {
  42. 'skip_download': True,
  43. },
  44. }, {
  45. 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
  46. 'only_matching': True,
  47. }, {
  48. 'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
  49. 'only_matching': True,
  50. }, {
  51. 'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
  52. 'only_matching': True,
  53. }, {
  54. 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
  55. 'only_matching': True,
  56. }, {
  57. 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
  58. 'only_matching': True,
  59. }, {
  60. 'url': 'globo:3607726',
  61. 'only_matching': True,
  62. }, {
  63. 'url': 'https://globoplay.globo.com/v/10248083/',
  64. 'info_dict': {
  65. 'id': '10248083',
  66. 'ext': 'mp4',
  67. 'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
  68. 'duration': 530.964,
  69. 'uploader': 'SporTV',
  70. 'uploader_id': '698',
  71. },
  72. 'params': {
  73. 'skip_download': True,
  74. },
  75. }]
  76. def _real_extract(self, url):
  77. video_id = self._match_id(url)
  78. self._request_webpage(
  79. HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
  80. video_id, 'Getting cookies')
  81. video = self._download_json(
  82. f'http://api.globovideos.com/videos/{video_id}/playlist',
  83. video_id)['videos'][0]
  84. if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
  85. self.report_drm(video_id)
  86. title = video['title']
  87. formats = []
  88. security = self._download_json(
  89. 'https://playback.video.globo.com/v2/video-session', video_id, f'Downloading security hash for {video_id}',
  90. headers={'content-type': 'application/json'}, data=json.dumps({
  91. 'player_type': 'desktop',
  92. 'video_id': video_id,
  93. 'quality': 'max',
  94. 'content_protection': 'widevine',
  95. 'vsid': '581b986b-4c40-71f0-5a58-803e579d5fa2',
  96. 'tz': '-3.0:00',
  97. }).encode())
  98. self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie')
  99. security_hash = security['sources'][0]['token']
  100. if not security_hash:
  101. message = security.get('message')
  102. if message:
  103. raise ExtractorError(
  104. f'{self.IE_NAME} returned error: {message}', expected=True)
  105. hash_code = security_hash[:2]
  106. padding = '%010d' % random.randint(1, 10000000000)
  107. if hash_code in ('04', '14'):
  108. received_time = security_hash[3:13]
  109. received_md5 = security_hash[24:]
  110. hash_prefix = security_hash[:23]
  111. elif hash_code in ('02', '12', '03', '13'):
  112. received_time = security_hash[2:12]
  113. received_md5 = security_hash[22:]
  114. padding += '1'
  115. hash_prefix = '05' + security_hash[:22]
  116. padded_sign_time = str(int(received_time) + 86400) + padding
  117. md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
  118. signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
  119. signed_hash = hash_prefix + padded_sign_time + signed_md5
  120. source = security['sources'][0]['url_parts']
  121. resource_url = source['scheme'] + '://' + source['domain'] + source['path']
  122. signed_url = '{}?h={}&k=html5&a={}'.format(resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
  123. fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
  124. signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
  125. formats.extend(fmts)
  126. for resource in video['resources']:
  127. if resource.get('type') == 'subtitle':
  128. subtitles.setdefault(resource.get('language') or 'por', []).append({
  129. 'url': resource.get('url'),
  130. })
  131. subs = try_get(security, lambda x: x['source']['subtitles'], expected_type=dict) or {}
  132. for sub_lang, sub_url in subs.items():
  133. if sub_url:
  134. subtitles.setdefault(sub_lang or 'por', []).append({
  135. 'url': sub_url,
  136. })
  137. subs = try_get(security, lambda x: x['source']['subtitles_webvtt'], expected_type=dict) or {}
  138. for sub_lang, sub_url in subs.items():
  139. if sub_url:
  140. subtitles.setdefault(sub_lang or 'por', []).append({
  141. 'url': sub_url,
  142. })
  143. duration = float_or_none(video.get('duration'), 1000)
  144. uploader = video.get('channel')
  145. uploader_id = str_or_none(video.get('channel_id'))
  146. return {
  147. 'id': video_id,
  148. 'title': title,
  149. 'duration': duration,
  150. 'uploader': uploader,
  151. 'uploader_id': uploader_id,
  152. 'formats': formats,
  153. 'subtitles': subtitles,
  154. }
  155. class GloboArticleIE(InfoExtractor):
  156. _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
  157. _VIDEOID_REGEXES = [
  158. r'\bdata-video-id=["\'](\d{7,})["\']',
  159. r'\bdata-player-videosids=["\'](\d{7,})["\']',
  160. r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
  161. r'\bdata-id=["\'](\d{7,})["\']',
  162. r'<div[^>]+\bid=["\'](\d{7,})["\']',
  163. r'<bs-player[^>]+\bvideoid=["\'](\d{8,})["\']',
  164. ]
  165. _TESTS = [{
  166. 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
  167. 'info_dict': {
  168. 'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
  169. 'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
  170. 'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
  171. },
  172. 'playlist_count': 1,
  173. }, {
  174. 'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
  175. 'info_dict': {
  176. 'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
  177. 'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
  178. 'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
  179. },
  180. 'playlist_count': 6,
  181. }, {
  182. 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
  183. 'only_matching': True,
  184. }, {
  185. 'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
  186. 'only_matching': True,
  187. }, {
  188. 'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
  189. 'only_matching': True,
  190. }, {
  191. 'url': 'https://ge.globo.com/video/ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094.ghtml',
  192. 'info_dict': {
  193. 'id': 'ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094',
  194. 'title': 'Tá na Área: como foi assistir ao jogo do Palmeiras que a Globo não passou',
  195. 'description': 'md5:2d089d036c4c9675117d3a56f8c61739',
  196. },
  197. 'playlist_count': 1,
  198. }, {
  199. 'url': 'https://redeglobo.globo.com/rpc/meuparana/noticia/a-producao-de-chocolates-no-parana.ghtml',
  200. 'info_dict': {
  201. 'id': 'a-producao-de-chocolates-no-parana',
  202. 'title': 'A produção de chocolates no Paraná',
  203. 'description': 'md5:f2e3daf00ffd1dc0e9a8a6c7cfb0a89e',
  204. },
  205. 'playlist_count': 2,
  206. }]
  207. @classmethod
  208. def suitable(cls, url):
  209. return False if GloboIE.suitable(url) else super().suitable(url)
  210. def _real_extract(self, url):
  211. display_id = self._match_id(url)
  212. webpage = self._download_webpage(url, display_id)
  213. video_ids = []
  214. for video_regex in self._VIDEOID_REGEXES:
  215. video_ids.extend(re.findall(video_regex, webpage))
  216. entries = [
  217. self.url_result(f'globo:{video_id}', GloboIE.ie_key())
  218. for video_id in orderedSet(video_ids)]
  219. title = self._og_search_title(webpage).strip()
  220. description = self._html_search_meta('description', webpage)
  221. return self.playlist_result(entries, display_id, title, description)