logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git

gbnews.py (5138B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. extract_attributes,
  6. ExtractorError,
  7. T,
  8. traverse_obj,
  9. txt_or_none,
  10. url_or_none,
  11. )
  12. class GBNewsIE(InfoExtractor):
  13. IE_DESC = 'GB News clips, features and live stream'
  14. # \w+ is normally shows or news, but apparently any word redirects to the correct URL
  15. _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
  16. _PLATFORM = 'safari'
  17. _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
  18. _TESTS = [{
  19. 'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889',
  20. 'info_dict': {
  21. 'id': '106889',
  22. 'ext': 'mp4',
  23. 'title': "Andrew Neil's message to companies choosing to boycott GB News",
  24. 'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351',
  25. },
  26. 'skip': '404 not found',
  27. }, {
  28. 'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
  29. 'info_dict': {
  30. 'id': '52264136',
  31. 'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
  32. 'ext': 'mp4',
  33. 'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
  34. 'description': 'The post was criticised by former employers of the broadcaster',
  35. },
  36. }, {
  37. 'url': 'https://www.gbnews.uk/watchlive',
  38. 'info_dict': {
  39. 'id': '1069',
  40. 'display_id': 'watchlive',
  41. 'ext': 'mp4',
  42. 'title': 'GB News Live',
  43. 'is_live': True,
  44. },
  45. 'params': {
  46. 'skip_download': 'm3u8',
  47. },
  48. }]
  49. def _real_extract(self, url):
  50. display_id = self._match_id(url).split('/')[-1]
  51. webpage = self._download_webpage(url, display_id)
  52. # extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341
  53. '''
  54. <div id="video-106908"
  55. class="simplestream"
  56. data-id="GB001"
  57. data-type="vod"
  58. data-key="3Li3Nt2Qs8Ct3Xq9Fi5Uy0Mb2Bj0Qs"
  59. data-token="f9c317c727dc07f515b20036c8ef14a6"
  60. data-expiry="1624300052"
  61. data-uvid="37900558"
  62. data-poster="https://thumbnails.simplestreamcdn.com/gbnews/ondemand/37900558.jpg?width=700&"
  63. data-npaw="false"
  64. data-env="production">
  65. '''
  66. # exception if no match
  67. video_data = self._search_regex(
  68. r'(<div\s[^>]*\bclass\s*=\s*(\'|")(?!.*sidebar\b)simplestream(?:\s[\s\w$-]*)?\2[^>]*>)',
  69. webpage, 'video data')
  70. video_data = extract_attributes(video_data)
  71. ss_id = video_data.get('data-id')
  72. if not ss_id:
  73. raise ExtractorError('Simplestream ID not found')
  74. json_data = self._download_json(
  75. self._SSMP_URL, display_id,
  76. note='Downloading Simplestream JSON metadata',
  77. errnote='Unable to download Simplestream JSON metadata',
  78. query={
  79. 'id': ss_id,
  80. 'env': video_data.get('data-env', 'production'),
  81. }, fatal=False)
  82. meta_url = traverse_obj(json_data, ('response', 'api_hostname'))
  83. if not meta_url:
  84. raise ExtractorError('No API host found')
  85. uvid = video_data['data-uvid']
  86. dtype = video_data.get('data-type')
  87. stream_data = self._download_json(
  88. '%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid),
  89. uvid,
  90. query={
  91. 'key': video_data.get('data-key'),
  92. 'platform': self._PLATFORM,
  93. },
  94. headers={
  95. 'Token': video_data.get('data-token'),
  96. 'Token-Expiry': video_data.get('data-expiry'),
  97. 'Uvid': uvid,
  98. }, fatal=False)
  99. stream_url = traverse_obj(stream_data, (
  100. 'response', 'stream', T(url_or_none)))
  101. if not stream_url:
  102. raise ExtractorError('No stream data/URL')
  103. # now known to be a dict
  104. stream_data = stream_data['response']
  105. drm = stream_data.get('drm')
  106. if drm:
  107. self.report_drm(uvid)
  108. formats = self._extract_m3u8_formats(
  109. stream_url, uvid, ext='mp4', entry_protocol='m3u8_native',
  110. fatal=False)
  111. # exception if no formats
  112. self._sort_formats(formats)
  113. return {
  114. 'id': uvid,
  115. 'display_id': display_id,
  116. 'title': (traverse_obj(stream_data, ('title', T(txt_or_none)))
  117. or self._og_search_title(webpage, default=None)
  118. or display_id.replace('-', ' ').capitalize()),
  119. 'description': self._og_search_description(webpage, default=None),
  120. 'thumbnail': (traverse_obj(video_data, ('data-poster', T(url_or_none)))
  121. or self._og_search_thumbnail(webpage)),
  122. 'formats': formats,
  123. 'is_live': (dtype == 'live') or None,
  124. }