logo

youtube-dl

[mirror] Download/Watch videos from video hosters
git clone https://hacktivis.me/git/mirror/youtube-dl.git

globalplayer.py (10013B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. clean_html,
  6. join_nonempty,
  7. merge_dicts,
  8. parse_duration,
  9. str_or_none,
  10. T,
  11. traverse_obj,
  12. unified_strdate,
  13. unified_timestamp,
  14. urlhandle_detect_ext,
  15. )
  16. class GlobalPlayerBaseIE(InfoExtractor):
  17. def _get_page_props(self, url, video_id):
  18. webpage = self._download_webpage(url, video_id)
  19. return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
  20. def _request_ext(self, url, video_id):
  21. return urlhandle_detect_ext(self._request_webpage( # Server rejects HEAD requests
  22. url, video_id, note='Determining source extension'))
  23. @staticmethod
  24. def _clean_desc(x):
  25. x = clean_html(x)
  26. if x:
  27. x = x.replace('\xa0', ' ')
  28. return x
  29. def _extract_audio(self, episode, series):
  30. return merge_dicts({
  31. 'vcodec': 'none',
  32. }, traverse_obj(series, {
  33. 'series': 'title',
  34. 'series_id': 'id',
  35. 'thumbnail': 'imageUrl',
  36. 'uploader': 'itunesAuthor', # podcasts only
  37. }), traverse_obj(episode, {
  38. 'id': 'id',
  39. 'description': ('description', T(self._clean_desc)),
  40. 'duration': ('duration', T(parse_duration)),
  41. 'thumbnail': 'imageUrl',
  42. 'url': 'streamUrl',
  43. 'timestamp': (('pubDate', 'startDate'), T(unified_timestamp)),
  44. 'title': 'title',
  45. }, get_all=False), rev=True)
  46. class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
  47. _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
  48. _TESTS = [{
  49. 'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
  50. 'info_dict': {
  51. 'id': '2mx1E',
  52. 'ext': 'aac',
  53. 'display_id': 'smoothchill-uk',
  54. 'title': 're:^Smooth Chill.+$',
  55. 'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
  56. 'description': 'Music To Chill To',
  57. # 'live_status': 'is_live',
  58. 'is_live': True,
  59. },
  60. }, {
  61. # national station
  62. 'url': 'https://www.globalplayer.com/live/heart/uk/',
  63. 'info_dict': {
  64. 'id': '2mwx4',
  65. 'ext': 'aac',
  66. 'description': 'turn up the feel good!',
  67. 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
  68. # 'live_status': 'is_live',
  69. 'is_live': True,
  70. 'title': 're:^Heart UK.+$',
  71. 'display_id': 'heart-uk',
  72. },
  73. }, {
  74. # regional variation
  75. 'url': 'https://www.globalplayer.com/live/heart/london/',
  76. 'info_dict': {
  77. 'id': 'AMqg',
  78. 'ext': 'aac',
  79. 'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
  80. 'title': 're:^Heart London.+$',
  81. # 'live_status': 'is_live',
  82. 'is_live': True,
  83. 'display_id': 'heart-london',
  84. 'description': 'turn up the feel good!',
  85. },
  86. }]
  87. def _real_extract(self, url):
  88. video_id = self._match_id(url)
  89. station = self._get_page_props(url, video_id)['station']
  90. stream_url = station['streamUrl']
  91. return merge_dicts({
  92. 'id': station['id'],
  93. 'display_id': (
  94. join_nonempty('brandSlug', 'slug', from_dict=station)
  95. or station.get('legacyStationPrefix')),
  96. 'url': stream_url,
  97. 'ext': self._request_ext(stream_url, video_id),
  98. 'vcodec': 'none',
  99. 'is_live': True,
  100. }, {
  101. 'title': self._live_title(traverse_obj(
  102. station, (('name', 'brandName'), T(str_or_none)),
  103. get_all=False)),
  104. }, traverse_obj(station, {
  105. 'description': 'tagline',
  106. 'thumbnail': 'brandLogo',
  107. }), rev=True)
  108. class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
  109. _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
  110. _TESTS = [{
  111. # "live playlist"
  112. 'url': 'https://www.globalplayer.com/playlists/8bLk/',
  113. 'info_dict': {
  114. 'id': '8bLk',
  115. 'ext': 'aac',
  116. # 'live_status': 'is_live',
  117. 'is_live': True,
  118. 'description': r're:(?s).+\bclassical\b.+\bClassic FM Hall [oO]f Fame\b',
  119. 'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
  120. 'title': 're:Classic FM Hall of Fame.+$'
  121. },
  122. }]
  123. def _real_extract(self, url):
  124. video_id = self._match_id(url)
  125. station = self._get_page_props(url, video_id)['playlistData']
  126. stream_url = station['streamUrl']
  127. return merge_dicts({
  128. 'id': video_id,
  129. 'url': stream_url,
  130. 'ext': self._request_ext(stream_url, video_id),
  131. 'vcodec': 'none',
  132. 'is_live': True,
  133. }, traverse_obj(station, {
  134. 'title': 'title',
  135. 'description': ('description', T(self._clean_desc)),
  136. 'thumbnail': 'image',
  137. }), rev=True)
  138. class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
  139. _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
  140. _TESTS = [{
  141. # podcast
  142. 'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
  143. 'playlist_mincount': 5,
  144. 'info_dict': {
  145. 'id': '42KuaM',
  146. 'title': 'Filthy Ritual',
  147. 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
  148. 'categories': ['Society & Culture', 'True Crime'],
  149. 'uploader': 'Global',
  150. 'description': r're:(?s).+\bscam\b.+?\bseries available now\b',
  151. },
  152. }, {
  153. # radio catchup
  154. 'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
  155. 'playlist_mincount': 2,
  156. 'info_dict': {
  157. 'id': '46vyD7z',
  158. 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
  159. 'title': 'Nick Ferrari',
  160. 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
  161. },
  162. }]
  163. def _real_extract(self, url):
  164. video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
  165. props = self._get_page_props(url, video_id)
  166. series = props['podcastInfo'] if podcast else props['catchupInfo']
  167. return merge_dicts({
  168. '_type': 'playlist',
  169. 'id': video_id,
  170. 'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
  171. series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
  172. 'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
  173. }, traverse_obj(series, {
  174. 'description': ('description', T(self._clean_desc)),
  175. 'thumbnail': 'imageUrl',
  176. 'title': 'title',
  177. 'uploader': 'itunesAuthor', # podcasts only
  178. }), rev=True)
  179. class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
  180. _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
  181. _TESTS = [{
  182. # podcast
  183. 'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
  184. 'info_dict': {
  185. 'id': '7DrfNnE',
  186. 'ext': 'mp3',
  187. 'title': 'Filthy Ritual - Trailer',
  188. 'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
  189. 'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
  190. 'duration': 225.0,
  191. 'timestamp': 1681254900,
  192. 'series': 'Filthy Ritual',
  193. 'series_id': '42KuaM',
  194. 'upload_date': '20230411',
  195. 'uploader': 'Global',
  196. },
  197. }, {
  198. # radio catchup
  199. 'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
  200. 'only_matching': True,
  201. # expired: refresh the details with a current show for a full test
  202. 'info_dict': {
  203. 'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
  204. 'ext': 'm4a',
  205. 'timestamp': 1682056800,
  206. 'series': 'Nick Ferrari',
  207. 'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
  208. 'upload_date': '20230421',
  209. 'series_id': '46vyD7z',
  210. 'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
  211. 'title': 'Nick Ferrari',
  212. 'duration': 10800.0,
  213. },
  214. }]
  215. def _real_extract(self, url):
  216. video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
  217. props = self._get_page_props(url, video_id)
  218. episode = props['podcastEpisode'] if podcast else props['catchupEpisode']
  219. return self._extract_audio(
  220. episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})
  221. class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
  222. _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
  223. _TESTS = [{
  224. 'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
  225. 'info_dict': {
  226. 'id': '2JsSZ7Gm2uP',
  227. 'ext': 'mp4',
  228. 'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
  229. 'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
  230. 'upload_date': '20230420',
  231. 'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
  232. },
  233. }]
  234. def _real_extract(self, url):
  235. video_id = self._match_id(url)
  236. meta = self._get_page_props(url, video_id)['videoData']
  237. return merge_dicts({
  238. 'id': video_id,
  239. }, traverse_obj(meta, {
  240. 'url': 'url',
  241. 'thumbnail': ('image', 'url'),
  242. 'title': 'title',
  243. 'upload_date': ('publish_date', T(unified_strdate)),
  244. 'description': 'description',
  245. }), rev=True)