logo

oasis-root

Compiled tree of Oasis Linux based on own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

spreaker.py (6466B)


  1. import itertools
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. filter_dict,
  5. float_or_none,
  6. int_or_none,
  7. parse_qs,
  8. str_or_none,
  9. try_get,
  10. unified_timestamp,
  11. url_or_none,
  12. )
  13. from ..utils.traversal import traverse_obj
  14. def _extract_episode(data, episode_id=None):
  15. title = data['title']
  16. download_url = data['download_url']
  17. series = try_get(data, lambda x: x['show']['title'], str)
  18. uploader = try_get(data, lambda x: x['author']['fullname'], str)
  19. thumbnails = []
  20. for image in ('image_original', 'image_medium', 'image'):
  21. image_url = url_or_none(data.get(f'{image}_url'))
  22. if image_url:
  23. thumbnails.append({'url': image_url})
  24. def stats(key):
  25. return int_or_none(try_get(
  26. data,
  27. (lambda x: x[f'{key}s_count'],
  28. lambda x: x['stats'][f'{key}s'])))
  29. def duration(key):
  30. return float_or_none(data.get(key), scale=1000)
  31. return {
  32. 'id': str(episode_id or data['episode_id']),
  33. 'url': download_url,
  34. 'display_id': data.get('permalink'),
  35. 'title': title,
  36. 'description': data.get('description'),
  37. 'timestamp': unified_timestamp(data.get('published_at')),
  38. 'uploader': uploader,
  39. 'uploader_id': str_or_none(data.get('author_id')),
  40. 'creator': uploader,
  41. 'duration': duration('duration') or duration('length'),
  42. 'view_count': stats('play'),
  43. 'like_count': stats('like'),
  44. 'comment_count': stats('message'),
  45. 'format': 'MPEG Layer 3',
  46. 'format_id': 'mp3',
  47. 'container': 'mp3',
  48. 'ext': 'mp3',
  49. 'thumbnails': thumbnails,
  50. 'series': series,
  51. 'extractor_key': SpreakerIE.ie_key(),
  52. }
  53. class SpreakerIE(InfoExtractor):
  54. _VALID_URL = [
  55. r'https?://api\.spreaker\.com/(?:(?:download/)?episode|v2/episodes)/(?P<id>\d+)',
  56. r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P<id>\d+)/?(?:[?#]|$)',
  57. ]
  58. _TESTS = [{
  59. 'url': 'https://api.spreaker.com/episode/12534508',
  60. 'info_dict': {
  61. 'id': '12534508',
  62. 'display_id': 'swm-ep15-how-to-market-your-music-part-2',
  63. 'ext': 'mp3',
  64. 'title': 'EP:15 | Music Marketing (Likes) - Part 2',
  65. 'description': 'md5:0588c43e27be46423e183076fa071177',
  66. 'timestamp': 1502250336,
  67. 'upload_date': '20170809',
  68. 'uploader': 'SWM',
  69. 'uploader_id': '9780658',
  70. 'duration': 1063.42,
  71. 'view_count': int,
  72. 'like_count': int,
  73. 'comment_count': int,
  74. 'series': 'Success With Music | SWM',
  75. 'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg',
  76. 'creators': ['SWM'],
  77. },
  78. }, {
  79. 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
  80. 'only_matching': True,
  81. }, {
  82. 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
  83. 'only_matching': True,
  84. }, {
  85. 'note': 'episode',
  86. 'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615',
  87. 'info_dict': {
  88. 'id': '60269615',
  89. 'display_id': 'grunge-music-origins-the-raw-sound-that-',
  90. 'ext': 'mp3',
  91. 'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation',
  92. 'description': str,
  93. 'timestamp': 1717468905,
  94. 'upload_date': '20240604',
  95. 'uploader': 'Katie Brown 2',
  96. 'uploader_id': '17733249',
  97. 'duration': 818.83,
  98. 'view_count': int,
  99. 'like_count': int,
  100. 'comment_count': int,
  101. 'series': '90s Grunge',
  102. 'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg',
  103. 'creators': ['Katie Brown 2'],
  104. },
  105. }, {
  106. 'url': 'https://www.spreaker.com/episode/60269615',
  107. 'only_matching': True,
  108. }]
  109. def _real_extract(self, url):
  110. episode_id = self._match_id(url)
  111. data = self._download_json(
  112. f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id,
  113. query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode']
  114. return _extract_episode(data, episode_id)
  115. class SpreakerShowIE(InfoExtractor):
  116. _VALID_URL = [
  117. r'https?://api\.spreaker\.com/show/(?P<id>\d+)',
  118. r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P<id>[\d]+)',
  119. r'https?://(?:www\.)?spreaker\.com/show/(?P<id>\d+)/episodes/feed',
  120. ]
  121. _TESTS = [{
  122. 'url': 'https://api.spreaker.com/show/4652058',
  123. 'info_dict': {
  124. 'id': '4652058',
  125. },
  126. 'playlist_mincount': 118,
  127. }, {
  128. 'url': 'https://www.spreaker.com/podcast/health-wealth--5918323',
  129. 'info_dict': {
  130. 'id': '5918323',
  131. },
  132. 'playlist_mincount': 60,
  133. }, {
  134. 'url': 'https://www.spreaker.com/show/5887186/episodes/feed',
  135. 'info_dict': {
  136. 'id': '5887186',
  137. },
  138. 'playlist_mincount': 290,
  139. }]
  140. def _entries(self, show_id, key=None):
  141. for page_num in itertools.count(1):
  142. episodes = self._download_json(
  143. f'https://api.spreaker.com/show/{show_id}/episodes',
  144. show_id, note=f'Downloading JSON page {page_num}', query=filter_dict({
  145. 'page': page_num,
  146. 'max_per_page': 100,
  147. 'key': key,
  148. }))
  149. pager = try_get(episodes, lambda x: x['response']['pager'], dict)
  150. if not pager:
  151. break
  152. results = pager.get('results')
  153. if not results or not isinstance(results, list):
  154. break
  155. for result in results:
  156. if not isinstance(result, dict):
  157. continue
  158. yield _extract_episode(result)
  159. if page_num == pager.get('last_page'):
  160. break
  161. def _real_extract(self, url):
  162. show_id = self._match_id(url)
  163. key = traverse_obj(parse_qs(url), ('key', 0))
  164. return self.playlist_result(self._entries(show_id, key), playlist_id=show_id)