logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

spreaker.py (6030B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import itertools
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. float_or_none,
  8. int_or_none,
  9. str_or_none,
  10. try_get,
  11. unified_timestamp,
  12. url_or_none,
  13. )
  14. def _extract_episode(data, episode_id=None):
  15. title = data['title']
  16. download_url = data['download_url']
  17. series = try_get(data, lambda x: x['show']['title'], compat_str)
  18. uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
  19. thumbnails = []
  20. for image in ('image_original', 'image_medium', 'image'):
  21. image_url = url_or_none(data.get('%s_url' % image))
  22. if image_url:
  23. thumbnails.append({'url': image_url})
  24. def stats(key):
  25. return int_or_none(try_get(
  26. data,
  27. (lambda x: x['%ss_count' % key],
  28. lambda x: x['stats']['%ss' % key])))
  29. def duration(key):
  30. return float_or_none(data.get(key), scale=1000)
  31. return {
  32. 'id': compat_str(episode_id or data['episode_id']),
  33. 'url': download_url,
  34. 'display_id': data.get('permalink'),
  35. 'title': title,
  36. 'description': data.get('description'),
  37. 'timestamp': unified_timestamp(data.get('published_at')),
  38. 'uploader': uploader,
  39. 'uploader_id': str_or_none(data.get('author_id')),
  40. 'creator': uploader,
  41. 'duration': duration('duration') or duration('length'),
  42. 'view_count': stats('play'),
  43. 'like_count': stats('like'),
  44. 'comment_count': stats('message'),
  45. 'format': 'MPEG Layer 3',
  46. 'format_id': 'mp3',
  47. 'container': 'mp3',
  48. 'ext': 'mp3',
  49. 'thumbnails': thumbnails,
  50. 'series': series,
  51. 'extractor_key': SpreakerIE.ie_key(),
  52. }
  53. class SpreakerIE(InfoExtractor):
  54. _VALID_URL = r'''(?x)
  55. https?://
  56. api\.spreaker\.com/
  57. (?:
  58. (?:download/)?episode|
  59. v2/episodes
  60. )/
  61. (?P<id>\d+)
  62. '''
  63. _TESTS = [{
  64. 'url': 'https://api.spreaker.com/episode/12534508',
  65. 'info_dict': {
  66. 'id': '12534508',
  67. 'display_id': 'swm-ep15-how-to-market-your-music-part-2',
  68. 'ext': 'mp3',
  69. 'title': 'EP:15 | Music Marketing (Likes) - Part 2',
  70. 'description': 'md5:0588c43e27be46423e183076fa071177',
  71. 'timestamp': 1502250336,
  72. 'upload_date': '20170809',
  73. 'uploader': 'SWM',
  74. 'uploader_id': '9780658',
  75. 'duration': 1063.42,
  76. 'view_count': int,
  77. 'like_count': int,
  78. 'comment_count': int,
  79. 'series': 'Success With Music (SWM)',
  80. },
  81. }, {
  82. 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
  83. 'only_matching': True,
  84. }, {
  85. 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
  86. 'only_matching': True,
  87. }]
  88. def _real_extract(self, url):
  89. episode_id = self._match_id(url)
  90. data = self._download_json(
  91. 'https://api.spreaker.com/v2/episodes/%s' % episode_id,
  92. episode_id)['response']['episode']
  93. return _extract_episode(data, episode_id)
  94. class SpreakerPageIE(InfoExtractor):
  95. _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
  96. _TESTS = [{
  97. 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
  98. 'only_matching': True,
  99. }]
  100. def _real_extract(self, url):
  101. display_id = self._match_id(url)
  102. webpage = self._download_webpage(url, display_id)
  103. episode_id = self._search_regex(
  104. (r'data-episode_id=["\'](?P<id>\d+)',
  105. r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
  106. return self.url_result(
  107. 'https://api.spreaker.com/episode/%s' % episode_id,
  108. ie=SpreakerIE.ie_key(), video_id=episode_id)
  109. class SpreakerShowIE(InfoExtractor):
  110. _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
  111. _TESTS = [{
  112. 'url': 'https://api.spreaker.com/show/4652058',
  113. 'info_dict': {
  114. 'id': '4652058',
  115. },
  116. 'playlist_mincount': 118,
  117. }]
  118. def _entries(self, show_id):
  119. for page_num in itertools.count(1):
  120. episodes = self._download_json(
  121. 'https://api.spreaker.com/show/%s/episodes' % show_id,
  122. show_id, note='Downloading JSON page %d' % page_num, query={
  123. 'page': page_num,
  124. 'max_per_page': 100,
  125. })
  126. pager = try_get(episodes, lambda x: x['response']['pager'], dict)
  127. if not pager:
  128. break
  129. results = pager.get('results')
  130. if not results or not isinstance(results, list):
  131. break
  132. for result in results:
  133. if not isinstance(result, dict):
  134. continue
  135. yield _extract_episode(result)
  136. if page_num == pager.get('last_page'):
  137. break
  138. def _real_extract(self, url):
  139. show_id = self._match_id(url)
  140. return self.playlist_result(self._entries(show_id), playlist_id=show_id)
  141. class SpreakerShowPageIE(InfoExtractor):
  142. _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
  143. _TESTS = [{
  144. 'url': 'https://www.spreaker.com/show/success-with-music',
  145. 'only_matching': True,
  146. }]
  147. def _real_extract(self, url):
  148. display_id = self._match_id(url)
  149. webpage = self._download_webpage(url, display_id)
  150. show_id = self._search_regex(
  151. r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
  152. return self.url_result(
  153. 'https://api.spreaker.com/show/%s' % show_id,
  154. ie=SpreakerShowIE.ie_key(), video_id=show_id)