logo

youtube-dl

[mirror] Download/Watch videos from video hosters

git clone https://hacktivis.me/git/mirror/youtube-dl.git

simplecast.py (6223B)


  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. clean_podcast_url,
  7. int_or_none,
  8. parse_iso8601,
  9. strip_or_none,
  10. try_get,
  11. urlencode_postdata,
  12. )
  13. class SimplecastBaseIE(InfoExtractor):
  14. _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
  15. _API_BASE = 'https://api.simplecast.com/'
  16. def _call_api(self, path_tmpl, video_id):
  17. return self._download_json(
  18. self._API_BASE + path_tmpl % video_id, video_id)
  19. def _call_search_api(self, resource, resource_id, resource_url):
  20. return self._download_json(
  21. 'https://api.simplecast.com/%ss/search' % resource, resource_id,
  22. data=urlencode_postdata({'url': resource_url}))
  23. def _parse_episode(self, episode):
  24. episode_id = episode['id']
  25. title = episode['title'].strip()
  26. audio_file = episode.get('audio_file') or {}
  27. audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']
  28. season = episode.get('season') or {}
  29. season_href = season.get('href')
  30. season_id = None
  31. if season_href:
  32. season_id = self._search_regex(
  33. r'https?://api.simplecast.com/seasons/(%s)' % self._UUID_REGEX,
  34. season_href, 'season id', default=None)
  35. webpage_url = episode.get('episode_url')
  36. channel_url = None
  37. if webpage_url:
  38. channel_url = self._search_regex(
  39. r'(https?://[^/]+\.simplecast\.com)',
  40. webpage_url, 'channel url', default=None)
  41. return {
  42. 'id': episode_id,
  43. 'display_id': episode.get('slug'),
  44. 'title': title,
  45. 'url': clean_podcast_url(audio_file_url),
  46. 'webpage_url': webpage_url,
  47. 'channel_url': channel_url,
  48. 'series': try_get(episode, lambda x: x['podcast']['title']),
  49. 'season_number': int_or_none(season.get('number')),
  50. 'season_id': season_id,
  51. 'thumbnail': episode.get('image_url'),
  52. 'episode_id': episode_id,
  53. 'episode_number': int_or_none(episode.get('number')),
  54. 'description': strip_or_none(episode.get('description')),
  55. 'timestamp': parse_iso8601(episode.get('published_at')),
  56. 'duration': int_or_none(episode.get('duration')),
  57. 'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
  58. }
  59. class SimplecastIE(SimplecastBaseIE):
  60. IE_NAME = 'simplecast'
  61. _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
  62. _COMMON_TEST_INFO = {
  63. 'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
  64. 'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
  65. 'ext': 'mp3',
  66. 'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
  67. 'episode_number': 1,
  68. 'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
  69. 'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
  70. 'season_number': 1,
  71. 'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
  72. 'series': 'The RE:BIND.io Podcast',
  73. 'duration': 5343,
  74. 'timestamp': 1580979475,
  75. 'upload_date': '20200206',
  76. 'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
  77. 'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
  78. }
  79. _TESTS = [{
  80. 'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
  81. 'md5': '8c93be7be54251bf29ee97464eabe61c',
  82. 'info_dict': _COMMON_TEST_INFO,
  83. }, {
  84. 'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
  85. 'only_matching': True,
  86. }]
  87. @staticmethod
  88. def _extract_urls(webpage):
  89. return re.findall(
  90. r'''(?x)<iframe[^>]+src=["\']
  91. (
  92. https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
  93. player\.simplecast\.com/%s
  94. ))''' % SimplecastBaseIE._UUID_REGEX, webpage)
  95. def _real_extract(self, url):
  96. episode_id = self._match_id(url)
  97. episode = self._call_api('episodes/%s', episode_id)
  98. return self._parse_episode(episode)
  99. class SimplecastEpisodeIE(SimplecastBaseIE):
  100. IE_NAME = 'simplecast:episode'
  101. _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
  102. _TEST = {
  103. 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
  104. 'md5': '8c93be7be54251bf29ee97464eabe61c',
  105. 'info_dict': SimplecastIE._COMMON_TEST_INFO,
  106. }
  107. def _real_extract(self, url):
  108. mobj = re.match(self._VALID_URL, url)
  109. episode = self._call_search_api(
  110. 'episode', mobj.group(1), mobj.group(0))
  111. return self._parse_episode(episode)
  112. class SimplecastPodcastIE(SimplecastBaseIE):
  113. IE_NAME = 'simplecast:podcast'
  114. _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
  115. _TESTS = [{
  116. 'url': 'https://the-re-bind-io-podcast.simplecast.com',
  117. 'playlist_mincount': 33,
  118. 'info_dict': {
  119. 'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
  120. 'title': 'The RE:BIND.io Podcast',
  121. },
  122. }, {
  123. 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
  124. 'only_matching': True,
  125. }]
  126. def _real_extract(self, url):
  127. subdomain = self._match_id(url)
  128. site = self._call_search_api('site', subdomain, url)
  129. podcast = site['podcast']
  130. podcast_id = podcast['id']
  131. podcast_title = podcast.get('title')
  132. def entries():
  133. episodes = self._call_api('podcasts/%s/episodes', podcast_id)
  134. for episode in (episodes.get('collection') or []):
  135. info = self._parse_episode(episode)
  136. info['series'] = podcast_title
  137. yield info
  138. return self.playlist_result(entries(), podcast_id, podcast_title)