commit: ac61f2e0581ad15727870e8dd9a80ddacf01636e
parent 8487e8b98afd1b469c2b9d29ee53bd173ff9a7e0
Author: Remita Amine <remitamine@gmail.com>
Date: Mon, 4 Jan 2021 01:09:11 +0100
[applepodcasts] Add new extractor (#25918)
Diffstat:
2 files changed, 62 insertions(+), 0 deletions(-)
diff --git a/youtube_dl/extractor/applepodcasts.py b/youtube_dl/extractor/applepodcasts.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_podcast_url,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class ApplePodcastsIE(InfoExtractor):
+    """Extractor for a single podcasts.apple.com episode, keyed by the numeric ``i=`` value in the URL."""
+    _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
+        'md5': 'df02e6acb11c10e844946a39e7222b08',
+        'info_dict': {
+            'id': '1000482637777',
+            'ext': 'mp3',
+            'title': '207 - Whitney Webb Returns',
+            'description': 'md5:13a73bade02d2e43737751e3987e1399',
+            'upload_date': '20200705',
+            'timestamp': 1593921600,
+            'duration': 6425,
+            'series': 'The Tim Dillon Show',
+        },
+    }, {
+        'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
+        'only_matching': True,
+    }, {
+        'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
+        'only_matching': True,
+    }, {
+        'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the episode at ``url``, read from the JSON store embedded in its page."""
+        episode_id = self._match_id(url)
+        page = self._download_webpage(url, episode_id)
+        # Episode metadata sits in the JSON body of the element with id "shoebox-ember-data-store".
+        raw_store = self._search_regex(r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<', page, 'ember data')
+        store = self._parse_json(raw_store, episode_id)
+        attrs = store['data']['attributes']
+        summaries = attrs.get('description') or {}
+        # Show name: taken from the last 'media/podcast' entry in 'included'; None when there is none.
+        names = [try_get(item, lambda x: x['attributes']['name'])
+                 for item in (store.get('included') or []) if item.get('type') == 'media/podcast']
+
+        return {
+            'id': episode_id,
+            'title': attrs['name'],
+            'url': clean_podcast_url(attrs['assetUrl']),
+            'description': summaries.get('standard') or summaries.get('short'),
+            'timestamp': parse_iso8601(attrs.get('releaseDateTime')),
+            'duration': int_or_none(attrs.get('durationInMilliseconds'), 1000),
+            'series': names[-1] if names else None,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -55,6 +55,7 @@ from .appletrailers import (
AppleTrailersIE,
AppleTrailersSectionIE,
)
+from .applepodcasts import ApplePodcastsIE
from .archiveorg import ArchiveOrgIE
from .arcpublishing import ArcPublishingIE
from .arkena import ArkenaIE