commit: 9ee984fc760c9a3f4818055ff28ea886aadc58cb
parent 53528e1d2385494c72349f609907f0164d6f8431
Author: Remita Amine <remitamine@gmail.com>
Date: Tue, 29 Dec 2020 16:13:36 +0100
[aenetworks] add support for biography.com (closes #3863)
Diffstat:
2 files changed, 73 insertions(+), 26 deletions(-)
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
@@ -6,6 +6,7 @@ import re
from .theplatform import ThePlatformIE
from ..utils import (
ExtractorError,
+ GeoRestrictedError,
int_or_none,
update_url_query,
urlencode_postdata,
@@ -28,6 +29,7 @@ class AENetworksBaseIE(ThePlatformIE):
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
'fyi.tv': ('FYI', 'fyi'),
'historyvault.com': (None, 'historyvault'),
+ 'biography.com': (None, 'biography'),
}
def _extract_aen_smil(self, smil_url, video_id, auth=None):
@@ -54,6 +56,8 @@ class AENetworksBaseIE(ThePlatformIE):
tp_formats, tp_subtitles = self._extract_theplatform_smil(
m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
except ExtractorError as e:
+ if isinstance(e, GeoRestrictedError):
+ raise
last_e = e
continue
formats.extend(tp_formats)
@@ -67,6 +71,34 @@ class AENetworksBaseIE(ThePlatformIE):
'subtitles': subtitles,
}
+ def _extract_aetn_info(self, domain, filter_key, filter_value, url):
+ requestor_id, brand = self._DOMAIN_MAP[domain]
+ result = self._download_json(
+ 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
+ filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+ title = result['title']
+ video_id = result['id']
+ media_url = result['publicUrl']
+ theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
+ r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
+ info = self._parse_theplatform_metadata(theplatform_metadata)
+ auth = None
+ if theplatform_metadata.get('AETN$isBehindWall'):
+ resource = self._get_mvpd_resource(
+ requestor_id, theplatform_metadata['title'],
+ theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
+ theplatform_metadata['ratings'][0]['rating'])
+ auth = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ info.update(self._extract_aen_smil(media_url, video_id, auth))
+ info.update({
+ 'title': title,
+ 'series': result.get('seriesName'),
+ 'season_number': int_or_none(result.get('tvSeasonNumber')),
+ 'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
+ })
+ return info
+
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks'
@@ -139,32 +171,7 @@ class AENetworksIE(AENetworksBaseIE):
def _real_extract(self, url):
domain, canonical = re.match(self._VALID_URL, url).groups()
- requestor_id, brand = self._DOMAIN_MAP[domain]
- result = self._download_json(
- 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
- canonical, query={'filter[canonical]': '/' + canonical})['results'][0]
- title = result['title']
- video_id = result['id']
- media_url = result['publicUrl']
- theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
- r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
- info = self._parse_theplatform_metadata(theplatform_metadata)
- auth = None
- if theplatform_metadata.get('AETN$isBehindWall'):
- resource = self._get_mvpd_resource(
- requestor_id, theplatform_metadata['title'],
- theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
- theplatform_metadata['ratings'][0]['rating'])
- auth = self._extract_mvpd_auth(
- url, video_id, requestor_id, resource)
- info.update(self._extract_aen_smil(media_url, video_id, auth))
- info.update({
- 'title': title,
- 'series': result.get('seriesName'),
- 'season_number': int_or_none(result.get('tvSeasonNumber')),
- 'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
- })
- return info
+ return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
class AENetworksListBaseIE(AENetworksBaseIE):
@@ -294,3 +301,41 @@ class HistoryTopicIE(AENetworksBaseIE):
return self.url_result(
'http://www.history.com/videos/' + display_id,
AENetworksIE.ie_key())
+
+
+class HistoryPlayerIE(AENetworksBaseIE):
+ IE_NAME = 'history:player'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_aetn_info(domain, 'id', video_id, url)
+
+
+class BiographyIE(AENetworksBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
+ 'info_dict': {
+ 'id': '30322987',
+ 'ext': 'mp4',
+ 'title': 'Vincent Van Gogh - Full Episode',
+ 'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
+ 'timestamp': 1311970571,
+ 'upload_date': '20110729',
+ 'uploader': 'AENE-NEW',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'add_ie': ['ThePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_url = self._search_regex(
+ r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
+ webpage, 'player URL')
+ return self.url_result(player_url, HistoryPlayerIE.ie_key())
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -33,6 +33,8 @@ from .aenetworks import (
AENetworksCollectionIE,
AENetworksShowIE,
HistoryTopicIE,
+ HistoryPlayerIE,
+ BiographyIE,
)
from .afreecatv import AfreecaTVIE
from .airmozilla import AirMozillaIE