commit: c32a059f52ccb76703785dccdd91ae9ce947a824
parent 6911312e53dc2b2ac76450741e7e86d963cb939a
Author: Remita Amine <remitamine@gmail.com>
Date: Sat, 26 Dec 2020 16:54:24 +0100
[bbc] switch to media selector v6
closes #23232
closes #23933
closes #26303
closes #26432
closes #26821
closes #27538
Diffstat:
1 file changed, 20 insertions(+), 54 deletions(-)
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
@@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
_LOGIN_URL = 'https://account.bbc.com/signin'
_NETRC_MACHINE = 'bbc'
- _MEDIASELECTOR_URLS = [
+ _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
+ _MEDIA_SETS = [
# Provides HQ HLS streams with even better quality that pc mediaset but fails
# with geolocation in some cases when it's even not geo restricted at all (e.g.
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
+ 'iptv-all',
+ 'pc',
]
- _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
- _NAMESPACES = (
- _MEDIASELECTION_NS,
- _EMP_PLAYLIST_NS,
- )
-
_TESTS = [
{
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
@@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
'only_matching': True,
}]
- _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
-
def _login(self):
username, password = self._get_login_info()
if username is None:
@@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
def _extract_items(self, playlist):
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
- def _findall_ns(self, element, xpath):
- elements = []
- for ns in self._NAMESPACES:
- elements.extend(element.findall(xpath % ns))
- return elements
-
def _extract_medias(self, media_selection):
- error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
- if error is None:
- media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
- if error is not None:
- raise BBCCoUkIE.MediaSelectionError(error.get('id'))
- return self._findall_ns(media_selection, './{%s}media')
+ error = media_selection.get('result')
+ if error:
+ raise BBCCoUkIE.MediaSelectionError(error)
+ return media_selection.get('media') or []
def _extract_connections(self, media):
- return self._findall_ns(media, './{%s}connection')
+ return media.get('connection') or []
def _get_subtitles(self, media, programme_id):
subtitles = {}
@@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
cc_url, programme_id, 'Downloading captions', fatal=False)
if not isinstance(captions, compat_etree_Element):
continue
- lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
- subtitles[lang] = [
+ subtitles['en'] = [
{
'url': connection.get('href'),
'ext': 'ttml',
},
]
+ break
return subtitles
def _raise_extractor_error(self, media_selection_error):
@@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):
def _download_media_selector(self, programme_id):
last_exception = None
- for mediaselector_url in self._MEDIASELECTOR_URLS:
+ for media_set in self._MEDIA_SETS:
try:
return self._download_media_selector_url(
- mediaselector_url % programme_id, programme_id)
+ self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e
@@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
self._raise_extractor_error(last_exception)
def _download_media_selector_url(self, url, programme_id=None):
- media_selection = self._download_xml(
- url, programme_id, 'Downloading media selection XML',
+ media_selection = self._download_json(
+ url, programme_id, 'Downloading media selection JSON',
expected_status=(403, 404))
return self._process_media_selector(media_selection, programme_id)
@@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
if kind in ('video', 'audio'):
bitrate = int_or_none(media.get('bitrate'))
encoding = media.get('encoding')
- service = media.get('service')
width = int_or_none(media.get('width'))
height = int_or_none(media.get('height'))
file_size = int_or_none(media.get('media_file_size'))
@@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
supplier = connection.get('supplier')
transfer_format = connection.get('transferFormat')
format_id = supplier or conn_kind or protocol
- if service:
- format_id = '%s_%s' % (service, format_id)
# ASX playlist
if supplier == 'asx':
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
@@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False))
- if re.search(self._USP_RE, href):
- usp_formats = self._extract_m3u8_formats(
- re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
- programme_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id=format_id, fatal=False)
- for f in usp_formats:
- if f.get('height') and f['height'] > 720:
- continue
- formats.append(f)
elif transfer_format == 'hds':
formats.extend(self._extract_f4m_formats(
href, programme_id, f4m_id=format_id, fatal=False))
else:
- if not service and not supplier and bitrate:
+ if not supplier and bitrate:
format_id += '-%d' % bitrate
fmt = {
'format_id': format_id,
@@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page')
error = self._search_regex(
- r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+ r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
webpage, 'error', default=None)
if error:
raise ExtractorError(error, expected=True)
@@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
IE_DESC = 'BBC'
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
- _MEDIASELECTOR_URLS = [
- # Provides HQ HLS streams but fails with geolocation in some cases when it's
- # even not geo restricted at all
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
- # Provides more formats, namely direct mp4 links, but fails on some videos with
- # notukerror for non UK (?) users (e.g.
- # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
- 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
- # Provides fewer formats, but works everywhere for everybody (hopefully)
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
+ _MEDIA_SETS = [
+ 'mobile-tablet-main',
+ 'pc',
]
_TESTS = [{