commit: 5e95e18ce949e759e8e26de76c386c44e50b2abd
parent e91df0c5501bd5df9987e310a37df51129ee1cf2
Author: Remita Amine <remitamine@gmail.com>
Date: Fri, 4 Dec 2020 18:04:38 +0100
[nrk] improve format extraction and geo-restriction detection (closes #24221)
Diffstat:
1 file changed, 21 insertions(+), 22 deletions(-)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
@@ -24,6 +24,11 @@ from ..utils import (
class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO']
+ def _extract_nrk_formats(self, asset_url, video_id):
+ return self._extract_m3u8_formats(
+ re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url),
+ video_id, 'mp4', 'm3u8_native', fatal=False)
+
class NRKIE(NRKBaseIE):
_VALID_URL = r'''(?x)
@@ -94,9 +99,7 @@ class NRKIE(NRKBaseIE):
if not format_url:
continue
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_nrk_formats(format_url, video_id))
self._sort_formats(formats)
data = self._download_json(
@@ -298,6 +301,7 @@ class NRKTVIE(NRKBaseIE):
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
+ urls = []
entries = []
conviva = data.get('convivaStatistics') or {}
@@ -314,19 +318,13 @@ class NRKTVIE(NRKBaseIE):
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
for num, asset in enumerate(media_assets, 1):
asset_url = asset.get('url')
- if not asset_url:
+ if not asset_url or asset_url in urls:
continue
- formats = self._extract_akamai_formats(asset_url, video_id)
+ formats = extract_nrk_formats(asset_url, video_id)
if not formats:
continue
self._sort_formats(formats)
- # Some f4m streams may not work with hdcore in fragments' URLs
- for f in formats:
- extra_param = f.get('extra_param_to_segment_url')
- if extra_param and 'hdcore' in extra_param:
- del f['extra_param_to_segment_url']
-
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
@@ -346,16 +344,17 @@ class NRKTVIE(NRKBaseIE):
if not entries:
media_url = data.get('mediaUrl')
- if media_url:
- formats = self._extract_akamai_formats(media_url, video_id)
- self._sort_formats(formats)
- duration = parse_duration(data.get('duration'))
- entries = [{
- 'id': video_id,
- 'title': make_title(title),
- 'duration': duration,
- 'formats': formats,
- }]
+ if media_url and media_url not in urls:
+ formats = extract_nrk_formats(media_url, video_id)
+ if formats:
+ self._sort_formats(formats)
+ duration = parse_duration(data.get('duration'))
+ entries = [{
+ 'id': video_id,
+ 'title': make_title(title),
+ 'duration': duration,
+ 'formats': formats,
+ }]
if not entries:
MESSAGES = {
@@ -366,7 +365,7 @@ class NRKTVIE(NRKBaseIE):
}
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
- if 'IsGeoBlocked' in message_type:
+ if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is Trues:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)