[nrktv] Switch to playback endpoint - youtube-dl - [mirror] Download/Watch videos from video hosters

commit: 2df93a0c4ada8eff399844ac4a249a743e0a980d
parent 75972e200d033429bf9d34793ad3ffc813042347
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sun,  3 Jan 2021 05:01:45 +0700

[nrktv] Switch to playback endpoint

The mediaelement endpoint is no longer in use, so NRKTV extraction now goes through the playback endpoint.

Diffstat:
M youtube_dl/extractor/nrk.py 273 +++++++++++++++++--------------------------------------------------------------

1 file changed, 57 insertions(+), 216 deletions(-)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
@@ -6,15 +6,11 @@ import random
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urllib_parse_unquote,
-)
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
-    parse_age_limit,
     parse_duration,
     try_get,
     urljoin,
@@ -63,60 +59,8 @@ class NRKBaseIE(InfoExtractor):
         return self._download_json(
             urljoin('http://psapi.nrk.no/', path),
             video_id, note or 'Downloading %s JSON' % item,
-            fatal=fatal, query=query)
-
-
-class NRKIE(NRKBaseIE):
-    _VALID_URL = r'''(?x)
-                        (?:
-                            nrk:|
-                            https?://
-                                (?:
-                                    (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
-                                    v8[-.]psapi\.nrk\.no/mediaelement/
-                                )
-                            )
-                            (?P<id>[^?\#&]+)
-                        '''
-
-    _TESTS = [{
-        # video
-        'url': 'http://www.nrk.no/video/PS*150533',
-        'md5': 'f46be075326e23ad0e524edfcb06aeb6',
-        'info_dict': {
-            'id': '150533',
-            'ext': 'mp4',
-            'title': 'Dompap og andre fugler i Piip-Show',
-            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
-            'duration': 262,
-        }
-    }, {
-        # audio
-        'url': 'http://www.nrk.no/video/PS*154915',
-        # MD5 is unstable
-        'info_dict': {
-            'id': '154915',
-            'ext': 'mp4',
-            'title': 'Slik høres internett ut når du er blind',
-            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
-            'duration': 20,
-        }
-    }, {
-        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
-        'only_matching': True,
-    }, {
-        'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
-        'only_matching': True,
-    }, {
-        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
-        'only_matching': True,
-    }]
+            fatal=fatal, query=query,
+            headers={'Accept-Encoding': 'gzip, deflate, br'})
 
     def _extract_from_playback(self, video_id):
         path_templ = 'playback/%s/' + video_id
@@ -178,6 +122,59 @@ class NRKIE(NRKBaseIE):
             'formats': formats,
         }
 
+
+class NRKIE(NRKBaseIE):
+    _VALID_URL = r'''(?x)
+                        (?:
+                            nrk:|
+                            https?://
+                                (?:
+                                    (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
+                                    v8[-.]psapi\.nrk\.no/mediaelement/
+                                )
+                            )
+                            (?P<id>[^?\#&]+)
+                        '''
+
+    _TESTS = [{
+        # video
+        'url': 'http://www.nrk.no/video/PS*150533',
+        'md5': 'f46be075326e23ad0e524edfcb06aeb6',
+        'info_dict': {
+            'id': '150533',
+            'ext': 'mp4',
+            'title': 'Dompap og andre fugler i Piip-Show',
+            'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+            'duration': 262,
+        }
+    }, {
+        # audio
+        'url': 'http://www.nrk.no/video/PS*154915',
+        # MD5 is unstable
+        'info_dict': {
+            'id': '154915',
+            'ext': 'mp4',
+            'title': 'Slik høres internett ut når du er blind',
+            'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+            'duration': 20,
+        }
+    }, {
+        'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+        'only_matching': True,
+    }, {
+        'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
+        'only_matching': True,
+    }, {
+        'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
+        'only_matching': True,
+    }]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         return self._extract_from_playback(video_id)
@@ -187,7 +184,6 @@ class NRKTVIE(NRKBaseIE):
     IE_DESC = 'NRK TV and NRK Radio'
     _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
     _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
-    _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
     _TESTS = [{
         'url': 'https://tv.nrk.no/program/MDDP12000117',
         'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
@@ -290,164 +286,9 @@ class NRKTVIE(NRKBaseIE):
         'only_matching': True,
     }]
 
-    _api_host = None
-
-    def _extract_from_mediaelement(self, video_id):
-        api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
-
-        for api_host in api_hosts:
-            data = self._download_json(
-                'http://%s/mediaelement/%s' % (api_host, video_id),
-                video_id, 'Downloading mediaelement JSON',
-                fatal=api_host == api_hosts[-1])
-            if not data:
-                continue
-            self._api_host = api_host
-            break
-
-        title = data.get('fullTitle') or data.get('mainTitle') or data['title']
-        video_id = data.get('id') or video_id
-
-        urls = []
-        entries = []
-
-        conviva = data.get('convivaStatistics') or {}
-        live = (data.get('mediaElementType') == 'Live'
-                or data.get('isLive') is True or conviva.get('isLive'))
-
-        def make_title(t):
-            return self._live_title(t) if live else t
-
-        media_assets = data.get('mediaAssets')
-        if media_assets and isinstance(media_assets, list):
-            def video_id_and_title(idx):
-                return ((video_id, title) if len(media_assets) == 1
-                        else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
-            for num, asset in enumerate(media_assets, 1):
-                asset_url = asset.get('url')
-                if not asset_url or asset_url in urls:
-                    continue
-                urls.append(asset_url)
-                formats = self._extract_nrk_formats(asset_url, video_id)
-                if not formats:
-                    continue
-                self._sort_formats(formats)
-
-                entry_id, entry_title = video_id_and_title(num)
-                duration = parse_duration(asset.get('duration'))
-                subtitles = {}
-                for subtitle in ('webVtt', 'timedText'):
-                    subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
-                    if subtitle_url:
-                        subtitles.setdefault('no', []).append({
-                            'url': compat_urllib_parse_unquote(subtitle_url)
-                        })
-                entries.append({
-                    'id': asset.get('carrierId') or entry_id,
-                    'title': make_title(entry_title),
-                    'duration': duration,
-                    'subtitles': subtitles,
-                    'formats': formats,
-                    'is_live': live,
-                })
-
-        if not entries:
-            media_url = data.get('mediaUrl')
-            if media_url and media_url not in urls:
-                formats = self._extract_nrk_formats(media_url, video_id)
-                if formats:
-                    self._sort_formats(formats)
-                    duration = parse_duration(data.get('duration'))
-                    entries = [{
-                        'id': video_id,
-                        'title': make_title(title),
-                        'duration': duration,
-                        'formats': formats,
-                        'is_live': live,
-                    }]
-
-        if not entries:
-            self._raise_error(data)
-
-        series = conviva.get('seriesName') or data.get('seriesTitle')
-        episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
-
-        season_number = None
-        episode_number = None
-        if data.get('mediaElementType') == 'Episode':
-            _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
-                data.get('relativeOriginUrl', '')
-            EPISODENUM_RE = [
-                r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
-                r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
-            ]
-            season_number = int_or_none(self._search_regex(
-                EPISODENUM_RE, _season_episode, 'season number',
-                default=None, group='season'))
-            episode_number = int_or_none(self._search_regex(
-                EPISODENUM_RE, _season_episode, 'episode number',
-                default=None, group='episode'))
-
-        thumbnails = None
-        images = data.get('images')
-        if images and isinstance(images, dict):
-            web_images = images.get('webImages')
-            if isinstance(web_images, list):
-                thumbnails = [{
-                    'url': image['imageUrl'],
-                    'width': int_or_none(image.get('width')),
-                    'height': int_or_none(image.get('height')),
-                } for image in web_images if image.get('imageUrl')]
-
-        description = data.get('description')
-        category = data.get('mediaAnalytics', {}).get('category')
-
-        common_info = {
-            'description': description,
-            'series': series,
-            'episode': episode,
-            'season_number': season_number,
-            'episode_number': episode_number,
-            'categories': [category] if category else None,
-            'age_limit': parse_age_limit(data.get('legalAge')),
-            'thumbnails': thumbnails,
-        }
-
-        vcodec = 'none' if data.get('mediaType') == 'Audio' else None
-
-        for entry in entries:
-            entry.update(common_info)
-            for f in entry['formats']:
-                f['vcodec'] = vcodec
-
-        points = data.get('shortIndexPoints')
-        if isinstance(points, list):
-            chapters = []
-            for next_num, point in enumerate(points, start=1):
-                if not isinstance(point, dict):
-                    continue
-                start_time = parse_duration(point.get('startPoint'))
-                if start_time is None:
-                    continue
-                end_time = parse_duration(
-                    data.get('duration')
-                    if next_num == len(points)
-                    else points[next_num].get('startPoint'))
-                if end_time is None:
-                    continue
-                chapters.append({
-                    'start_time': start_time,
-                    'end_time': end_time,
-                    'title': point.get('title'),
-                })
-            if chapters and len(entries) == 1:
-                entries[0]['chapters'] = chapters
-
-        return self.playlist_result(entries, video_id, title, description)
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        return self._extract_from_mediaelement(video_id)
+        return self._extract_from_playback(video_id)
 
 
 class NRKTVEpisodeIE(InfoExtractor):