commit: fab01080f402dbfad00122b73714d92b5d1deb24
parent 42db58ec7367e7ee6555e5f14107712add61d013
Author: Sergey M․ <dstftw@gmail.com>
Date: Mon, 16 Dec 2019 00:08:18 +0700
[tv2dk:bornholm:play] Add extractor (closes #23291)
Diffstat:
2 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -1168,7 +1168,10 @@ from .tv2 import (
TV2ArticleIE,
KatsomoIE,
)
-from .tv2dk import TV2DKIE
+from .tv2dk import (
+ TV2DKIE,
+ TV2DKBornholmPlayIE,
+)
from .tv2hu import TV2HuIE
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py
@@ -1,10 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
-from ..utils import extract_attributes
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ js_to_json,
+ url_or_none,
+)
class TV2DKIE(InfoExtractor):
@@ -80,3 +86,69 @@ class TV2DKIE(InfoExtractor):
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
video_id=kaltura_id))
return self.playlist_result(entries)
+
+
+class TV2DKBornholmPlayIE(InfoExtractor):
+    # Extractor for play.tv2bornholm.dk pages that carry the numeric video
+    # id in the ``id`` query parameter.
+    _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
+        'info_dict': {
+            'id': '781021',
+            'ext': 'mp4',
+            'title': '12Nyheder-27.11.19',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        # Returns an info dict with 'id', 'title' and 'formats' for the
+        # video whose id is captured from ``url`` by _VALID_URL.
+        video_id = self._match_id(url)
+
+        # The endpoint is queried with a JSON POST body; only the 'd' member
+        # of the JSON response is used, and it is treated as text below.
+        request_body = json.dumps({
+            'playlist_id': video_id,
+            'serienavn': '',
+        }).encode()
+        request_headers = {
+            'X-Requested-With': 'XMLHttpRequest',
+            'Content-Type': 'application/json; charset=UTF-8',
+        }
+        response = self._download_json(
+            'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo',
+            video_id, data=request_body, headers=request_headers)
+        video = response['d']
+
+        # TODO: generalize flowplayer
+        # Title: a quoted value following ``title:``; the back-reference
+        # makes the closing quote match the opening one.
+        title = self._search_regex(
+            r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', video, 'title',
+            group='value')
+        # Sources: the JS array literal after ``sources:``, converted to
+        # JSON via js_to_json before parsing.
+        sources_js = self._search_regex(
+            r'(?s)sources:\s*(\[.+?\]),', video, 'sources')
+        sources = self._parse_json(sources_js, video_id, js_to_json)
+
+        formats = []
+        seen_urls = set()
+        for entry in sources:
+            stream_url = url_or_none(entry.get('src'))
+            # Skip entries without a usable URL as well as repeated URLs.
+            if not stream_url or stream_url in seen_urls:
+                continue
+            seen_urls.add(stream_url)
+
+            # A source is classified by its declared MIME type or, failing
+            # that, by the extension of its URL.
+            mime_type = entry.get('type')
+            extension = determine_ext(stream_url)
+            is_hls = (
+                mime_type == 'application/x-mpegurl' or extension == 'm3u8')
+            is_dash = (
+                mime_type == 'application/dash+xml' or extension == 'mpd')
+
+            if is_hls:
+                hls_formats = self._extract_m3u8_formats(
+                    stream_url, video_id, ext='mp4',
+                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+                formats.extend(hls_formats)
+            elif is_dash:
+                dash_formats = self._extract_mpd_formats(
+                    stream_url, video_id, mpd_id='dash', fatal=False)
+                formats.extend(dash_formats)
+            else:
+                # Anything else is passed through as a direct URL.
+                formats.append({
+                    'url': stream_url,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }