commit: 286e5d6724f86ace38f6ccf0e33c511eeb1ab65d
parent 395981288ba0b2e1afabd4e595cb9c959ef62356
Author: Remita Amine <remitamine@gmail.com>
Date: Mon, 25 Jan 2021 14:46:04 +0100
[tv2] improve MTV Uutiset Article extraction
Diffstat:
2 files changed, 51 insertions(+), 24 deletions(-)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -1260,7 +1260,7 @@ from .tv2 import (
TV2IE,
TV2ArticleIE,
KatsomoIE,
- MTVuutisetIE,
+ MTVUutisetArticleIE,
)
from .tv2dk import (
TV2DKIE,
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
@@ -20,7 +20,7 @@ from ..utils import (
class TV2IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.tv2.no/v/916509/',
'info_dict': {
'id': '916509',
@@ -33,7 +33,7 @@ class TV2IE(InfoExtractor):
'view_count': int,
'categories': list,
},
- }
+ }]
_API_DOMAIN = 'sumo.tv2.no'
_PROTOCOLS = ('HDS', 'HLS', 'DASH')
_GEO_COUNTRIES = ['NO']
@@ -42,6 +42,12 @@ class TV2IE(InfoExtractor):
video_id = self._match_id(url)
api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
+ asset = self._download_json(
+ api_base + '.json', video_id,
+ 'Downloading metadata JSON')['asset']
+ title = asset.get('subtitle') or asset['title']
+ is_live = asset.get('live') is True
+
formats = []
format_urls = []
for protocol in self._PROTOCOLS:
@@ -81,7 +87,8 @@ class TV2IE(InfoExtractor):
elif ext == 'm3u8':
if not data.get('drmProtected'):
formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ video_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
m3u8_id=format_id, fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
@@ -99,11 +106,6 @@ class TV2IE(InfoExtractor):
raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats)
- asset = self._download_json(
- api_base + '.json', video_id,
- 'Downloading metadata JSON')['asset']
- title = asset['title']
-
thumbnails = [{
'id': thumbnail.get('@type'),
'url': thumbnail.get('url'),
@@ -112,7 +114,7 @@ class TV2IE(InfoExtractor):
return {
'id': video_id,
'url': video_url,
- 'title': title,
+ 'title': self._live_title(title) if is_live else title,
'description': strip_or_none(asset.get('description')),
'thumbnails': thumbnails,
'timestamp': parse_iso8601(asset.get('createTime')),
@@ -120,6 +122,7 @@ class TV2IE(InfoExtractor):
'view_count': int_or_none(asset.get('views')),
'categories': asset.get('keywords', '').split(','),
'formats': formats,
+ 'is_live': is_live,
}
@@ -168,13 +171,13 @@ class TV2ArticleIE(InfoExtractor):
class KatsomoIE(TV2IE):
- _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
+ _TESTS = [{
'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
'info_dict': {
'id': '1181321',
'ext': 'mp4',
- 'title': 'MTV Uutiset Live',
+ 'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
'description': 'Päätöksen teki Pelicansin hallitus.',
'timestamp': 1575116484,
'upload_date': '20191130',
@@ -186,20 +189,29 @@ class KatsomoIE(TV2IE):
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mtvuutiset.fi/video/prog1311159',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.katsomo.fi/#!/jakso/1311159',
+ 'only_matching': True,
+ }]
_API_DOMAIN = 'api.katsomo.fi'
_PROTOCOLS = ('HLS', 'MPD')
_GEO_COUNTRIES = ['FI']
-class MTVuutisetIE(KatsomoIE):
- _VALID_URL = r'https?://(?:www\.)mtvuutiset\.fi/(?:artikkeli/[0-9a-z-]+/|video/prog)(?P<id>\d+)'
- _TEST = {
+class MTVUutisetArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
+ _TESTS = [{
'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
'info_dict': {
'id': '1311159',
'ext': 'mp4',
- 'title': 'MTV Uutiset Live',
+ 'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
'timestamp': 1600608966,
'upload_date': '20200920',
@@ -211,11 +223,26 @@ class MTVuutisetIE(KatsomoIE):
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ # multiple YouTube embeds
+ 'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- art_id = self._match_id(url)
- webpage = self._download_webpage(url, art_id)
- video_id = self._html_search_regex(
- r'<div class=\'player-container\' .*data-katsomoid="(.+?)"', webpage, 'video_id')
- return self.url_result("http://mtv.fi/a/0/a/%s" % video_id, video_id=video_id, ie="Katsomo")
+ article_id = self._match_id(url)
+ article = self._download_json(
+ 'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
+ article_id)
+
+ def entries():
+ for video in (article.get('videos') or []):
+ video_type = video.get('videotype')
+ video_url = video.get('url')
+ if not (video_url and video_type in ('katsomo', 'youtube')):
+ continue
+ yield self.url_result(
+ video_url, video_type.capitalize(), video.get('video_id'))
+
+ return self.playlist_result(
+ entries(), article_id, article.get('title'), article.get('description'))