commit: f2c704e1126d614a7cd3283ed94a2226587d7a33
parent 5e822c252627905036b164cdbc1220dc7b1f5d08
Author: Sergey M․ <dstftw@gmail.com>
Date: Sun, 6 Dec 2020 23:08:03 +0700
[generic] Extract RSS video itunes metadata
Diffstat:
1 file changed, 33 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -20,12 +20,14 @@ from ..utils import (
ExtractorError,
float_or_none,
HEADRequest,
+ int_or_none,
is_html,
js_to_json,
KNOWN_EXTENSIONS,
merge_dicts,
mimetype2ext,
orderedSet,
+ parse_duration,
sanitized_Request,
smuggle_url,
unescapeHTML,
@@ -33,7 +35,9 @@ from ..utils import (
unified_timestamp,
unsmuggle_url,
UnsupportedError,
+ url_or_none,
xpath_text,
+ xpath_with_ns,
)
from .commonprotocols import RtmpIE
from .brightcove import (
@@ -206,10 +210,12 @@ class GenericIE(InfoExtractor):
'playlist': [{
'info_dict': {
'ext': 'mov',
- 'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
- 'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
+ 'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
+ 'title': 're:MSNBC Rachel Maddow',
'description': 're:.*her unique approach to storytelling.*',
- 'upload_date': '20201204',
+ 'timestamp': int,
+ 'upload_date': compat_str,
+ 'duration': float,
},
}],
},
@@ -2189,6 +2195,10 @@ class GenericIE(InfoExtractor):
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
+ NS_MAP = {
+ 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
+ }
+
entries = []
for it in doc.findall('./channel/item'):
next_url = None
@@ -2204,6 +2214,20 @@ class GenericIE(InfoExtractor):
if not next_url:
continue
+ def itunes(key):  # helper: text of the <itunes:KEY> child of the current RSS item `it`
+ return xpath_text(
+ it, xpath_with_ns('./itunes:%s' % key, NS_MAP),  # 'itunes:' prefix is resolved through NS_MAP
+ default=None)  # presumably yields None instead of raising when the tag is absent -- confirm against xpath_text
+
+ duration = itunes('duration')  # kept unconverted here; turned into a number when the entry dict is built
+ explicit = itunes('explicit')
+ if explicit == 'true':  # item flagged as explicit -> restrict to 18+
+ age_limit = 18
+ elif explicit == 'false':  # item flagged as not explicit -> no restriction
+ age_limit = 0
+ else:  # any other value (including None): age limit left unknown
+ age_limit = None  # NOTE(review): feeds using legacy values such as 'yes'/'no'/'clean' would land here -- confirm whether they should be mapped
+
entries.append({
'_type': 'url_transparent',
'url': next_url,
@@ -2211,6 +2235,12 @@ class GenericIE(InfoExtractor):
'description': xpath_text(it, 'description', default=None),
'timestamp': unified_timestamp(
xpath_text(it, 'pubDate', default=None)),
+ 'duration': int_or_none(duration) or parse_duration(duration),
+ 'thumbnail': url_or_none(itunes('image')),
+ 'episode': itunes('title'),
+ 'episode_number': int_or_none(itunes('episode')),
+ 'season_number': int_or_none(itunes('season')),
+ 'age_limit': age_limit,
})
return {