commit: 172754131578f6042efa7c47a57c6e8531e3d190
parent 45b0a0d11b9bc67895507629f8f0a81c5835f51f
Author: Sergey M․ <dstftw@gmail.com>
Date: Sun, 13 Dec 2020 20:24:13 +0700
[extractor/common] Improve JSON-LD interaction statistic extraction (refs #23306)
Diffstat:
2 files changed, 60 insertions(+), 2 deletions(-)
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
@@ -98,6 +98,56 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
+ def test_search_json_ld_realworld(self):
+ # https://github.com/ytdl-org/youtube-dl/issues/23306
+ expect_dict(
+ self,
+ self.ie._search_json_ld(r'''<script type="application/ld+json">
+{
+"@context": "http://schema.org/",
+"@type": "VideoObject",
+"name": "1 On 1 With Kleio",
+"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
+"duration": "PT0H12M23S",
+"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
+"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
+"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
+"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
+"width": "1920",
+"height": "1080",
+"encodingFormat": "mp4",
+"bitrate": "6617kbps",
+"isFamilyFriendly": "False",
+"description": "Kleio Valentien",
+"uploadDate": "2015-12-05T21:24:35+01:00",
+"interactionStatistic": {
+"@type": "InteractionCounter",
+"interactionType": { "@type": "http://schema.org/WatchAction" },
+"userInteractionCount": 1120958
+}, "aggregateRating": {
+"@type": "AggregateRating",
+"ratingValue": "88",
+"ratingCount": "630",
+"bestRating": "100",
+"worstRating": "0"
+}, "actor": [{
+"@type": "Person",
+"name": "Kleio Valentien",
+"url": "https://www.eporner.com/pornstar/kleio-valentien/"
+}]}
+</script>''', None),
+ {
+ 'title': '1 On 1 With Kleio',
+ 'description': 'Kleio Valentien',
+ 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
+ 'timestamp': 1449347075,
+ 'duration': 743.0,
+ 'view_count': 1120958,
+ 'width': 1920,
+ 'height': 1080,
+ })
+
+
def test_download_json(self):
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -1237,8 +1237,16 @@ class InfoExtractor(object):
'ViewAction': 'view',
}
+ def extract_interaction_type(e):
+ interaction_type = e.get('interactionType')
+ if isinstance(interaction_type, dict):
+ interaction_type = interaction_type.get('@type')
+ return str_or_none(interaction_type)
+
def extract_interaction_statistic(e):
interaction_statistic = e.get('interactionStatistic')
+ if isinstance(interaction_statistic, dict):
+ interaction_statistic = [interaction_statistic]
if not isinstance(interaction_statistic, list):
return
for is_e in interaction_statistic:
@@ -1246,8 +1254,8 @@ class InfoExtractor(object):
continue
if is_e.get('@type') != 'InteractionCounter':
continue
- interaction_type = is_e.get('interactionType')
- if not isinstance(interaction_type, compat_str):
+ interaction_type = extract_interaction_type(is_e)
+ if not interaction_type:
continue
# For interaction count some sites provide string instead of
# an integer (as per spec) with non digit characters (e.g. ",")