commit: 0685d9727b9657fc8a31c96cb52c4155de29fcfc
parent e06632e3fe25036b804a62469bb18fa4c37e3368
Author: Sergey M․ <dstftw@gmail.com>
Date: Mon, 9 Jul 2018 23:43:05 +0700
[utils] Share JSON-LD regex (shared pattern is now case-insensitive)
Diffstat:
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -52,6 +52,7 @@ from ..utils import (
GeoUtils,
int_or_none,
js_to_json,
+ JSON_LD_RE,
mimetype2ext,
orderedSet,
parse_codecs,
@@ -1149,8 +1150,7 @@ class InfoExtractor(object):
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
json_ld = self._search_regex(
- r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
- html, 'JSON-LD', group='json_ld', **kwargs)
+ JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
default = kwargs.get('default', NO_DEFAULT)
if not json_ld:
return default if default is not NO_DEFAULT else {}
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -184,6 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
def preferredencoding():