commit: fda6d237a5b664cc8a9a45562d4113c51fd0280d
parent 5d9f6cbc5afa033b6f1cfd2abe4327e366da2ad1
Author: Sergey M․ <dstftw@gmail.com>
Date: Sun, 23 Feb 2020 06:47:11 +0700
[wistia] Add support for multiple generic embeds (closes #8347, closes #11385)
Diffstat:
2 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -2537,14 +2537,15 @@ class GenericIE(InfoExtractor):
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
# Look for embedded Wistia player
- wistia_url = WistiaIE._extract_url(webpage)
- if wistia_url:
- return {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(wistia_url),
- 'ie_key': WistiaIE.ie_key(),
- 'uploader': video_uploader,
- }
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if wistia_urls:
+ playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+ for entry in playlist['entries']:
+ entry.update({
+ '_type': 'url_transparent',
+ 'uploader': video_uploader,
+ })
+ return playlist
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
@@ -45,22 +45,23 @@ class WistiaIE(InfoExtractor):
# https://wistia.com/support/embed-and-share/video-on-your-website
@staticmethod
def _extract_url(webpage):
- match = re.search(
- r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)
- if match:
- return unescapeHTML(match.group('url'))
+ urls = WistiaIE._extract_urls(webpage)
+ return urls[0] if urls else None
- match = re.search(
- r'''(?sx)
- <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
- <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
- ''', webpage)
- if match:
- return 'wistia:%s' % match.group('id')
-
- match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage)
- if match:
- return 'wistia:%s' % match.group('id')
+ @staticmethod
+ def _extract_urls(webpage):
+ urls = []
+ for match in re.finditer(
+ r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
+ urls.append(unescapeHTML(match.group('url')))
+ for match in re.finditer(
+ r'''(?sx)
+ <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
+ ''', webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ return urls
def _real_extract(self, url):
video_id = self._match_id(url)