commit: fe05191b8c59538a48b6cbc95f4fe54fc7e6a0ac
parent 0204838163bd4068fe23b40414573d1307d817ab
Author: Ben Rog-Wilhelm <zorba-github@pavlovian.net>
Date: Tue, 4 May 2021 14:14:35 -0500
[kaltura] Improve iframe extraction (#28969)
Co-authored-by: Sergey M. <dstftw@gmail.com>
Diffstat:
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py
@@ -102,6 +102,21 @@ class GDCVaultIE(InfoExtractor):
'format': 'mp4-408',
},
},
+ {
+ # Kaltura embed, whitespace between quote and embedded URL in iframe's src
+ 'url': 'https://www.gdcvault.com/play/1025699',
+ 'info_dict': {
+ 'id': '0_zagynv0a',
+ 'ext': 'mp4',
+ 'title': 'Tech Toolbox',
+ 'upload_date': '20190408',
+ 'uploader_id': 'joe@blazestreaming.com',
+ 'timestamp': 1554764629,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
]
def _login(self, webpage_url, display_id):
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
@@ -145,7 +145,7 @@ class KalturaIE(InfoExtractor):
''', webpage))
or list(re.finditer(
r'''(?xs)
- <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+ <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)*
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)