commit: 94c1255782414cdcda565717e31c91734ec00990
parent 476e1095fac398b648f953a826ed3b191ce8d5d7
Author: Sergey M․ <dstftw@gmail.com>
Date: Tue, 14 Apr 2015 17:50:53 +0600
[brightcove] Handle non well-formed XMLs (#5421)
Diffstat:
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
object_str = fix_xml_ampersands(object_str)
- object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+ try:
+ object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+ except xml.etree.ElementTree.ParseError:
+ return
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
if fv_el is not None:
@@ -185,7 +188,7 @@ class BrightcoveIE(InfoExtractor):
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?>\s*</object>''',
webpage)
- return [cls._build_brighcove_url(m) for m in matches]
+ return filter(None, [cls._build_brighcove_url(m) for m in matches])
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})