commit: 3b024e17afcfe12f4ea55e9a200b9cbd61ec3f99
parent ec71c13ab891566abff9010710afb915e8f22523
Author: Philipp Hagemeister <phihag@phihag.de>
Date: Fri, 1 Feb 2013 17:29:50 +0100
Work around buggy HTML Parser in Python < 2.7.3 (Closes #662)
Diffstat:
1 file changed, 6 insertions(+), 0 deletions(-)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -280,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
lines[-1] = lines[-1][:self.result[2][1]]
return '\n'.join(lines).strip()
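+# Hack for https://github.com/rg3/youtube-dl/issues/662: on Python < 2.7.3, skip over a literal "</scr'+'ipt>" in the raw data instead of passing it to HTMLParser.parse_endtag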
+if sys.version_info < (2, 7, 3):
+ AttrParser.parse_endtag = (lambda self, i:
+ i + len("</scr'+'ipt>")
+ if self.rawdata[i:].startswith("</scr'+'ipt>")
+ else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""