commit: 52a57ee045e02844a8f650a9d3ae30e0092d86cd
parent a3d444ab85dbb85dc3200c686ec3323dbb7008cb
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Fri, 30 Jan 2015 21:00:49 +0100
Replace every run of whitespace with a single space in HTML text
Diffstat:
1 file changed, 2 insertions(+), 0 deletions(-)
diff --git a/searx/utils.py b/searx/utils.py
@@ -119,6 +119,8 @@ class HTMLTextExtractor(HTMLParser):
def html_to_text(html):
+ html = html.replace('\n', ' ')
+ html = ' '.join(html.split())
s = HTMLTextExtractor()
s.feed(html)
return s.get_text()