logo

searx

My custom branch(es) of searx, a meta-search engine
commit: e47258ce38fe4313a5a6881c54bd9fb14c8619c7
parent: 48839295d3939212a0fe4bf86297081a22dcb7ac
Author: asciimoo <asciimoo@gmail.com>
Date:   Thu, 17 Oct 2013 20:43:05 +0200

[fix] proper html escaping

Diffstat:

M searx/engines/stackoverflow.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
@@ -1,6 +1,7 @@
 from urllib import quote
 from lxml import html
 from urlparse import urljoin
+from cgi import escape
 
 base_url = 'http://stackoverflow.com/'
 search_url = base_url+'search?q='
@@ -20,6 +21,6 @@ def response(resp):
         link = result.xpath('.//div[@class="result-link"]//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ' '.join(link.xpath('.//text()'))
-        content = ' '.join(result.xpath('.//div[@class="excerpt"]//text()'))
+        content = escape(' '.join(result.xpath('.//div[@class="excerpt"]//text()')))
         results.append({'url': url, 'title': title, 'content': content})
     return results