commit: 23b9095cbf2d31a1495ee3d63a55bd81548cd367
parent 5bffa9ca33f8ec98baebca13ee9b16262bfe4e8d
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date: Mon, 24 Aug 2015 11:28:55 +0200
[fix] improve result handling of startpage engine
Diffstat:
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
@@ -66,7 +66,11 @@ def response(resp):
url = link.attrib.get('href')
# block google-ad url's
- if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
+ if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
+ continue
+
+ # block startpage search URLs
+ if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
continue
title = escape(extract_text(link))