logo

searx

My custom branch(es) on searx, a meta-search engine
commit: c43476229b58f20dba62c0f623ae2dad96bc8525
parent: 3a8eafcc6b19b4b47b10534fbc683e4e3fbc064d
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Mon, 24 Aug 2015 13:23:13 +0200

Merge pull request #405 from pointhi/bug_fixes

some Bug fixes

Diffstat:

M  searx/__init__.py           |  2 +-
M  searx/engines/startpage.py  | 10 +++++++++-
M  searx/search.py             |  4 ++++
3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/searx/__init__.py b/searx/__init__.py
@@ -40,7 +40,7 @@ else:
     with open(settings_path) as settings_yaml:
         settings = load(settings_yaml)
 
-if settings.get('server', {}).get('debug'):
+if settings.get('general', {}).get('debug'):
     logging.basicConfig(level=logging.DEBUG)
 else:
     logging.basicConfig(level=logging.WARNING)
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
@@ -66,7 +66,15 @@ def response(resp):
         url = link.attrib.get('href')
 
         # block google-ad url's
-        if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
+        if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
+            continue
+
+        # block startpage search url's
+        if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
+            continue
+
+        # block ixquick search url's
+        if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
             continue
 
         title = escape(extract_text(link))
diff --git a/searx/search.py b/searx/search.py
@@ -206,6 +206,10 @@ def score_results(results):
         # if there is no duplicate found, append result
         else:
             res['score'] = score
+            # if the result has no scheme, use http as default
+            if res['parsed_url'].scheme == '':
+                res['parsed_url'] = res['parsed_url']._replace(scheme="http")
+
             results.append(res)
 
     results = sorted(results, key=itemgetter('score'), reverse=True)