logo

searx

Unnamed repository; edit this file 'description' to name the repository.
commit: 37c3ace3096d9568f8dbdc3728659f4c77377b33
parent: e3df22b1401742ae0ade324ce4403f2b2b45dfe1
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Fri, 11 Sep 2015 18:33:06 +0200

[fix] add missing scheme to duplicated results too ++ revert gigablasts handling

Diffstat:

searx/engines/gigablast.py | 2 --
searx/search.py            | 7 ++++---
2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
@@ -53,8 +53,6 @@ def response(resp):
     # parse results
     for result in dom.xpath(results_xpath):
         url = result.xpath(url_xpath)[0].text
-        if not url.startswith('http://') and not url.startswith('https://'):
-            url = 'http://' + url
         title = result.xpath(title_xpath)[0].text
         content = escape(result.xpath(content_xpath)[0].text)
diff --git a/searx/search.py b/searx/search.py
@@ -143,6 +143,10 @@ def score_results(results):
         res['parsed_url'] = urlparse(res['url'])

+        # if the result has no scheme, use http as default
+        if not res['parsed_url'].scheme:
+            res['parsed_url'] = res['parsed_url']._replace(scheme="http")
+
         res['host'] = res['parsed_url'].netloc

         if res['host'].startswith('www.'):
@@ -206,9 +210,6 @@ def score_results(results):
         # if there is no duplicate found, append result
         else:
             res['score'] = score
-            # if the result has no scheme, use http as default
-            if res['parsed_url'].scheme == '':
-                res['parsed_url'] = res['parsed_url']._replace(scheme="http")
             results.append(res)