
searx

My custom branch(es) on searx, a meta-search engine
commit: 96c8b20a045f62205a3b9a03113086f0fcfbc579
parent: b44643222f85764399a4eac72541783eb8c2868f
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Tue, 24 Jun 2014 16:30:04 +0200

[enh] https rewrite basics

Diffstat:

 A searx/https_rewrite.py | 14 ++++++++++++++
 M searx/settings.yml     |  3 ++-
 M searx/webapp.py        | 20 ++++++++++++++++++--
3 files changed, 34 insertions(+), 3 deletions(-)

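The new https_rewrite.py module below stores each rule as a (compiled pattern, replacement) pair modeled on the HTTPS Everywhere rulesets it links to. Here is a minimal standalone sketch of how one such pair rewrites a plain-HTTP result URL, using the xkcd rule from the diff; the demo() helper is illustrative only and not part of this commit:

import re

# one (from, to) pair, copied from the new https_rules tuple
xkcd_rule = (re.compile(r'^http://(www\.|m\.|)?xkcd\.(?:com|org)/', re.I | re.U),
             r'https://\1xkcd.com/')

def demo(url):
    # apply the rule only when its pattern matches, mirroring the loop in webapp.py
    pattern, replacement = xkcd_rule
    if pattern.match(url):
        return pattern.sub(replacement, url)
    return url

print(demo('http://www.xkcd.org/353/'))  # -> https://www.xkcd.com/353/
print(demo('http://example.com/'))       # -> unchanged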
diff --git a/searx/https_rewrite.py b/searx/https_rewrite.py
@@ -0,0 +1,14 @@
+import re
+
+# https://gitweb.torproject.org/\
+# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules
+
+# HTTPS rewrite rules
+https_rules = (
+    # from
+    (re.compile(r'^http://(www\.|m\.|)?xkcd\.(?:com|org)/', re.I | re.U),
+     # to
+     r'https://\1xkcd.com/'),
+    (re.compile(r'^https?://(?:ssl)?imgs\.xkcd\.com/', re.I | re.U),
+     r'https://sslimgs.xkcd.com/'),
+)
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -1,11 +1,12 @@
 server:
     port : 8888
     secret_key : "ultrasecretkey" # change this!
-    debug : True
+    debug : False
     request_timeout : 2.0 # seconds
     base_url : False
     themes_path : ""
     default_theme : default
+    https_rewrite : True
 
 engines:
   - name : wikipedia
diff --git a/searx/webapp.py b/searx/webapp.py
@@ -41,13 +41,16 @@ from searx.engines import (
 from searx.utils import (
     UnicodeWriter, highlight_content, html_to_text, get_themes
 )
+from searx.https_rewrite import https_rules
 from searx.languages import language_codes
 from searx.search import Search
 from searx.autocomplete import backends as autocomplete_backends
 
-static_path, templates_path, themes = get_themes(settings['themes_path'] if \
-    settings.get('themes_path', None) else searx_dir)
+static_path, templates_path, themes =\
+    get_themes(settings['themes_path']
+               if settings.get('themes_path')
+               else searx_dir)
 
 default_theme = settings['default_theme'] if \
     settings.get('default_theme', None) else 'default'
@@ -192,8 +195,20 @@ def index():
                     search.lang)
 
     for result in search.results:
+
         if not search.paging and engines[result['engine']].paging:
            search.paging = True
+
+        if settings['server']['https_rewrite']\
+           and result['parsed_url'].scheme == 'http':
+
+            for http_regex, https_url in https_rules:
+                if http_regex.match(result['url']):
+                    result['url'] = http_regex.sub(https_url, result['url'])
+                    # TODO result['parsed_url'].scheme
+                    break
+
+        # HTTPS rewrite
         if search.request_data.get('format', 'html') == 'html':
             if 'content' in result:
                 result['content'] = highlight_content(result['content'],
@@ -206,6 +221,7 @@ def index():
         # removing html content and whitespace duplications
         result['title'] = ' '.join(html_to_text(result['title'])
                                    .strip().split())
+
         if len(result['url']) > 74:
             url_parts = result['url'][:35], result['url'][-35:]
             result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
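
The rewrite loop in webapp.py updates result['url'] but, as the TODO comment notes, leaves result['parsed_url'] with its old 'http' scheme. One possible follow-up is sketched below as an assumption rather than anything this commit implements: rewrite_result() is a hypothetical helper, and it assumes result['parsed_url'] comes from the standard urlparse module:

from urlparse import urlparse  # Python 2 stdlib, matching the code base of this era
from searx.https_rewrite import https_rules

def rewrite_result(result):
    # apply the first matching rule and keep parsed_url in sync with the new URL
    for http_regex, https_url in https_rules:
        if http_regex.match(result['url']):
            result['url'] = http_regex.sub(https_url, result['url'])
            # re-parse so parsed_url.scheme reports 'https' after the rewrite
            result['parsed_url'] = urlparse(result['url'])
            break
    return result

result = {'url': 'http://xkcd.com/927/', 'parsed_url': urlparse('http://xkcd.com/927/')}
print(rewrite_result(result)['parsed_url'].scheme)  # -> https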