commit: 96c8b20a045f62205a3b9a03113086f0fcfbc579
parent: b44643222f85764399a4eac72541783eb8c2868f
Author: Adam Tauber <asciimoo@gmail.com>
Date: Tue, 24 Jun 2014 16:30:04 +0200
[enh] https rewrite basics
Diffstat:
3 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/searx/https_rewrite.py b/searx/https_rewrite.py
@@ -0,0 +1,14 @@
+import re
+
+# Reference: HTTPS Everywhere rule set, https://gitweb.torproject.org/\
+# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules
+
+# HTTPS rewrite rules: (compiled regex, replacement template) pairs
+https_rules = (
+ # from: regex matched against the URL; group 1 is 'www.', 'm.' or ''
+ (re.compile(r'^http://(www\.|m\.|)?xkcd\.(?:com|org)/', re.I | re.U),
+ # to: HTTPS replacement; \1 re-inserts the host prefix captured above
+ r'https://\1xkcd.com/'),
+ (re.compile(r'^https?://(?:ssl)?imgs\.xkcd\.com/', re.I | re.U),
+ r'https://sslimgs.xkcd.com/'),
+)
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -1,11 +1,12 @@
server:
port : 8888
secret_key : "ultrasecretkey" # change this!
- debug : True
+ debug : False
request_timeout : 2.0 # seconds
base_url : False
themes_path : ""
default_theme : default
+ https_rewrite : True
engines:
- name : wikipedia
diff --git a/searx/webapp.py b/searx/webapp.py
@@ -41,13 +41,16 @@ from searx.engines import (
from searx.utils import (
UnicodeWriter, highlight_content, html_to_text, get_themes
)
+from searx.https_rewrite import https_rules
from searx.languages import language_codes
from searx.search import Search
from searx.autocomplete import backends as autocomplete_backends
-static_path, templates_path, themes = get_themes(settings['themes_path'] if \
- settings.get('themes_path', None) else searx_dir)
+static_path, templates_path, themes =\
+ get_themes(settings['themes_path']
+ if settings.get('themes_path')
+ else searx_dir)
default_theme = settings['default_theme'] if \
settings.get('default_theme', None) else 'default'
@@ -192,8 +195,20 @@ def index():
search.lang)
for result in search.results:
+
if not search.paging and engines[result['engine']].paging:
search.paging = True
+
+ if settings['server']['https_rewrite']\
+ and result['parsed_url'].scheme == 'http':
+
+ for http_regex, https_url in https_rules:
+ if http_regex.match(result['url']):
+ result['url'] = http_regex.sub(https_url, result['url'])
+ # TODO: result['parsed_url'].scheme is not updated here
+ break
+
+ # HTTPS rewrite done; format-specific result processing follows
if search.request_data.get('format', 'html') == 'html':
if 'content' in result:
result['content'] = highlight_content(result['content'],
@@ -206,6 +221,7 @@ def index():
# removing html content and whitespace duplications
result['title'] = ' '.join(html_to_text(result['title'])
.strip().split())
+
if len(result['url']) > 74:
url_parts = result['url'][:35], result['url'][-35:]
result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)