logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 285f991cd078db7df5622e3109984f1f3c651988
parent: a865e6672fa2a289937ff378e8893c19f71c5e0c
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sun, 18 Jan 2015 10:39:13 +0100

Merge pull request #182 from dalf/enh-proxy

[enh] image-proxy : handle ETag and date related headers, add hash to URL

Diffstat:

M searx/templates/courgette/result_templates/code.html | 5 ++---
M searx/templates/courgette/result_templates/videos.html | 5 ++---
M searx/templates/default/infobox.html | 2 +-
M searx/templates/default/result_templates/code.html | 2 +-
M searx/templates/default/result_templates/default.html | 2 +-
M searx/templates/default/result_templates/map.html | 2 +-
M searx/templates/default/result_templates/videos.html | 2 +-
M searx/templates/oscar/infobox.html | 2 +-
M searx/templates/oscar/result_templates/videos.html | 2 +-
M searx/utils.py | 8 ++++++++
M searx/webapp.py | 24 ++++++++++++++++++++----
11 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/searx/templates/courgette/result_templates/code.html b/searx/templates/courgette/result_templates/code.html @@ -1,9 +1,9 @@ <div class="result {{ result.class }}"> <h3 class="result_title">{% if result['favicon'] %}<img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}" />{% endif %}<a href="{{ result.url }}">{{ result.title|safe }}</a></h3> {% if result.publishedDate %}<span class="published_date">{{ result.publishedDate }}</span>{% endif %} - <p class="content">{% if result.img_src %}<img src="{{ result.img_src }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> + <p class="content">{% if result.img_src %}<img src="{{ image_proxify(result.img_src) }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> {% if result.repository %}<p class="content"><a href="{{ result.repository|safe }}">{{ result.repository }}</a></p>{% endif %} {{ result.codelines|code_highlighter(result.code_language)|safe }} <p class="url">{{ result.pretty_url }}</p> -</div>- \ No newline at end of file +</div> diff --git a/searx/templates/courgette/result_templates/videos.html b/searx/templates/courgette/result_templates/videos.html @@ -5,6 +5,6 @@ <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> {% if result.publishedDate %}<span class="published_date">{{ result.publishedDate }}</span><br />{% endif %} - <a href="{{ result.url }}"><img width="400" src="{{ result.thumbnail }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> + <a href="{{ result.url }}"><img width="400" src="{{ image_proxify(result.thumbnail) }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> <p class="url">{{ result.pretty_url }}</p> -</div>- \ No newline at end of file +</div> diff --git a/searx/templates/default/infobox.html 
b/searx/templates/default/infobox.html @@ -1,6 +1,6 @@ <div class="infobox"> <h2>{{ infobox.infobox }}</h2> - {% if infobox.img_src %}<img src="{{ infobox.img_src }}" title="{{ infobox.infobox|striptags }}" alt="{{ infobox.infobox|striptags }}" />{% endif %} + {% if infobox.img_src %}<img src="{{ image_proxify(infobox.img_src) }}" title="{{ infobox.infobox|striptags }}" alt="{{ infobox.infobox|striptags }}" />{% endif %} <p>{{ infobox.entity }}</p> <p>{{ infobox.content | safe }}</p> {% if infobox.attributes %} diff --git a/searx/templates/default/result_templates/code.html b/searx/templates/default/result_templates/code.html @@ -2,7 +2,7 @@ <h3 class="result_title"> {% if result['favicon'] %}<img width="14" height="14" class="favicon" src="static/{{theme}}/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}" />{% endif %}<a href="{{ result.url }}">{{ result.title|safe }}</a></h3> <p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a></p> {% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %} - <p class="content">{% if result.img_src %}<img src="{{ result.img_src }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> + <p class="content">{% if result.img_src %}<img src="{{ image_proxify(result.img_src) }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> {% if result.repository %}<p class="result-content"><a href="{{ result.repository|safe }}">{{ result.repository }}</a></p>{% endif %} {{ result.codelines|code_highlighter(result.code_language)|safe }} diff --git a/searx/templates/default/result_templates/default.html b/searx/templates/default/result_templates/default.html @@ -2,5 +2,5 @@ <h3 class="result_title">{% if "icon_"~result.engine~".ico" in favicons %}<img width="14" height="14" class="favicon" src="{{ url_for('static', 
filename='img/icons/icon_'+result.engine+'.ico') }}" alt="{{result.engine}}" />{% endif %}<a href="{{ result.url }}">{{ result.title|safe }}</a></h3> <p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a> {% if result.publishedDate %}<span class="published_date">{{ result.publishedDate }}</span>{% endif %}</p> - <p class="content">{% if result.img_src %}<img src="{{ result.img_src }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> + <p class="content">{% if result.img_src %}<img src="{{ image_proxify(result.img_src) }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> </div> diff --git a/searx/templates/default/result_templates/map.html b/searx/templates/default/result_templates/map.html @@ -8,6 +8,6 @@ <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> <p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a> {% if result.publishedDate %}<span class="published_date">{{ result.publishedDate }}</span>{% endif %}</p> - <p class="content">{% if result.img_src %}<img src="{{ result.img_src }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> + <p class="content">{% if result.img_src %}<img src="{{ image_proxify(result.img_src) }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> </div> </div> diff --git a/searx/templates/default/result_templates/videos.html b/searx/templates/default/result_templates/videos.html @@ -1,6 +1,6 @@ <div class="result"> <h3 class="result_title">{% if "icon_"~result.engine~".ico" in favicons %}<img width="14" height="14" class="favicon" src="{{ url_for('static', filename='img/icons/icon_'+result.engine+'.ico') }}" alt="{{result.engine}}" 
/>{% endif %}<a href="{{ result.url }}">{{ result.title|safe }}</a></h3> {% if result.publishedDate %}<span class="published_date">{{ result.publishedDate }}</span><br />{% endif %} - <a href="{{ result.url }}"><img class="thumbnail" src="{{ result.thumbnail }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> + <a href="{{ result.url }}"><img class="thumbnail" src="{{ image_proxify(result.thumbnail) }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> <p class="url">{{ result.url }}</p> </div> diff --git a/searx/templates/oscar/infobox.html b/searx/templates/oscar/infobox.html @@ -3,7 +3,7 @@ <h4 class="panel-title">{{ infobox.infobox }}</h4> </div> <div class="panel-body"> - {% if infobox.img_src %}<img class="img-responsive center-block infobox_part" src="{{ infobox.img_src }}" alt="{{ infobox.infobox }}" />{% endif %} + {% if infobox.img_src %}<img class="img-responsive center-block infobox_part" src="{{ image_proxify(infobox.img_src) }}" alt="{{ infobox.infobox }}" />{% endif %} {% if infobox.content %}<p class="infobox_part">{{ infobox.content }}</p>{% endif %} {% if infobox.attributes %} diff --git a/searx/templates/oscar/result_templates/videos.html b/searx/templates/oscar/result_templates/videos.html @@ -15,7 +15,7 @@ <div class="container-fluid"> <div class="row"> - <a href="{{ result.url }}"><img class="thumbnail col-xs-6 col-sm-4 col-md-4 result-content" src="{{ result.thumbnail|safe }}" alt="{{ result.title|striptags }} {{ result.engine }}" /></a> + <a href="{{ result.url }}"><img class="thumbnail col-xs-6 col-sm-4 col-md-4 result-content" src="{{ image_proxify(result.thumbnail) }}" alt="{{ result.title|striptags }} {{ result.engine }}" /></a> {% if result.content %}<p class="col-xs-12 col-sm-8 col-md-8 result-content">{{ result.content|safe }}</p>{% endif %} </div> </div> diff --git a/searx/utils.py b/searx/utils.py @@ -206,3 +206,11 @@ def format_date_by_locale(date_string, locale_string): 
except: logger.warning('cannot set original locale: {0}'.format(orig_locale)) return formatted_date + + +def dict_subset(d, properties): + result = {} + for k in properties: + if k in d: + result[k] = d[k] + return result diff --git a/searx/webapp.py b/searx/webapp.py @@ -25,6 +25,7 @@ if __name__ == '__main__': import json import cStringIO import os +import hashlib from datetime import datetime, timedelta from requests import get as http_get @@ -41,7 +42,7 @@ from searx.engines import ( ) from searx.utils import ( UnicodeWriter, highlight_content, html_to_text, get_themes, - get_static_files, get_result_templates, gen_useragent + get_static_files, get_result_templates, gen_useragent, dict_subset ) from searx.version import VERSION_STRING from searx.languages import language_codes @@ -216,8 +217,10 @@ def image_proxify(url): if not settings['server'].get('image_proxy') and not request.cookies.get('image_proxy'): return url + h = hashlib.sha256(url + settings['server']['secret_key']).hexdigest() + return '{0}?{1}'.format(url_for('image_proxy'), - urlencode(dict(url=url))) + urlencode(dict(url=url, h=h))) def render(template_name, override_theme=None, **kwargs): @@ -562,10 +565,21 @@ def image_proxy(): if not url: return '', 400 + h = hashlib.sha256(url + settings['server']['secret_key']).hexdigest() + + if h != request.args.get('h'): + return '', 400 + + headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) + headers['User-Agent'] = gen_useragent() + resp = http_get(url, stream=True, timeout=settings['server'].get('request_timeout', 2), - headers={'User-Agent': gen_useragent()}) + headers=headers) + + if resp.status_code == 304: + return '', resp.status_code if resp.status_code != 200: logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code)) @@ -586,7 +600,9 @@ def image_proxy(): return '', 502 # Bad gateway - file is too big (>5M) img += chunk - return Response(img, mimetype=resp.headers['content-type']) + headers = 
dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}) + + return Response(img, mimetype=resp.headers['content-type'], headers=headers) @app.route('/stats', methods=['GET'])