logo

searx

My custom branche(s) on searx, a meta-search engine git clone https://hacktivis.me/git/searx.git
commit: 15eef0ebdb15af80c026302bef250dc7f4417951
parent 7fdfeca3a43e0e2bd8ef2dcb27cca7745edf596a
Author: Alexandre Flament <alex@al-f.net>
Date:   Fri, 20 Jan 2017 18:52:47 +0100

[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.

Diffstat:

Asearx/exceptions.py32++++++++++++++++++++++++++++++++
Msearx/search.py66+++++++++++++++++++++++++++++++++++++++++++-----------------------
Msearx/templates/__common__/opensearch_response_rss.xml6++++++
Msearx/webapp.py67++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
4 files changed, 133 insertions(+), 38 deletions(-)

diff --git a/searx/exceptions.py b/searx/exceptions.py @@ -0,0 +1,32 @@ +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. + +(C) 2017- by Alexandre Flament, <alex@al-f.net> +''' + + +class SearxException(Exception): + pass + + +class SearxParameterException(SearxException): + + def __init__(self, name, value): + if value == '' or value is None: + message = 'Empty ' + name + ' parameter' + else: + message = 'Invalid value "' + value + '" for parameter ' + name + super(SearxParameterException, self).__init__(message) + self.parameter_name = name + self.parameter_value = value diff --git a/searx/search.py b/searx/search.py @@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery from searx.results import ResultContainer from searx import logger from searx.plugins import plugins +from searx.languages import language_codes +from searx.exceptions import SearxParameterException logger = logger.getChild('search') number_of_searches = 0 +language_code_set = set(l[0].lower() for l in language_codes) +language_code_set.add('all') + def send_http_request(engine, request_params, start_time, timeout_limit): # for page_load_time stats @@ -182,33 +187,13 @@ def default_request_params(): def get_search_query_from_webapp(preferences, form): - query = None - query_engines = [] - query_categories = [] - query_pageno = 1 - query_lang = 'all' - query_time_range = None + # no text for the query ? + if not form.get('q'): + raise SearxParameterException('q', '') # set blocked engines disabled_engines = preferences.engines.get_disabled() - # set specific language if set - query_lang = preferences.get_value('language') - - # safesearch - query_safesearch = preferences.get_value('safesearch') - - # TODO better exceptions - if not form.get('q'): - raise Exception('noquery') - - # set pagenumber - pageno_param = form.get('pageno', '1') - if not pageno_param.isdigit() or int(pageno_param) < 1: - pageno_param = 1 - - query_pageno = int(pageno_param) - # parse query, if tags are set, which change # the serch engine or search-language raw_text_query = RawTextQuery(form['q'], disabled_engines) @@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form): # set query query = raw_text_query.getSearchQuery() + # get and check page number + pageno_param = form.get('pageno', '1') + if not pageno_param.isdigit() or int(pageno_param) < 1: + raise SearxParameterException('pageno', pageno_param) + query_pageno = int(pageno_param) + + # get language # set specific language if set on request, query or preferences # TODO support search with multible languages if len(raw_text_query.languages): @@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form): else: query_lang = preferences.get_value('language') + # check language + if query_lang not in language_code_set: + raise SearxParameterException('language', query_lang) + + # get safesearch + if 'safesearch' in form: + query_safesearch = form.get('safesearch') + # first check safesearch + if not query_safesearch.isdigit(): + raise SearxParameterException('safesearch', query_safesearch) + query_safesearch = int(query_safesearch) + else: + query_safesearch = preferences.get_value('safesearch') + + # safesearch : second check + if query_safesearch < 0 or query_safesearch > 2: + raise SearxParameterException('safesearch', query_safesearch) + + # get time_range query_time_range = form.get('time_range') + # check time_range + if not(query_time_range is None)\ + and not (query_time_range in ['', 'day', 'week', 'month', 'year']): + raise SearxParameterException('time_range', query_time_range) + + # query_engines query_engines = raw_text_query.engines + # query_categories + query_categories = [] + # if engines are calculated from query, # set categories by using that informations if query_engines and raw_text_query.specific: diff --git a/searx/templates/__common__/opensearch_response_rss.xml b/searx/templates/__common__/opensearch_response_rss.xml @@ -11,6 +11,12 @@ <opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage> <atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/> <opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" /> + {% if error_message %} + <item> + <title>Error</title> + <description>{{ error_message|e }}</description> + </item> + {% endif %} {% for r in results %} <item> <title>{{ r.title }}</title> diff --git a/searx/webapp.py b/searx/webapp.py @@ -52,6 +52,7 @@ from flask import ( from flask_babel import Babel, gettext, format_date, format_decimal from flask.json import jsonify from searx import settings, searx_dir, searx_debug +from searx.exceptions import SearxException, SearxParameterException from searx.engines import ( categories, engines, engine_shortcuts, get_engines_stats, initialize_engines ) @@ -400,6 +401,33 @@ def pre_request(): request.user_plugins.append(plugin) +def index_error(output_format, error_message): + if output_format == 'json': + return Response(json.dumps({'error': error_message}), + mimetype='application/json') + elif output_format == 'csv': + response = Response('', mimetype='application/csv') + cont_disp = 'attachment;Filename=searx.csv' + response.headers.add('Content-Disposition', cont_disp) + return response + elif output_format == 'rss': + response_rss = render( + 'opensearch_response_rss.xml', + results=[], + q=request.form['q'] if 'q' in request.form else '', + number_of_results=0, + base_url=get_base_url(), + error_message=error_message + ) + return Response(response_rss, mimetype='text/xml') + else: + # html + request.errors.append(gettext('search error')) + return render( + 'index.html', + ) + + @app.route('/search', methods=['GET', 'POST']) @app.route('/', methods=['GET', 'POST']) def index(): @@ -408,10 +436,19 @@ def index(): Supported outputs: html, json, csv, rss. """ + # output_format + output_format = request.form.get('format', 'html') + if output_format not in ['html', 'csv', 'json', 'rss']: + output_format = 'html' + + # check if there is query if request.form.get('q') is None: - return render( - 'index.html', - ) + if output_format == 'html': + return render( + 'index.html', + ) + else: + return index_error(output_format, 'No query'), 400 # search search_query = None @@ -421,20 +458,24 @@ def index(): # search = Search(search_query) # without plugins search = SearchWithPlugins(search_query, request) result_container = search.search() - except: - request.errors.append(gettext('search error')) + except Exception as e: + # log exception logger.exception('search error') - return render( - 'index.html', - ) + # is it an invalid input parameter or something else ? + if (issubclass(e.__class__, SearxParameterException)): + return index_error(output_format, e.message), 400 + else: + return index_error(output_format, gettext('search error')), 500 + + # results results = result_container.get_ordered_results() + number_of_results = result_container.results_number() + if number_of_results < result_container.results_length(): + number_of_results = 0 # UI advanced_search = request.form.get('advanced_search', None) - output_format = request.form.get('format', 'html') - if output_format not in ['html', 'csv', 'json', 'rss']: - output_format = 'html' # output for result in results: @@ -470,10 +511,6 @@ def index(): else: result['publishedDate'] = format_date(result['publishedDate']) - number_of_results = result_container.results_number() - if number_of_results < result_container.results_length(): - number_of_results = 0 - if output_format == 'json': return Response(json.dumps({'query': search_query.query, 'number_of_results': number_of_results,