commit: 15eef0ebdb15af80c026302bef250dc7f4417951
parent 7fdfeca3a43e0e2bd8ef2dcb27cca7745edf596a
Author: Alexandre Flament <alex@al-f.net>
Date: Fri, 20 Jan 2017 18:52:47 +0100
[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.
Diffstat:
4 files changed, 133 insertions(+), 38 deletions(-)
diff --git a/searx/exceptions.py b/searx/exceptions.py
@@ -0,0 +1,32 @@
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2017- by Alexandre Flament, <alex@al-f.net>
+'''
+
+
+class SearxException(Exception):
+    """Common base class for searx-specific exceptions."""
+
+
+class SearxParameterException(SearxException):
+    """Report a request parameter that is empty or has an invalid value.
+
+    Attributes:
+        message: human readable description (also passed to ``Exception``).
+        parameter_name: name of the offending parameter.
+        parameter_value: the rejected value, exactly as it was given.
+    """
+
+    def __init__(self, name, value):
+        """Build the error message from the parameter name and value.
+
+        :param name: name of the request parameter.
+        :param value: rejected value; ``''`` or ``None`` means the
+            parameter is missing. It does not have to be a string.
+        """
+        if value == '' or value is None:
+            message = 'Empty ' + name + ' parameter'
+        else:
+            # value is not always a string (the safesearch range check
+            # raises with an int), so format it instead of concatenating,
+            # which would fail with a TypeError.
+            message = 'Invalid value "%s" for parameter %s' % (value, name)
+        super(SearxParameterException, self).__init__(message)
+        # Exceptions only get a ``message`` attribute for free on Python 2;
+        # store it explicitly so that readers of ``e.message`` keep working.
+        self.message = message
+        self.parameter_name = name
+        self.parameter_value = value
diff --git a/searx/search.py b/searx/search.py
@@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
+from searx.languages import language_codes
+from searx.exceptions import SearxParameterException
logger = logger.getChild('search')
number_of_searches = 0
+# lower-cased language codes accepted as search language,
+# plus the special value 'all'
+language_code_set = {code[0].lower() for code in language_codes}
+language_code_set.add('all')
+
def send_http_request(engine, request_params, start_time, timeout_limit):
# for page_load_time stats
@@ -182,33 +187,13 @@ def default_request_params():
def get_search_query_from_webapp(preferences, form):
- query = None
- query_engines = []
- query_categories = []
- query_pageno = 1
- query_lang = 'all'
- query_time_range = None
+ # no text for the query ?
+ if not form.get('q'):
+ raise SearxParameterException('q', '')
# set blocked engines
disabled_engines = preferences.engines.get_disabled()
- # set specific language if set
- query_lang = preferences.get_value('language')
-
- # safesearch
- query_safesearch = preferences.get_value('safesearch')
-
- # TODO better exceptions
- if not form.get('q'):
- raise Exception('noquery')
-
- # set pagenumber
- pageno_param = form.get('pageno', '1')
- if not pageno_param.isdigit() or int(pageno_param) < 1:
- pageno_param = 1
-
- query_pageno = int(pageno_param)
-
# parse query, if tags are set, which change
# the serch engine or search-language
raw_text_query = RawTextQuery(form['q'], disabled_engines)
@@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form):
# set query
query = raw_text_query.getSearchQuery()
+ # get and check page number
+ pageno_param = form.get('pageno', '1')
+ if not pageno_param.isdigit() or int(pageno_param) < 1:
+ raise SearxParameterException('pageno', pageno_param)
+ query_pageno = int(pageno_param)
+
+ # get language
# set specific language if set on request, query or preferences
# TODO support search with multible languages
if len(raw_text_query.languages):
@@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form):
else:
query_lang = preferences.get_value('language')
+ # check language
+ if query_lang not in language_code_set:
+ raise SearxParameterException('language', query_lang)
+
+ # get safesearch
+ if 'safesearch' in form:
+ query_safesearch = form.get('safesearch')
+ # first check safesearch
+ if not query_safesearch.isdigit():
+ raise SearxParameterException('safesearch', query_safesearch)
+ query_safesearch = int(query_safesearch)
+ else:
+ query_safesearch = preferences.get_value('safesearch')
+
+ # safesearch : second check
+ if query_safesearch < 0 or query_safesearch > 2:
+ raise SearxParameterException('safesearch', query_safesearch)
+
+ # get time_range
query_time_range = form.get('time_range')
+ # check time_range
+ if not(query_time_range is None)\
+ and not (query_time_range in ['', 'day', 'week', 'month', 'year']):
+ raise SearxParameterException('time_range', query_time_range)
+
+ # query_engines
query_engines = raw_text_query.engines
+ # query_categories
+ query_categories = []
+
# if engines are calculated from query,
# set categories by using that informations
if query_engines and raw_text_query.specific:
diff --git a/searx/templates/__common__/opensearch_response_rss.xml b/searx/templates/__common__/opensearch_response_rss.xml
@@ -11,6 +11,12 @@
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
+ {% if error_message %}
+ <item>
+ <title>Error</title>
+ <description>{{ error_message|e }}</description>
+ </item>
+ {% endif %}
{% for r in results %}
<item>
<title>{{ r.title }}</title>
diff --git a/searx/webapp.py b/searx/webapp.py
@@ -52,6 +52,7 @@ from flask import (
from flask_babel import Babel, gettext, format_date, format_decimal
from flask.json import jsonify
from searx import settings, searx_dir, searx_debug
+from searx.exceptions import SearxException, SearxParameterException
from searx.engines import (
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
)
@@ -400,6 +401,33 @@ def pre_request():
request.user_plugins.append(plugin)
+def index_error(output_format, error_message):
+    """Build the error response for the requested output format.
+
+    * ``json``: the message is returned as ``{"error": error_message}``.
+    * ``csv``: an empty ``searx.csv`` attachment is returned; the message
+      is not part of the response.
+    * ``rss``: the opensearch RSS template is rendered without results
+      and with the message.
+    * anything else (html): a generic 'search error' is appended to
+      ``request.errors`` and the index page is rendered; ``error_message``
+      itself is not used.
+    """
+    if output_format == 'json':
+        payload = json.dumps({'error': error_message})
+        return Response(payload, mimetype='application/json')
+
+    if output_format == 'csv':
+        csv_response = Response('', mimetype='application/csv')
+        csv_response.headers.add('Content-Disposition',
+                                 'attachment;Filename=searx.csv')
+        return csv_response
+
+    if output_format == 'rss':
+        # echo the query back when the request has one
+        query_text = request.form.get('q', '')
+        rss_body = render('opensearch_response_rss.xml',
+                          results=[],
+                          q=query_text,
+                          number_of_results=0,
+                          base_url=get_base_url(),
+                          error_message=error_message)
+        return Response(rss_body, mimetype='text/xml')
+
+    # html
+    request.errors.append(gettext('search error'))
+    return render('index.html')
+
+
@app.route('/search', methods=['GET', 'POST'])
@app.route('/', methods=['GET', 'POST'])
def index():
@@ -408,10 +436,19 @@ def index():
Supported outputs: html, json, csv, rss.
"""
+ # output_format
+ output_format = request.form.get('format', 'html')
+ if output_format not in ['html', 'csv', 'json', 'rss']:
+ output_format = 'html'
+
+ # check if there is query
if request.form.get('q') is None:
- return render(
- 'index.html',
- )
+ if output_format == 'html':
+ return render(
+ 'index.html',
+ )
+ else:
+ return index_error(output_format, 'No query'), 400
# search
search_query = None
@@ -421,20 +458,24 @@ def index():
# search = Search(search_query) # without plugins
search = SearchWithPlugins(search_query, request)
result_container = search.search()
- except:
- request.errors.append(gettext('search error'))
+ except Exception as e:
+ # log exception
logger.exception('search error')
- return render(
- 'index.html',
- )
+ # is it an invalid input parameter or something else ?
+ if (issubclass(e.__class__, SearxParameterException)):
+ return index_error(output_format, e.message), 400
+ else:
+ return index_error(output_format, gettext('search error')), 500
+
+ # results
results = result_container.get_ordered_results()
+ number_of_results = result_container.results_number()
+ if number_of_results < result_container.results_length():
+ number_of_results = 0
# UI
advanced_search = request.form.get('advanced_search', None)
- output_format = request.form.get('format', 'html')
- if output_format not in ['html', 'csv', 'json', 'rss']:
- output_format = 'html'
# output
for result in results:
@@ -470,10 +511,6 @@ def index():
else:
result['publishedDate'] = format_date(result['publishedDate'])
- number_of_results = result_container.results_number()
- if number_of_results < result_container.results_length():
- number_of_results = 0
-
if output_format == 'json':
return Response(json.dumps({'query': search_query.query,
'number_of_results': number_of_results,