commit: 369ec017b76f65c8202581261165c9423631e1f0
parent e8cdaf117057e7978c19e9541ffefd13a715d5d3
Author: Alexandre Flament <alex@al-f.net>
Date: Wed, 4 Jan 2017 14:01:29 +0100
[enh] standalone_seax.py is a command line interface to searx with JSON output.
Diffstat:
2 files changed, 101 insertions(+), 0 deletions(-)
diff --git a/utils/standalone_search.py b/utils/google_search.py
diff --git a/utils/standalone_searx.py b/utils/standalone_searx.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2016- by Alexandre Flament, <alex@al-f.net>
+'''
+
+# set path
+from sys import path
+from os.path import realpath, dirname
+path.append(realpath(dirname(realpath(__file__)) + '/../'))
+
+# initialization
+from json import dumps
+from searx import settings
+import searx.query
+import searx.search
+import searx.engines
+import searx.preferences
+import argparse
+
+searx.engines.initialize_engines(settings['engines'])
+
+# command line parsing
+parser = argparse.ArgumentParser(description='Standalone searx.')
+parser.add_argument('query', type=str,
+ help='Text query')
+parser.add_argument('--category', type=str, nargs='?',
+ choices=searx.engines.categories.keys(),
+ default='general',
+ help='Search category')
+parser.add_argument('--lang', type=str, nargs='?',default='all',
+ help='Search language')
+parser.add_argument('--pageno', type=int, nargs='?', default=1,
+ help='Page number starting from 1')
+parser.add_argument('--safesearch', type=str, nargs='?', choices=['0', '1', '2'], default='0',
+ help='Safe content filter from none to strict')
+parser.add_argument('--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'],
+ help='Filter by time range')
+args = parser.parse_args()
+
+# search results for the query
+form = {
+ "q":args.query,
+ "categories":args.category.decode('utf-8'),
+ "pageno":str(args.pageno),
+ "language":args.lang,
+ "time_range":args.timerange
+}
+preferences = searx.preferences.Preferences(['oscar'], searx.engines.categories.keys(), searx.engines.engines, [])
+preferences.key_value_settings['safesearch'].parse(args.safesearch)
+
+search_query = searx.search.get_search_query_from_webapp(preferences, form)
+search = searx.search.Search(search_query)
+result_container = search.search()
+
+# output
+from datetime import datetime
+
+def no_parsed_url(results):
+ for result in results:
+ del result['parsed_url']
+ return results
+
+def json_serial(obj):
+ """JSON serializer for objects not serializable by default json code"""
+ if isinstance(obj, datetime):
+ serial = obj.isoformat()
+ return serial
+ raise TypeError ("Type not serializable")
+
+result_container_json = {
+ "search": {
+ "q": search_query.query,
+ "pageno": search_query.pageno,
+ "lang": search_query.lang,
+ "safesearch": search_query.safesearch,
+ "timerange": search_query.time_range,
+ "engines": search_query.engines
+ },
+ "results": no_parsed_url(result_container.get_ordered_results()),
+ "infoboxes": result_container.infoboxes,
+ "suggestions": list(result_container.suggestions),
+ "answers": list(result_container.answers),
+ "paging": result_container.paging,
+ "results_number": result_container.results_number()
+}
+
+print(dumps(result_container_json, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8", default=json_serial))