logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 2a788c8f29f63bc069436f1a12343a47d66f2523
parent: 90a93422865b81ce256ab02a367a4d3529eb2d18
Author: asciimoo <asciimoo@gmail.com>
Date:   Fri, 31 Jan 2014 04:35:23 +0100

[enh] search language support init

Diffstat:

M searx/engines/__init__.py | 12 +++++++++++-
M searx/engines/bing.py | 12 +++++++++---
M searx/engines/google.py | 13 +++++++++----
A searx/engines/wikipedia.py | 30 ++++++++++++++++++++++++++++++
A searx/languages.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/settings.yml | 3 +--
M searx/static/css/style.css | 3 ++-
M searx/templates/preferences.html | 11 +++++++++++
M searx/webapp.py | 32 ++++++++++++++++++++++++++++++--
9 files changed, 162 insertions(+), 13 deletions(-)

diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py @@ -53,8 +53,14 @@ if not 'engines' in settings or not settings['engines']: for engine_data in settings['engines']: engine_name = engine_data['engine'] engine = load_module(engine_name + '.py') + if not hasattr(engine, 'paging'): engine.paging = False + + if not hasattr(engine, 'language_support'): + #engine.language_support = False + engine.language_support = True + for param_name in engine_data: if param_name == 'engine': continue @@ -158,7 +164,7 @@ def score_results(results): return sorted(results, key=itemgetter('score'), reverse=True) -def search(query, request, selected_engines, pageno=1): +def search(query, request, selected_engines, pageno=1, lang='all'): global engines, categories, number_of_searches requests = [] results = {} @@ -176,11 +182,15 @@ def search(query, request, selected_engines, pageno=1): if pageno > 1 and not engine.paging: continue + if lang != 'all' and not engine.language_support: + continue + request_params = default_request_params() request_params['headers']['User-Agent'] = user_agent request_params['category'] = selected_engine['category'] request_params['started'] = datetime.now() request_params['pageno'] = pageno + request_params['language'] = lang request_params = engine.request(query, request_params) callback = make_callback( diff --git a/searx/engines/bing.py b/searx/engines/bing.py @@ -4,16 +4,22 @@ from cgi import escape base_url = 'http://www.bing.com/' search_string = 'search?{query}&first={offset}' -locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx - paging = True +language_support = True def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 + if params['language'] == 'all': + language = 'en-US' + else: + language = params['language'].replace('_', '-') search_path = search_string.format( - query=urlencode({'q': query, 'setmkt': locale}), + query=urlencode({'q': query, 'setmkt': language}), offset=offset) + + 
params['cookies']['SRCHHPGUSR'] = \ + 'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] #if params['category'] == 'images': # params['url'] = base_url + 'images/' + search_path params['url'] = base_url + search_path diff --git a/searx/engines/google.py b/searx/engines/google.py @@ -5,16 +5,21 @@ from json import loads categories = ['general'] -paging = True - url = 'https://ajax.googleapis.com/' -search_url = url + 'ajax/services/search/web?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}' # noqa +search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa + +paging = True +language_support = True def request(query, params): offset = (params['pageno'] - 1) * 8 + language = 'en-US' + if params['language'] != 'all': + language = params['language'].replace('_', '-') params['url'] = search_url.format(offset=offset, - query=urlencode({'q': query})) + query=urlencode({'q': query}), + language=language) return params diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py @@ -0,0 +1,30 @@ +from json import loads +from urllib import urlencode, quote + +url = 'https://{language}.wikipedia.org/' + +search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json&sroffset={offset}' # noqa + +number_of_results = 10 + +language_support = True + + +def request(query, params): + offset = (params['pageno'] - 1) * 10 + if params['language'] == 'all': + language = 'en' + else: + language = params['language'].split('_')[0] + params['language'] = language + params['url'] = search_url.format(query=urlencode({'srsearch': query}), + offset=offset, + language=language) + return params + + +def response(resp): + search_results = loads(resp.text) + res = search_results.get('query', {}).get('search', []) + return [{'url': url.format(language=resp.search_params['language']) + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')), # noqa + 'title': 
result['title']} for result in res[:int(number_of_results)]] diff --git a/searx/languages.py b/searx/languages.py @@ -0,0 +1,59 @@ +language_codes = ( + ("ar_XA", "Arabic", "Arabia"), + ("bg_BG", "Bulgarian", "Bulgaria"), + ("cs_CZ", "Czech", "Czech Republic"), + ("de_DE", "German", "Germany"), + ("da_DK", "Danish", "Denmark"), + ("de_AT", "German", "Austria"), + ("de_CH", "German", "Switzerland"), + ("el_GR", "Greek", "Greece"), + ("en_AU", "English", "Australia"), + ("en_CA", "English", "Canada"), + ("en_GB", "English", "United Kingdom"), + ("en_ID", "English", "Indonesia"), + ("en_IE", "English", "Ireland"), + ("en_IN", "English", "India"), + ("en_MY", "English", "Malaysia"), + ("en_NZ", "English", "New Zealand"), + ("en_PH", "English", "Philippines"), + ("en_SG", "English", "Singapore"), + ("en_US", "English", "United States"), + ("en_XA", "English", "Arabia"), + ("en_ZA", "English", "South Africa"), + ("es_AR", "Spanish", "Argentina"), + ("es_CL", "Spanish", "Chile"), + ("es_ES", "Spanish", "Spain"), + ("es_MX", "Spanish", "Mexico"), + ("es_US", "Spanish", "United States"), + ("es_XL", "Spanish", "Latin America"), + ("et_EE", "Estonian", "Estonia"), + ("fi_FI", "Finnish", "Finland"), + ("fr_BE", "French", "Belgium"), + ("fr_CA", "French", "Canada"), + ("fr_CH", "French", "Switzerland"), + ("fr_FR", "French", "France"), + ("he_IL", "Hebrew", "Israel"), + ("hr_HR", "Croatian", "Croatia"), + ("hu_HU", "Hungarian", "Hungary"), + ("it_IT", "Italian", "Italy"), + ("ja_JP", "Japanese", "Japan"), + ("ko_KR", "Korean", "Korea"), + ("lt_LT", "Lithuanian", "Lithuania"), + ("lv_LV", "Latvian", "Latvia"), + ("nb_NO", "Norwegian", "Norway"), + ("nl_BE", "Dutch", "Belgium"), + ("nl_NL", "Dutch", "Netherlands"), + ("pl_PL", "Polish", "Poland"), + ("pt_BR", "Portuguese", "Brazil"), + ("pt_PT", "Portuguese", "Portugal"), + ("ro_RO", "Romanian", "Romania"), + ("ru_RU", "Russian", "Russia"), + ("sk_SK", "Slovak", "Slovak Republic"), + ("sl_SL", "Slovenian", "Slovenia"), + 
("sv_SE", "Swedish", "Sweden"), + ("th_TH", "Thai", "Thailand"), + ("tr_TR", "Turkish", "Turkey"), + ("uk_UA", "Ukrainian", "Ukraine"), + ("zh_CN", "Chinese", "China"), + ("zh_HK", "Chinese", "Hong Kong SAR"), + ("zh_TW", "Chinese", "Taiwan")) diff --git a/searx/settings.yml b/searx/settings.yml @@ -7,8 +7,7 @@ server: engines: - name : wikipedia - engine : mediawiki - url : https://en.wikipedia.org/ + engine : wikipedia number_of_results : 1 paging : False diff --git a/searx/static/css/style.css b/searx/static/css/style.css @@ -152,7 +152,7 @@ tr:hover td { background: #DDDDDD; } #results { margin: 10px; padding: 0; margin-bottom: 20px; } #sidebar { position: absolute; left: 54em; width: 12em; margin: 0 2px 5px 5px; padding: 0 2px 2px 2px; } -#suggestions span { display: block; margin: 0 2px 10px 2px; padding: 0; } +#suggestions span { display: block; margin: 0 2px 2px 2px; padding: 0; } #suggestions form { display: block; } #suggestions input { padding: 2px 6px; margin: 2px 4px; font-size: 0.8em; display: inline-block; background: #3498DB; color: #FFFFFF; border-radius: 4px; border: 0; cursor: pointer; } @@ -177,6 +177,7 @@ tr:hover td { background: #DDDDDD; } } #apis { + margin-top: 8px; clear: both; } diff --git a/searx/templates/preferences.html b/searx/templates/preferences.html @@ -12,6 +12,17 @@ </p> </fieldset> <fieldset> + <legend>{{ _('Search language') }}</legend> + <p> + <select name='language'> + <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option> + {% for lang_id,lang_name,country_name in language_codes %} + <option value={{ lang_id }} {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name}} ({{ country_name }})</option> + {% endfor %} + </select> + </p> + </fieldset> + <fieldset> <legend>{{ _('Interface language') }}</legend> <p> <select name='locale'> diff --git a/searx/webapp.py b/searx/webapp.py @@ -29,6 +29,7 @@ from searx import settings, searx_dir from 
searx.engines import search, categories, engines, get_engines_stats from searx.utils import UnicodeWriter from searx.utils import highlight_content, html_to_text +from searx.languages import language_codes from flask.ext.babel import Babel @@ -117,6 +118,11 @@ def parse_query(query): @app.route('/', methods=['GET', 'POST']) def index(): paging = False + lang = 'all' + + if request.cookies.get('language')\ + and request.cookies['language'] in (x[0] for x in language_codes): + lang = request.cookies['language'] if request.method == 'POST': request_data = request.form @@ -159,7 +165,11 @@ def index(): 'name': x.name} for x in categories[categ]) - results, suggestions = search(query, request, selected_engines, pageno) + results, suggestions = search(query, + request, + selected_engines, + pageno, + lang) for result in results: if not paging and engines[result['engine']].paging: @@ -232,6 +242,11 @@ def list_engines(): @app.route('/preferences', methods=['GET', 'POST']) def preferences(): + lang = None + + if request.cookies.get('language')\ + and request.cookies['language'] in (x[0] for x in language_codes): + lang = request.cookies['language'] if request.method == 'POST': selected_categories = [] @@ -244,6 +259,10 @@ def preferences(): selected_categories.append(category) elif pd_name == 'locale' and pd in settings['locales']: locale = pd + elif pd_name == 'language' and (pd == 'all' or + pd in (x[0] for + x in language_codes)): + lang = pd resp = make_response(redirect('/')) @@ -254,6 +273,13 @@ def preferences(): max_age=60 * 60 * 24 * 7 * 4 ) + if lang: + # cookie max age: 4 weeks + resp.set_cookie( + 'language', lang, + max_age=60 * 60 * 24 * 7 * 4 + ) + if selected_categories: # cookie max age: 4 weeks resp.set_cookie( @@ -263,7 +289,9 @@ def preferences(): return resp return render('preferences.html', locales=settings['locales'], - current_locale=get_locale()) + current_locale=get_locale(), + current_language=lang or 'all', + language_codes=language_codes) 
@app.route('/stats', methods=['GET'])