
searx

My custom branch(es) on searx, a meta-search engine
commit: 49b845051f027cf9d29a20821968103a0f55c9d9
parent: df0b8ee5270a37642ccf5f42e95b0656aefc7fda
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Wed,  6 Dec 2017 14:34:26 +0100

Merge pull request #973 from MarcAbonce/languages

Remove 'all' option from search languages

Diffstat:

M searx/engines/archlinux.py | 6 +++---
M searx/engines/bing.py | 5 +----
M searx/engines/bing_news.py | 5 +----
M searx/engines/dailymotion.py | 5 +----
M searx/engines/duckduckgo.py | 12 +++---------
M searx/engines/duckduckgo_images.py | 8 ++------
M searx/engines/faroo.py | 5 +----
M searx/engines/gigablast.py | 9 +++------
M searx/engines/google.py | 3 ++-
M searx/engines/google_news.py | 5 ++---
M searx/engines/mediawiki.py | 5 +----
M searx/engines/photon.py | 7 +++----
M searx/engines/qwant.py | 23 +++++++++++------------
M searx/engines/startpage.py | 5 ++---
M searx/engines/subtitleseeker.py | 2 +-
M searx/engines/swisscows.py | 5 +----
M searx/engines/twitter.py | 7 +------
M searx/engines/wikidata.py | 4 ----
M searx/engines/wikipedia.py | 2 +-
M searx/engines/yacy.py | 4 +---
M searx/engines/yahoo.py | 4 +---
M searx/engines/yahoo_news.py | 5 +----
M searx/engines/youtube_api.py | 4 +---
M searx/preferences.py | 1 -
M searx/query.py | 34 +++++++++++++++++++---------------
M searx/search.py | 6 +++++-
M searx/settings.yml | 2 +-
M searx/settings_robot.yml | 2 +-
M searx/templates/courgette/preferences.html | 1 -
M searx/templates/legacy/preferences.html | 1 -
M searx/templates/oscar/languages.html | 11 +++++------
M searx/templates/oscar/preferences.html | 4 ++--
M searx/templates/pix-art/preferences.html | 1 -
M searx/webapp.py | 4 ++--
M tests/unit/engines/test_archlinux.py | 2 +-
M tests/unit/engines/test_bing.py | 6 +-----
M tests/unit/engines/test_bing_news.py | 4 ----
M tests/unit/engines/test_dailymotion.py | 4 ----
M tests/unit/engines/test_duckduckgo_images.py | 2 +-
M tests/unit/engines/test_faroo.py | 8 ++------
M tests/unit/engines/test_gigablast.py | 7 +------
M tests/unit/engines/test_google.py | 2 +-
M tests/unit/engines/test_google_news.py | 7 +------
M tests/unit/engines/test_mediawiki.py | 4 ----
M tests/unit/engines/test_qwant.py | 3 ++-
M tests/unit/engines/test_startpage.py | 4 ----
M tests/unit/engines/test_subtitleseeker.py | 36 ------------------------------------
M tests/unit/engines/test_swisscows.py | 5 -----
M tests/unit/engines/test_twitter.py | 6 ------
M tests/unit/engines/test_wikidata.py | 6 +++---
M tests/unit/engines/test_wikipedia.py | 4 ----
M tests/unit/engines/test_yacy.py | 5 -----
M tests/unit/engines/test_yahoo.py | 7 -------
M tests/unit/engines/test_yahoo_news.py | 7 -------
M tests/unit/test_preferences.py | 15 ++++++++++-----
A tests/unit/test_query.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
56 files changed, 166 insertions(+), 249 deletions(-)
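
With this commit the engines no longer special-case a user language of 'all'; instead, searx/search.py (see the diff below) maps the legacy value onto the instance default. A minimal standalone sketch of that fallback, not part of the commit itself, assuming the new "en-US" default from settings.yml ('settings' here stands in for the dict loaded from settings.yml):

    # illustrative sketch of the fallback added in searx/search.py
    settings = {'search': {'language': 'en-US'}}

    def resolve_query_language(query_lang):
        # provides backwards compatibility for requests still sending the old 'all' default
        if query_lang == 'all':
            return settings['search']['language']
        return query_lang

    assert resolve_query_language('all') == 'en-US'
    assert resolve_query_language('de-CH') == 'de-CH'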

diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
@@ -26,7 +26,7 @@ xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'

-# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
+# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
 def locale_to_lang_code(locale):
     if locale.find('-') >= 0:
         locale = locale.split('-')[0]
@@ -36,7 +36,7 @@ def locale_to_lang_code(locale):
 # wikis for some languages were moved off from the main site, we need to make
 # requests to correct URLs to be able to get results in those languages
 lang_urls = {
-    'all': {
+    'en': {
         'base': 'https://wiki.archlinux.org',
         'search': '/index.php?title=Special:Search&offset={offset}&{query}'
     },
@@ -67,7 +67,7 @@ lang_urls = {
 def get_lang_urls(language):
     if language in lang_urls:
         return lang_urls[language]
-    return lang_urls['all']
+    return lang_urls['en']

 # Language names to build search requests for
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
@@ -32,10 +32,7 @@ search_string = 'search?{query}&first={offset}'
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1

-    if params['language'] != 'all':
-        lang = params['language'].split('-')[0].upper()
-    else:
-        lang = 'EN'
+    lang = params['language'].split('-')[0].upper()

     query = u'language:{} {}'.format(lang, query.decode('utf-8')).encode('utf-8')
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
@@ -71,10 +71,7 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1

-    if params['language'] == 'all':
-        language = 'en-US'
-    else:
-        language = params['language']
+    language = params['language']

     params['url'] = _get_url(query, language, offset, params['time_range'])
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
@@ -32,10 +32,7 @@ supported_languages_url = 'https://api.dailymotion.com/languages'
 # do search-request
 def request(query, params):
-    if params['language'] == 'all':
-        locale = 'en-US'
-    else:
-        locale = params['language']
+    locale = params['language']

     params['url'] = search_url.format(
         query=urlencode({'search': query, 'localization': locale}),
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
@@ -44,9 +44,7 @@ content_xpath = './/a[@class="result__snippet"]'
 # match query's language to a region code that duckduckgo will accept
 def get_region_code(lang, lang_list=None):
     # custom fixes for languages
-    if lang == 'all':
-        region_code = None
-    elif lang[:2] == 'ja':
+    if lang[:2] == 'ja':
         region_code = 'jp-jp'
     elif lang[:2] == 'sl':
         region_code = 'sl-sl'
@@ -82,12 +80,8 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 30

     region_code = get_region_code(params['language'])
-    if region_code:
-        params['url'] = url.format(
-            query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
-    else:
-        params['url'] = url.format(
-            query=urlencode({'q': query}), offset=offset, dc_param=offset)
+    params['url'] = url.format(
+        query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)

     if params['time_range'] in time_range_dict:
         params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py
@@ -53,12 +53,8 @@ def request(query, params):
     safesearch = params['safesearch'] - 1

     region_code = get_region_code(params['language'], lang_list=supported_languages)
-    if region_code:
-        params['url'] = images_url.format(
-            query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)
-    else:
-        params['url'] = images_url.format(
-            query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
+    params['url'] = images_url.format(
+        query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd)

     return params
diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py
@@ -40,10 +40,7 @@ def request(query, params):
     offset = (params['pageno'] - 1) * number_of_results + 1
     categorie = search_category.get(params['category'], 'web')

-    if params['language'] == 'all':
-        language = 'en'
-    else:
-        language = params['language'].split('_')[0]
+    language = params['language'].split('-')[0]

     # if language is not supported, put it in english
     if language != 'en' and\
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
@@ -49,12 +49,9 @@ supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
 def request(query, params):
     offset = (params['pageno'] - 1) * number_of_results

-    if params['language'] == 'all':
-        language = 'xx'
-    else:
-        language = params['language'].replace('-', '_').lower()
-        if language.split('-')[0] != 'zh':
-            language = language.split('-')[0]
+    language = params['language'].replace('-', '_').lower()
+    if language.split('-')[0] != 'zh':
+        language = language.split('-')[0]

     if params['safesearch'] >= 1:
         safesearch = 1
diff --git a/searx/engines/google.py b/searx/engines/google.py
@@ -165,7 +165,8 @@ def extract_text_from_dom(result, xpath):
 def request(query, params):
     offset = (params['pageno'] - 1) * 10

-    if params['language'] == 'all':
+    # temporary fix until a way of supporting en-US is found
+    if params['language'] == 'en-US':
         params['language'] = 'en-GB'

     if params['language'][:2] == 'jv':
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
@@ -50,9 +50,8 @@ def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       search_options=urlencode(search_options))

-    if params['language'] != 'all':
-        language_array = params['language'].lower().split('-')
-        params['url'] += '&lr=lang_' + language_array[0]
+    language_array = params['language'].lower().split('-')
+    params['url'] += '&lr=lang_' + language_array[0]

     return params
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
@@ -45,10 +45,7 @@ def request(query, params):
     format_strings = list(Formatter().parse(base_url))

-    if params['language'] == 'all':
-        language = 'en'
-    else:
-        language = params['language'].split('-')[0]
+    language = params['language'].split('-')[0]

     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
     if any(x[1] == 'language' for x in format_strings):
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
@@ -35,10 +35,9 @@ def request(query, params):
         search_string.format(query=urlencode({'q': query}),
                              limit=number_of_results)

-    if params['language'] != 'all':
-        language = params['language'].split('_')[0]
-        if language in supported_languages:
-            params['url'] = params['url'] + "&lang=" + language
+    language = params['language'].split('-')[0]
+    if language in supported_languages:
+        params['url'] = params['url'] + "&lang=" + language

     # using searx User-Agent
     params['headers']['User-Agent'] = searx_useragent()
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
@@ -44,18 +44,17 @@ def request(query, params):
         query=urlencode({'q': query}),
         offset=offset)

-    # add language tag if specified
-    if params['language'] != 'all':
-        if params['language'] == 'no' or params['language'].startswith('no-'):
-            params['language'] = params['language'].replace('no', 'nb', 1)
-        if params['language'].find('-') < 0:
-            # tries to get a country code from language
-            for lang in supported_languages:
-                lc = lang.split('-')
-                if params['language'] == lc[0]:
-                    params['language'] = lang
-                    break
-        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
+    # add language tag
+    if params['language'] == 'no' or params['language'].startswith('no-'):
+        params['language'] = params['language'].replace('no', 'nb', 1)
+    if params['language'].find('-') < 0:
+        # tries to get a country code from language
+        for lang in supported_languages:
+            lc = lang.split('-')
+            if params['language'] == lc[0]:
+                params['language'] = lang
+                break
+    params['url'] += '&locale=' + params['language'].replace('-', '_').lower()

     return params
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
@@ -45,9 +45,8 @@ def request(query, params):
     params['data'] = {'query': query, 'startat': offset}

-    # set language if specified
-    if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
+    # set language
+    params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])

     return params
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
@@ -48,7 +48,7 @@ def response(resp):
         search_lang = 'Farsi'
     elif resp.search_params['language'] == 'pt-BR':
         search_lang = 'Brazilian'
-    elif resp.search_params['language'] != 'all':
+    else:
         search_lang = [lc[3] for lc in language_codes
                        if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
@@ -35,10 +35,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
 # do search-request
 def request(query, params):
-    if params['language'] == 'all':
-        ui_language = 'browser'
-        region = 'browser'
-    elif params['language'].split('-')[0] == 'no':
+    if params['language'].split('-')[0] == 'no':
         region = 'nb-NO'
     else:
         region = params['language']
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
@@ -37,12 +37,7 @@ timestamp_xpath = './/span[contains(@class,"_timestamp")]'
 # do search-request
 def request(query, params):
     params['url'] = search_url + urlencode({'q': query})
-
-    # set language if specified
-    if params['language'] != 'all':
-        params['cookies']['lang'] = params['language'].split('-')[0]
-    else:
-        params['cookies']['lang'] = 'en'
+    params['cookies']['lang'] = params['language'].split('-')[0]

     return params
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
@@ -57,8 +57,6 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 def request(query, params):
     language = params['language'].split('-')[0]
-    if language == 'all':
-        language = 'en'

     params['url'] = url_search.format(
         query=urlencode({'label': query, 'language': language}))
@@ -71,8 +69,6 @@ def response(resp):
     wikidata_ids = html.xpath(wikidata_ids_xpath)

     language = resp.search_params['language'].split('-')[0]
-    if language == 'all':
-        language = 'en'

     # TODO: make requests asynchronous to avoid timeout when result_count > 1
     for wikidata_id in wikidata_ids[:result_count]:
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
@@ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 # set language in base_url
 def url_lang(lang):
     lang = lang.split('-')[0]
-    if lang == 'all' or lang not in supported_languages:
+    if lang not in supported_languages:
         language = 'en'
     else:
         language = lang
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
@@ -51,9 +51,7 @@ def request(query, params):
                                      limit=number_of_results,
                                      search_type=search_type)

-    # add language tag if specified
-    if params['language'] != 'all':
-        params['url'] += '&lr=lang_' + params['language'].split('-')[0]
+    params['url'] += '&lr=lang_' + params['language'].split('-')[0]

     return params
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
@@ -71,9 +71,7 @@ def _get_url(query, offset, language, time_range):
 def _get_language(params):
-    if params['language'] == 'all':
-        return 'en'
-    elif params['language'][:2] == 'zh':
+    if params['language'][:2] == 'zh':
         if params['language'] == 'zh' or params['language'] == 'zh-CH':
             return 'szh'
         else:
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
@@ -38,10 +38,7 @@ suggestion_xpath = '//div[contains(@class,"VerALSOTRY")]//a'
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1

-    if params['language'] == 'all':
-        language = 'en'
-    else:
-        language = params['language'].split('_')[0]
+    language = params['language'].split('-')[0]

     params['url'] = search_url.format(offset=offset,
                                       query=urlencode({'p': query}),
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
@@ -34,9 +34,7 @@ def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       api_key=api_key)

-    # add language tag if specified
-    if params['language'] != 'all':
-        params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
+    params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]

     return params
diff --git a/searx/preferences.py b/searx/preferences.py
@@ -12,7 +12,6 @@ if version[0] == '3':
 COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5  # 5 years
 LANGUAGE_CODES = [l[0] for l in languages]
-LANGUAGE_CODES.append('all')
 DISABLED = 0
 ENABLED = 1
 DOI_RESOLVERS = list(settings['doi_resolvers'])
diff --git a/searx/query.py b/searx/query.py
@@ -73,11 +73,6 @@ class RawTextQuery(object):
             if query_part[0] == ':':
                 lang = query_part[1:].lower().replace('_', '-')

-                # user may set a valid, yet not selectable language
-                if VALID_LANGUAGE_CODE.match(lang):
-                    self.languages.append(lang)
-                    parse_next = True
-
                 # check if any language-code is equal with
                 # declared language-codes
                 for lc in language_codes:
@@ -85,16 +80,25 @@ class RawTextQuery(object):
                     # if correct language-code is found
                     # set it as new search-language
-                    if lang == lang_id\
-                       or lang_id.startswith(lang)\
-                       or lang == lang_name\
-                       or lang == english_name\
-                       or lang.replace('-', ' ') == country:
-                        parse_next = True
-                        self.languages.append(lang_id)
-                        # to ensure best match (first match is not necessarily the best one)
-                        if lang == lang_id:
-                            break
+                    if (lang == lang_id
+                        or lang == lang_name
+                        or lang == english_name
+                        or lang.replace('-', ' ') == country)\
+                       and lang not in self.languages:
+                        parse_next = True
+                        lang_parts = lang_id.split('-')
+                        if len(lang_parts) == 2:
+                            self.languages.append(lang_parts[0] + '-' + lang_parts[1].upper())
+                        else:
+                            self.languages.append(lang_id)
+                        # to ensure best match (first match is not necessarily the best one)
+                        if lang == lang_id:
+                            break
+
+                # user may set a valid, yet not selectable language
+                if not self.languages and VALID_LANGUAGE_CODE.match(lang):
+                    self.languages.append(lang)
+                    parse_next = True

             # this force a engine or category
             if query_part[0] == '!' or query_part[0] == '?':
diff --git a/searx/search.py b/searx/search.py
@@ -24,7 +24,7 @@ from flask_babel import gettext
 import requests.exceptions
 import searx.poolrequests as requests_lib
 from searx.engines import (
-    categories, engines
+    categories, engines, settings
 )
 from searx.answerers import ask
 from searx.utils import gen_useragent
@@ -220,6 +220,10 @@ def get_search_query_from_webapp(preferences, form):
     else:
         query_lang = preferences.get_value('language')

+    # provides backwards compatibility for requests using old language default
+    if query_lang == 'all':
+        query_lang = settings['search']['language']
+
     # check language
     if not VALID_LANGUAGE_CODE.match(query_lang):
         raise SearxParameterException('language', query_lang)
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -5,7 +5,7 @@ general:
 search:
     safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
     autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default
-    language : "all"
+    language : "en-US"

 server:
     port : 8888
diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml
@@ -5,7 +5,7 @@ general:
 search:
     safe_search : 0
     autocomplete : ""
-    language: "all"
+    language: "en-US"

 server:
     port : 11111
diff --git a/searx/templates/courgette/preferences.html b/searx/templates/courgette/preferences.html
@@ -13,7 +13,6 @@
         <legend>{{ _('Search language') }}</legend>
         <p>
         <select name='language'>
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
             {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
             <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
             {% endfor %}
diff --git a/searx/templates/legacy/preferences.html b/searx/templates/legacy/preferences.html
@@ -14,7 +14,6 @@
         <legend>{{ _('Search language') }}</legend>
         <p>
         <select name='language'>
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
            {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
            {% endfor %}
diff --git a/searx/templates/oscar/languages.html b/searx/templates/oscar/languages.html
@@ -3,10 +3,9 @@
 {% else %}
 <select class="time_range custom-select form-control" id='language' name='language'>
 {% endif %}
-    <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-    {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
-    <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
-        {{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
-    </option>
-    {% endfor %}
+    {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+    <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
+        {{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
+    </option>
+    {% endfor %}
 </select>
diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html
@@ -187,7 +187,7 @@
         </td>
         <th>{{ search_engine.name }}</th>
         <td class="name">{{ shortcuts[search_engine.name] }}</td>
-        <td>{{ support_toggle(current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages) }}</td>
+        <td>{{ support_toggle(current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages) }}</td>
         <td>{{ support_toggle(search_engine.safesearch==True) }}</td>
         <td>{{ support_toggle(search_engine.time_range_support==True) }}</td>
         <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
@@ -197,7 +197,7 @@
         <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
         <td>{{ support_toggle(search_engine.time_range_support==True) }}</td>
         <td>{{ support_toggle(search_engine.safesearch==True) }}</td>
-        <td>{{ support_toggle(current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages) }}</td>
+        <td>{{ support_toggle(current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages) }}</td>
        <td>{{ shortcuts[search_engine.name] }}</td>
        <th>{{ search_engine.name }}</th>
        <td class="onoff-checkbox">
diff --git a/searx/templates/pix-art/preferences.html b/searx/templates/pix-art/preferences.html
@@ -9,7 +9,6 @@
         <legend>{{ _('Search language') }}</legend>
         <p>
         <select name='language'>
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
            {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
            {% endfor %}
diff --git a/searx/webapp.py b/searx/webapp.py
@@ -630,8 +630,8 @@ def autocompleter():
     if len(raw_results) <= 3 and completer:
         # get language from cookie
         language = request.preferences.get_value('language')
-        if not language or language == 'all':
-            language = 'en'
+        if not language:
+            language = settings['search']['language']
         else:
             language = language.split('-')[0]
         # run autocompletion
diff --git a/tests/unit/engines/test_archlinux.py b/tests/unit/engines/test_archlinux.py
@@ -4,7 +4,7 @@ from searx.engines import archlinux
 from searx.testing import SearxTestCase

 domains = {
-    'all': 'https://wiki.archlinux.org',
+    'en': 'https://wiki.archlinux.org',
     'de': 'https://wiki.archlinux.de',
     'fr': 'https://wiki.archlinux.fr',
     'ja': 'https://wiki.archlinuxjp.org',
diff --git a/tests/unit/engines/test_bing.py b/tests/unit/engines/test_bing.py
@@ -10,17 +10,13 @@ class TestBingEngine(SearxTestCase):
         query = u'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('language%3AFR' in params['url'])
         self.assertTrue('bing.com' in params['url'])

-        dicto['language'] = 'all'
-        params = bing.request(query.encode('utf-8'), dicto)
-        self.assertTrue('language' in params['url'])
-
     def test_response(self):
         self.assertRaises(AttributeError, bing.response, None)
         self.assertRaises(AttributeError, bing.response, [])
diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py
@@ -19,10 +19,6 @@ class TestBingNewsEngine(SearxTestCase):
         self.assertIn('bing.com', params['url'])
         self.assertIn('fr', params['url'])

-        dicto['language'] = 'all'
-        params = bing_news.request(query, dicto)
-        self.assertIn('en', params['url'])
-
     def test_no_url_in_request_year_time_range(self):
         dicto = defaultdict(dict)
         query = 'test_query'
diff --git a/tests/unit/engines/test_dailymotion.py b/tests/unit/engines/test_dailymotion.py
@@ -18,10 +18,6 @@ class TestDailymotionEngine(SearxTestCase):
         self.assertTrue('dailymotion.com' in params['url'])
         self.assertTrue('fr' in params['url'])

-        dicto['language'] = 'all'
-        params = dailymotion.request(query, dicto)
-        self.assertTrue('en' in params['url'])
-
     def test_response(self):
         self.assertRaises(AttributeError, dailymotion.response, None)
         self.assertRaises(AttributeError, dailymotion.response, [])
diff --git a/tests/unit/engines/test_duckduckgo_images.py b/tests/unit/engines/test_duckduckgo_images.py
@@ -15,7 +15,7 @@ class TestDuckduckgoImagesEngine(SearxTestCase):
         dicto['is_test'] = True
         dicto['pageno'] = 1
         dicto['safesearch'] = 0
-        dicto['language'] = 'all'
+        dicto['language'] = 'en-US'
         params = duckduckgo_images.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
diff --git a/tests/unit/engines/test_faroo.py b/tests/unit/engines/test_faroo.py
@@ -11,7 +11,7 @@ class TestFarooEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         dicto['category'] = 'general'
         params = faroo.request(query, dicto)
         self.assertIn('url', params)
@@ -20,11 +20,7 @@ class TestFarooEngine(SearxTestCase):
         self.assertIn('en', params['url'])
         self.assertIn('web', params['url'])

-        dicto['language'] = 'all'
-        params = faroo.request(query, dicto)
-        self.assertIn('en', params['url'])
-
-        dicto['language'] = 'de_DE'
+        dicto['language'] = 'de-DE'
         params = faroo.request(query, dicto)
         self.assertIn('de', params['url'])
diff --git a/tests/unit/engines/test_gigablast.py b/tests/unit/engines/test_gigablast.py
@@ -11,16 +11,11 @@ class TestGigablastEngine(SearxTestCase):
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
         dicto['safesearch'] = 0
-        dicto['language'] = 'all'
+        dicto['language'] = 'en-US'
         params = gigablast.request(query, dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('gigablast.com' in params['url'])
-        self.assertTrue('xx' in params['url'])
-
-        dicto['language'] = 'en-US'
-        params = gigablast.request(query, dicto)
-        self.assertTrue('en' in params['url'])
         self.assertFalse('en-US' in params['url'])

     def test_response(self):
diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py
@@ -26,7 +26,7 @@ class TestGoogleEngine(SearxTestCase):
         self.assertIn('google.fr', params['url'])
         self.assertIn('fr', params['headers']['Accept-Language'])

-        dicto['language'] = 'all'
+        dicto['language'] = 'en-US'
         params = google.request(query, dicto)
         self.assertIn('google.co', params['url'])
         self.assertIn('en', params['headers']['Accept-Language'])
diff --git a/tests/unit/engines/test_google_news.py b/tests/unit/engines/test_google_news.py
@@ -12,18 +12,13 @@ class TestGoogleNewsEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         dicto['time_range'] = 'w'
         params = google_news.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
         self.assertIn('fr', params['url'])

-        dicto['language'] = 'all'
-        params = google_news.request(query, dicto)
-        self.assertIn('url', params)
-        self.assertNotIn('fr', params['url'])
-
     def test_response(self):
         self.assertRaises(AttributeError, google_news.response, None)
         self.assertRaises(AttributeError, google_news.response, [])
diff --git a/tests/unit/engines/test_mediawiki.py b/tests/unit/engines/test_mediawiki.py
@@ -18,10 +18,6 @@ class TestMediawikiEngine(SearxTestCase):
         self.assertIn('wikipedia.org', params['url'])
         self.assertIn('fr', params['url'])

-        dicto['language'] = 'all'
-        params = mediawiki.request(query, dicto)
-        self.assertIn('en', params['url'])
-
         mediawiki.base_url = "http://test.url/"
         mediawiki.search_url = mediawiki.base_url +\
             'w/api.php?action=query'\
diff --git a/tests/unit/engines/test_qwant.py b/tests/unit/engines/test_qwant.py
@@ -19,10 +19,11 @@ class TestQwantEngine(SearxTestCase):
         self.assertIn('qwant.com', params['url'])
         self.assertIn('fr_fr', params['url'])

-        dicto['language'] = 'all'
+        dicto['language'] = 'en-US'
         qwant.categories = ['news']
         params = qwant.request(query, dicto)
         self.assertFalse('fr' in params['url'])
+        self.assertIn('en_us', params['url'])
         self.assertIn('news', params['url'])

         qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
diff --git a/tests/unit/engines/test_startpage.py b/tests/unit/engines/test_startpage.py
@@ -21,10 +21,6 @@ class TestStartpageEngine(SearxTestCase):
         self.assertIn('with_language', params['data'])
         self.assertIn('lang_fr', params['data']['with_language'])

-        dicto['language'] = 'all'
-        params = startpage.request(query, dicto)
-        self.assertNotIn('with_language', params['data'])
-
     def test_response(self):
         self.assertRaises(AttributeError, startpage.response, None)
         self.assertRaises(AttributeError, startpage.response, [])
diff --git a/tests/unit/engines/test_subtitleseeker.py b/tests/unit/engines/test_subtitleseeker.py
@@ -99,42 +99,6 @@ class TestSubtitleseekerEngine(SearxTestCase):
             <div class="clear"></div>
         </div>
         """
-        dicto['language'] = 'all'
-        response = mock.Mock(text=html, search_params=dicto)
-        results = subtitleseeker.response(response)
-        self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['title'], 'This is the Title')
-        self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
-        self.assertIn('1998', results[0]['content'])
-        self.assertIn('1039 Subs', results[0]['content'])
-
-        html = """
-        <div class="boxRows">
-            <div class="boxRowsInner" style="width:600px;">
-                <img src="http://static.subtitleseeker.com/images/movie.gif"
-                    style="width:16px; height:16px;" class="icon">
-                <a href="http://this.is.the.url/"
-                    class="blue" title="Title subtitle" >
-                    This is the Title
-                </a>
-            </div>
-            <div class="boxRowsInner f12b red" style="width:70px;">
-                1998
-            </div>
-            <div class="boxRowsInner grey-web f12" style="width:120px;">
-                <img src="http://static.subtitleseeker.com/images/basket_put.png"
-                    style="width:16px; height:16px;" class="icon">
-                1039 Subs
-            </div>
-            <div class="boxRowsInner grey-web f10" style="width:130px;">
-                <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png"
-                    style="width:16px; height:16px;" class="icon">
-                1 hours ago
-            </div>
-            <div class="clear"></div>
-        </div>
-        """
         subtitleseeker.language = 'English'
         response = mock.Mock(text=html, search_params=dicto)
         results = subtitleseeker.response(response)
diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py
@@ -18,11 +18,6 @@ class TestSwisscowsEngine(SearxTestCase):
         self.assertTrue('uiLanguage=de' in params['url'])
         self.assertTrue('region=de-DE' in params['url'])

-        dicto['language'] = 'all'
-        params = swisscows.request(query, dicto)
-        self.assertTrue('uiLanguage=browser' in params['url'])
-        self.assertTrue('region=browser' in params['url'])
-
         dicto['category'] = 'images'
         params = swisscows.request(query, dicto)
         self.assertIn('image', params['url'])
diff --git a/tests/unit/engines/test_twitter.py b/tests/unit/engines/test_twitter.py
@@ -20,12 +20,6 @@ class TestTwitterEngine(SearxTestCase):
         self.assertIn('lang', params['cookies'])
         self.assertIn('fr', params['cookies']['lang'])

-        dicto['language'] = 'all'
-        params = twitter.request(query, dicto)
-        self.assertIn('cookies', params)
-        self.assertIn('lang', params['cookies'])
-        self.assertIn('en', params['cookies']['lang'])
-
     def test_response(self):
         self.assertRaises(AttributeError, twitter.response, None)
         self.assertRaises(AttributeError, twitter.response, [])
diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py
@@ -11,14 +11,14 @@ class TestWikidataEngine(SearxTestCase):
     def test_request(self):
         query = 'test_query'
         dicto = defaultdict(dict)
-        dicto['language'] = 'all'
+        dicto['language'] = 'en-US'
         params = wikidata.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
         self.assertIn('wikidata.org', params['url'])
         self.assertIn('en', params['url'])

-        dicto['language'] = 'es_ES'
+        dicto['language'] = 'es-ES'
         params = wikidata.request(query, dicto)
         self.assertIn(query, params['url'])
         self.assertIn('es', params['url'])
@@ -30,7 +30,7 @@ class TestWikidataEngine(SearxTestCase):
         self.assertRaises(AttributeError, wikidata.response, '')
         self.assertRaises(AttributeError, wikidata.response, '[]')

-        response = mock.Mock(text='<html></html>', search_params={"language": "all"})
+        response = mock.Mock(text='<html></html>', search_params={"language": "en"})
         self.assertEqual(wikidata.response(response), [])

     def test_getDetail(self):
diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py
@@ -25,10 +25,6 @@ class TestWikipediaEngine(SearxTestCase):
         self.assertIn('Test_Query', params['url'])
         self.assertNotIn('test_query', params['url'])

-        dicto['language'] = 'all'
-        params = wikipedia.request(query, dicto)
-        self.assertIn('en', params['url'])
-
         dicto['language'] = 'xx'
         params = wikipedia.request(query, dicto)
         self.assertIn('en', params['url'])
diff --git a/tests/unit/engines/test_yacy.py b/tests/unit/engines/test_yacy.py
@@ -17,11 +17,6 @@ class TestYacyEngine(SearxTestCase):
         self.assertIn('localhost', params['url'])
         self.assertIn('fr', params['url'])

-        dicto['language'] = 'all'
-        params = yacy.request(query, dicto)
-        self.assertIn('url', params)
-        self.assertNotIn('lr=lang_', params['url'])
-
     def test_response(self):
         self.assertRaises(AttributeError, yacy.response, None)
         self.assertRaises(AttributeError, yacy.response, [])
diff --git a/tests/unit/engines/test_yahoo.py b/tests/unit/engines/test_yahoo.py
@@ -39,13 +39,6 @@ class TestYahooEngine(SearxTestCase):
         self.assertIn('sB', params['cookies'])
         self.assertIn('fr', params['cookies']['sB'])

-        dicto['language'] = 'all'
-        params = yahoo.request(query, dicto)
-        self.assertIn('cookies', params)
-        self.assertIn('sB', params['cookies'])
-        self.assertIn('en', params['cookies']['sB'])
-        self.assertIn('en', params['url'])
-
     def test_no_url_in_request_year_time_range(self):
         dicto = defaultdict(dict)
         query = 'test_query'
diff --git a/tests/unit/engines/test_yahoo_news.py b/tests/unit/engines/test_yahoo_news.py
@@ -22,13 +22,6 @@ class TestYahooNewsEngine(SearxTestCase):
         self.assertIn('sB', params['cookies'])
         self.assertIn('fr', params['cookies']['sB'])

-        dicto['language'] = 'all'
-        params = yahoo_news.request(query, dicto)
-        self.assertIn('cookies', params)
-        self.assertIn('sB', params['cookies'])
-        self.assertIn('en', params['cookies']['sB'])
-        self.assertIn('en', params['url'])
-
     def test_sanitize_url(self):
         url = "test.url"
         self.assertEqual(url, yahoo_news.sanitize_url(url))
diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py
@@ -90,25 +90,30 @@ class TestSettings(SearxTestCase):
     # search language settings
     def test_lang_setting_valid_choice(self):
-        setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
+        setting = SearchLanguageSetting('en', choices=['de', 'en'])
         setting.parse('de')
         self.assertEquals(setting.get_value(), 'de')

     def test_lang_setting_invalid_choice(self):
-        setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
+        setting = SearchLanguageSetting('en', choices=['de', 'en'])
         setting.parse('xx')
-        self.assertEquals(setting.get_value(), 'all')
+        self.assertEquals(setting.get_value(), 'en')

     def test_lang_setting_old_cookie_choice(self):
-        setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
+        setting = SearchLanguageSetting('en', choices=['en', 'es', 'es-ES'])
         setting.parse('es_XA')
         self.assertEquals(setting.get_value(), 'es')

     def test_lang_setting_old_cookie_format(self):
-        setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
+        setting = SearchLanguageSetting('en', choices=['en', 'es', 'es-ES'])
         setting.parse('es_ES')
         self.assertEquals(setting.get_value(), 'es-ES')

+    def test_lang_setting_old_default(self):
+        setting = SearchLanguageSetting('en', choices=['en', 'es', 'de'])
+        setting.parse('all')
+        self.assertEquals(setting.get_value(), 'en')
+
     # plugins settings
     def test_plugins_setting_all_default_enabled(self):
         plugin1 = PluginStub('plugin1', True)
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
@@ -0,0 +1,64 @@
+from searx.query import RawTextQuery
+from searx.testing import SearxTestCase
+
+
+class TestQuery(SearxTestCase):
+
+    def test_simple_query(self):
+        query_text = 'the query'
+        query = RawTextQuery(query_text, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), query_text)
+        self.assertEquals(len(query.query_parts), 1)
+        self.assertEquals(len(query.languages), 0)
+        self.assertFalse(query.specific)
+
+    def test_language_code(self):
+        language = 'es-ES'
+        query_text = 'the query'
+        full_query = ':' + language + ' ' + query_text
+        query = RawTextQuery(full_query, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), full_query)
+        self.assertEquals(len(query.query_parts), 3)
+        self.assertEquals(len(query.languages), 1)
+        self.assertIn(language, query.languages)
+        self.assertFalse(query.specific)
+
+    def test_language_name(self):
+        language = 'english'
+        query_text = 'the query'
+        full_query = ':' + language + ' ' + query_text
+        query = RawTextQuery(full_query, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), full_query)
+        self.assertEquals(len(query.query_parts), 3)
+        self.assertIn('en', query.languages)
+        self.assertFalse(query.specific)
+
+    def test_unlisted_language_code(self):
+        language = 'all'
+        query_text = 'the query'
+        full_query = ':' + language + ' ' + query_text
+        query = RawTextQuery(full_query, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), full_query)
+        self.assertEquals(len(query.query_parts), 3)
+        self.assertIn('all', query.languages)
+        self.assertFalse(query.specific)
+
+    def test_invalid_language_code(self):
+        language = 'not_a_language'
+        query_text = 'the query'
+        full_query = ':' + language + ' ' + query_text
+        query = RawTextQuery(full_query, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), full_query)
+        self.assertEquals(len(query.query_parts), 1)
+        self.assertEquals(len(query.languages), 0)
+        self.assertFalse(query.specific)
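
The reworked searx/query.py also normalizes the country part of a ':language' query prefix when it matches a declared language code. A small usage sketch, illustrative and not part of the diff, assuming 'es-ES' is among the instance's language_codes (mirrors tests/unit/test_query.py above):

    from searx.query import RawTextQuery

    # ':es-es' is lower-cased, matched against the declared language codes and
    # stored with the country part upper-cased
    query = RawTextQuery(u':es-es the query', [])
    query.parse_query()
    print(query.languages)  # expected: ['es-ES']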