commit: 9743bde25ef2ce6b765b8192aafcdc0a15739b17
parent: ea034fafa994227ea89662710901e73cb901e28c
Author: Adam Tauber <asciimoo@gmail.com>
Date: Wed, 28 Dec 2016 20:09:57 +0100
Merge pull request #748 from a01200356/languages
[mod] Allow users to search in most engine supported languages
Diffstat:
54 files changed, 984 insertions(+), 153 deletions(-)
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -43,7 +43,7 @@ generally made searx better:
- Kang-min Liu
- Kirill Isakov
- Guilhem Bonnefille
-- Marc Abonce Seguin
+- Marc Abonce Seguin @a01200356
- @jibe-b
- Christian Pietsch @pietsch
- @Maxqia
@@ -55,7 +55,6 @@ generally made searx better:
- Ammar Najjar @ammarnajjar
- @stepshal
- François Revol @mmuman
-- marc @a01200356
- Harry Wood @harry-wood
- Thomas Renard @threnard
- Pydo `<https://github.com/pydo>`_
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
@@ -81,17 +81,17 @@ def searx_bang(full_query):
engine_query = full_query.getSearchQuery()[1:]
for lc in language_codes:
- lang_id, lang_name, country = map(str.lower, lc)
+ lang_id, lang_name, country, english_name = map(str.lower, lc)
# check if query starts with language-id
if lang_id.startswith(engine_query):
if len(engine_query) <= 2:
- results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
+ results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
else:
results.append(':{lang_id}'.format(lang_id=lang_id))
# check if query starts with language name
- if lang_name.startswith(engine_query):
+ if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
results.append(':{lang_name}'.format(lang_name=lang_name))
# check if query starts with country
diff --git a/searx/data/engines_languages.json b/searx/data/engines_languages.json
@@ -0,0 +1 @@
+{"google news": {"el": {"name": "Ελληνικά"}, "eo": {"name": "Esperanto"}, "en": {"name": "English"}, "af": {"name": "Afrikaans"}, "vi": {"name": "Tiếng Việt"}, "ca": {"name": "Català"}, "it": {"name": "Italiano"}, "iw": {"name": "עברית"}, "hy": {"name": "Հայերեն"}, "cs": {"name": "Čeština"}, "et": {"name": "Eesti"}, "id": {"name": "Indonesia"}, "es": {"name": "Español"}, "ru": {"name": "Русский"}, "nl": {"name": "Nederlands"}, "pt": {"name": "Português"}, "no": {"name": "Norsk"}, "tr": {"name": "Türkçe"}, "lt": {"name": "Lietuvių"}, "lv": {"name": "Latviešu"}, "tl": {"name": "Filipino"}, "zh-TW": {"name": "中文 (繁體)"}, "th": {"name": "ไทย"}, "ro": {"name": "Română"}, "is": {"name": "Íslenska"}, "pl": {"name": "Polski"}, "be": {"name": "Беларуская"}, "fr": {"name": "Français"}, "bg": {"name": "Български"}, "hr": {"name": "Hrvatski"}, "de": {"name": "Deutsch"}, "ko": {"name": "한국어"}, "da": {"name": "Dansk"}, "fa": {"name": "فارسی"}, "hi": {"name": "हिन्दी"}, "fi": {"name": "Suomi"}, "hu": {"name": "Magyar"}, "ja": {"name": "日本語"}, "sr": {"name": "Српски"}, "sw": {"name": "Kiswahili"}, "sv": {"name": "Svenska"}, "sk": {"name": "Slovenčina"}, "zh-CN": {"name": "中文 (简体)"}, "ar": {"name": "العربية"}, "uk": {"name": "Українська"}, "sl": {"name": "Slovenščina"}}, "dailymotion": {"gv": {"english_name": "Manx"}, "gu": {"name": "ગુજરાતી", "english_name": "Gujarati"}, "gd": {"english_name": "Gaelic, Scottish"}, "ga": {"name": "Gaeilge", "english_name": "Irish"}, "gn": {"english_name": "Guarani"}, "gl": {"name": "Galego", "english_name": "Galician"}, "lg": {"english_name": "Ganda"}, "lb": {"english_name": "Luxembourgish"}, "la": {"english_name": "Latin"}, "ln": {"english_name": "Lingala"}, "lo": {"english_name": "Lao"}, "tt": {"name": "Татарча", "english_name": "Tatar"}, "tr": {"name": "Türkçe", "english_name": "Turkish"}, "ts": {"english_name": "Tsonga"}, "li": {"english_name": "Limburgan"}, "lv": {"name": "Latviešu", "english_name": "Latvian"}, "to": {"english_name": "Tonga 
(Tonga Islands)"}, "lt": {"name": "Lietuvių", "english_name": "Lithuanian"}, "lu": {"english_name": "Luba-Katanga"}, "tk": {"english_name": "Turkmen"}, "th": {"name": "ไทย", "english_name": "Thai"}, "ti": {"name": "ትግርኛ", "english_name": "Tigrinya"}, "tg": {"english_name": "Tajik"}, "te": {"english_name": "Telugu"}, "ta": {"name": "தமிழ்", "english_name": "Tamil"}, "yi": {"english_name": "Yiddish"}, "yo": {"english_name": "Yoruba"}, "de": {"name": "Deutsch", "english_name": "German"}, "da": {"name": "Dansk", "english_name": "Danish"}, "dz": {"english_name": "Dzongkha"}, "st": {"english_name": "Sotho, Southern"}, "dv": {"english_name": "Dhivehi"}, "qu": {"english_name": "Quechua"}, "el": {"name": "Ελληνικά", "english_name": "Greek, Modern (1453-)"}, "eo": {"name": "Esperanto", "english_name": "Esperanto"}, "en": {"english_name": "English"}, "zh": {"name": "中文", "english_name": "Chinese"}, "ee": {"english_name": "Ewe"}, "za": {"english_name": "Zhuang"}, "mh": {"english_name": "Marshallese"}, "uk": {"name": "українська", "english_name": "Ukrainian"}, "eu": {"name": "Euskara", "english_name": "Basque"}, "et": {"name": "Eesti", "english_name": "Estonian"}, "es": {"name": "Español", "english_name": "Spanish"}, "ru": {"name": "русский", "english_name": "Russian"}, "rw": {"name": "Ikinyarwanda", "english_name": "Kinyarwanda"}, "rm": {"english_name": "Romansh"}, "rn": {"english_name": "Rundi"}, "ro": {"name": "Română", "english_name": "Romanian"}, "bn": {"name": "বাংলা", "english_name": "Bengali"}, "be": {"english_name": "Belarusian"}, "bg": {"name": "Български", "english_name": "Bulgarian"}, "ba": {"english_name": "Bashkir"}, "wa": {"name": "Walon", "english_name": "Walloon"}, "wo": {"english_name": "Wolof"}, "bm": {"english_name": "Bambara"}, "jv": {"english_name": "Javanese"}, "bo": {"english_name": "Tibetan"}, "bi": {"english_name": "Bislama"}, "br": {"name": "Brezhoneg", "english_name": "Breton"}, "bs": {"name": "Bosnian", "english_name": "Bosnian"}, "ja": {"name": 
"日本語", "english_name": "Japanese"}, "om": {"english_name": "Oromo"}, "oj": {"english_name": "Ojibwa"}, "ty": {"english_name": "Tahitian"}, "oc": {"name": "Occitan", "english_name": "Occitan"}, "tw": {"english_name": "Twi"}, "os": {"english_name": "Ossetian"}, "or": {"name": "Oriya", "english_name": "Oriya"}, "xh": {"name": "Xhosa", "english_name": "Xhosa"}, "ch": {"english_name": "Chamorro"}, "co": {"english_name": "Corsican"}, "ca": {"name": "Català", "english_name": "Catalan"}, "ce": {"english_name": "Chechen"}, "cy": {"name": "Cymraeg", "english_name": "Welsh"}, "cs": {"name": "čeština", "english_name": "Czech"}, "cr": {"english_name": "Cree"}, "cv": {"english_name": "Chuvash"}, "cu": {"english_name": "Slavic, Church"}, "ve": {"name": "Venda", "english_name": "Venda"}, "ps": {"name": "Pushto", "english_name": "Pushto"}, "pt": {"name": "Português", "english_name": "Portuguese"}, "tl": {"english_name": "Tagalog"}, "pa": {"name": "ਪੰਜਾਬੀ", "english_name": "Panjabi"}, "vi": {"name": "Tiếng Việt", "english_name": "Vietnamese"}, "pi": {"english_name": "Pali"}, "is": {"name": "Íslenska", "english_name": "Icelandic"}, "pl": {"name": "polski", "english_name": "Polish"}, "hz": {"english_name": "Herero"}, "hy": {"english_name": "Armenian"}, "hr": {"name": "hrvatski", "english_name": "Croatian"}, "iu": {"english_name": "Inuktitut"}, "ht": {"english_name": "Haitian"}, "hu": {"name": "magyar", "english_name": "Hungarian"}, "hi": {"name": "हिंदी", "english_name": "Hindi"}, "ho": {"english_name": "Hiri Motu"}, "ha": {"english_name": "Hausa"}, "he": {"name": "עברית", "english_name": "Hebrew"}, "mg": {"english_name": "Malagasy"}, "uz": {"english_name": "Uzbek"}, "ml": {"english_name": "Malayalam"}, "mn": {"name": "Монгол", "english_name": "Mongolian"}, "mi": {"name": "Reo Māori", "english_name": "Maori"}, "ik": {"english_name": "Inupiaq"}, "mk": {"name": "Македонски", "english_name": "Macedonian"}, "ur": {"english_name": "Urdu"}, "mt": {"name": "Malti", "english_name": 
"Maltese"}, "ms": {"name": "Malay", "english_name": "Malay"}, "mr": {"name": "मराठी", "english_name": "Marathi"}, "ug": {"english_name": "Uighur"}, "my": {"english_name": "Burmese"}, "sq": {"english_name": "Albanian"}, "ae": {"english_name": "Avestan"}, "ss": {"english_name": "Swati"}, "af": {"name": "Afrikaans", "english_name": "Afrikaans"}, "tn": {"english_name": "Tswana"}, "sw": {"english_name": "Swahili (macrolanguage)"}, "ak": {"english_name": "Akan"}, "am": {"name": "አማርኛ", "english_name": "Amharic"}, "it": {"name": "Italiano", "english_name": "Italian"}, "an": {"english_name": "Aragonese"}, "ii": {"english_name": "Yi, Sichuan"}, "ia": {"english_name": "Interlingua"}, "as": {"english_name": "Assamese"}, "ar": {"name": "العربية", "english_name": "Arabic"}, "su": {"english_name": "Sundanese"}, "io": {"english_name": "Ido"}, "av": {"english_name": "Avaric"}, "ay": {"english_name": "Aymara"}, "az": {"name": "Azerbaijani", "english_name": "Azerbaijani"}, "ie": {"english_name": "Interlingue"}, "id": {"name": "Indonesian", "english_name": "Indonesian"}, "ig": {"english_name": "Igbo"}, "sk": {"name": "Slovenský", "english_name": "Slovak"}, "sr": {"name": "српски", "english_name": "Serbian"}, "nl": {"name": "Nederlands", "english_name": "Dutch"}, "nn": {"name": "Norwegian Nynorsk", "english_name": "Norwegian Nynorsk"}, "no": {"english_name": "Norwegian"}, "na": {"english_name": "Nauru"}, "nb": {"name": "Norwegian Bokmål", "english_name": "Norwegian Bokmål"}, "nd": {"english_name": "Ndebele, North"}, "ne": {"english_name": "Nepali (macrolanguage)"}, "ng": {"english_name": "Ndonga"}, "ny": {"english_name": "Nyanja"}, "vo": {"english_name": "Volapük"}, "zu": {"name": "Isi-Zulu", "english_name": "Zulu"}, "so": {"english_name": "Somali"}, "nr": {"english_name": "Ndebele, South"}, "nv": {"english_name": "Navajo"}, "sn": {"english_name": "Shona"}, "fr": {"name": "français", "english_name": "French"}, "sm": {"english_name": "Samoan"}, "fy": {"english_name": "Frisian, 
Western"}, "sv": {"name": "Svenska", "english_name": "Swedish"}, "fa": {"name": "فارسی", "english_name": "Persian"}, "ff": {"english_name": "Fulah"}, "fi": {"name": "suomi", "english_name": "Finnish"}, "fj": {"english_name": "Fijian"}, "sa": {"english_name": "Sanskrit"}, "fo": {"english_name": "Faroese"}, "ka": {"english_name": "Georgian"}, "kg": {"english_name": "Kongo"}, "kk": {"english_name": "Kazakh"}, "kj": {"english_name": "Kuanyama"}, "ki": {"english_name": "Kikuyu"}, "ko": {"name": "한국어", "english_name": "Korean"}, "kn": {"name": "ಕನ್ನಡ", "english_name": "Kannada"}, "km": {"english_name": "Khmer, Central"}, "kl": {"english_name": "Kalaallisut"}, "ks": {"english_name": "Kashmiri"}, "kr": {"english_name": "Kanuri"}, "si": {"english_name": "Sinhala"}, "sh": {"name": "Serbo-Croatian", "english_name": "Serbo-Croatian"}, "kw": {"english_name": "Cornish"}, "kv": {"english_name": "Komi"}, "ku": {"english_name": "Kurdish"}, "sl": {"name": "slovenščina", "english_name": "Slovenian"}, "sc": {"english_name": "Sardinian"}, "ky": {"english_name": "Kirghiz"}, "sg": {"english_name": "Sango"}, "se": {"english_name": "Sami, Northern"}, "sd": {"english_name": "Sindhi"}}, "google": {"el": {"name": "Ελληνικά"}, "eo": {"name": "Esperanto"}, "en": {"name": "English"}, "af": {"name": "Afrikaans"}, "vi": {"name": "Tiếng Việt"}, "ca": {"name": "Català"}, "it": {"name": "Italiano"}, "iw": {"name": "עברית"}, "hy": {"name": "Հայերեն"}, "cs": {"name": "Čeština"}, "et": {"name": "Eesti"}, "id": {"name": "Indonesia"}, "es": {"name": "Español"}, "ru": {"name": "Русский"}, "nl": {"name": "Nederlands"}, "pt": {"name": "Português"}, "no": {"name": "Norsk"}, "tr": {"name": "Türkçe"}, "lt": {"name": "Lietuvių"}, "lv": {"name": "Latviešu"}, "tl": {"name": "Filipino"}, "zh-TW": {"name": "中文 (繁體)"}, "th": {"name": "ไทย"}, "ro": {"name": "Română"}, "is": {"name": "Íslenska"}, "pl": {"name": "Polski"}, "be": {"name": "Беларуская"}, "fr": {"name": "Français"}, "bg": {"name": "Български"}, "hr": 
{"name": "Hrvatski"}, "de": {"name": "Deutsch"}, "ko": {"name": "한국어"}, "da": {"name": "Dansk"}, "fa": {"name": "فارسی"}, "hi": {"name": "हिन्दी"}, "fi": {"name": "Suomi"}, "hu": {"name": "Magyar"}, "ja": {"name": "日本語"}, "sr": {"name": "Српски"}, "sw": {"name": "Kiswahili"}, "sv": {"name": "Svenska"}, "sk": {"name": "Slovenčina"}, "zh-CN": {"name": "中文 (简体)"}, "ar": {"name": "العربية"}, "uk": {"name": "Українська"}, "sl": {"name": "Slovenščina"}}, "duckduckgo": ["da-DK", "vi-VN", "en-SG", "sl-SL", "en-XA", "tzh-HK", "en-UK", "ro-RO", "en-MY", "el-GR", "it-CH", "hu-HU", "fr-FR", "en-PH", "tl-PH", "fr-CA", "fi-FI", "et-EE", "sv-SE", "es-XL", "th-TH", "sk-SK", "es-ES", "en-IE", "es-US", "es-PE", "nl-NL", "en-US", "de-DE", "de-AT", "wt-WT", "no-NO", "tr-TR", "ca-ES", "it-IT", "es-CO", "ru-RU", "ca-CT", "en-ZA", "en-CA", "jp-JP", "es-MX", "id-ID", "es-AR", "he-IL", "kr-KR", "en-AU", "ms-MY", "pl-PL", "lv-LV", "bg-BG", "zh-CN", "en-NZ", "lt-LT", "tzh-TW", "hr-HR", "pt-PT", "fr-BE", "de-CH", "cs-CZ", "en-IN", "nl-BE", "fr-CH", "en-ID", "ar-XA", "pt-BR", "uk-UA", "es-CL"], "bing": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", "it", "ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", "vi"], "wikipedia": {"el": {"articles": 124415, "name": "Ελληνικά", "english_name": "Greek"}, "eo": {"articles": 235634, "name": "Esperanto", "english_name": "Esperanto"}, "en": {"articles": 5309305, "name": "English", "english_name": "English"}, "zh": {"articles": 915635, "name": "中文", "english_name": "Chinese"}, "simple": {"articles": 121825, "name": "Simple English", "english_name": "Simple English"}, "vi": {"articles": 1151694, "name": "Tiếng Việt", "english_name": "Vietnamese"}, "ca": {"articles": 529048, "name": "Català", "english_name": "Catalan"}, "it": {"articles": 1318160, "name": "Italiano", "english_name": "Italian"}, "ce": 
{"articles": 159058, "name": "Нохчийн", "english_name": "Chechen"}, "vo": {"articles": 120413, "name": "Volapük", "english_name": "Volapük"}, "eu": {"articles": 261908, "name": "Euskara", "english_name": "Basque"}, "ar": {"articles": 453836, "name": "العربية", "english_name": "Arabic"}, "cs": {"articles": 369249, "name": "Čeština", "english_name": "Czech"}, "et": {"articles": 151639, "name": "Eesti", "english_name": "Estonian"}, "gl": {"articles": 134762, "name": "Galego", "english_name": "Galician"}, "id": {"articles": 390382, "name": "Bahasa Indonesia", "english_name": "Indonesian"}, "es": {"articles": 1302275, "name": "Español", "english_name": "Spanish"}, "ru": {"articles": 1359763, "name": "Русский", "english_name": "Russian"}, "az": {"articles": 111556, "name": "Azərbaycanca", "english_name": "Azerbaijani"}, "nl": {"articles": 1886078, "name": "Nederlands", "english_name": "Dutch"}, "pt": {"articles": 949323, "name": "Português", "english_name": "Portuguese"}, "no": {"articles": 458365, "name": "Norsk (Bokmål)", "english_name": "Norwegian (Bokmål)"}, "tr": {"articles": 287474, "name": "Türkçe", "english_name": "Turkish"}, "zh-min-nan": {"articles": 201946, "name": "Bân-lâm-gú", "english_name": "Min Nan"}, "lt": {"articles": 180396, "name": "Lietuvių", "english_name": "Lithuanian"}, "th": {"articles": 113285, "name": "ไทย", "english_name": "Thai"}, "nn": {"articles": 131737, "name": "Nynorsk", "english_name": "Norwegian (Nynorsk)"}, "ro": {"articles": 373101, "name": "Română", "english_name": "Romanian"}, "war": {"articles": 1261974, "name": "Winaray", "english_name": "Waray-Waray"}, "pl": {"articles": 1197910, "name": "Polski", "english_name": "Polish"}, "be": {"articles": 123504, "name": "Беларуская", "english_name": "Belarusian"}, "fr": {"articles": 1823855, "name": "Français", "english_name": "French"}, "bg": {"articles": 223845, "name": "Български", "english_name": "Bulgarian"}, "la": {"articles": 125703, "name": "Latina", "english_name": "Latin"}, "ceb": 
{"articles": 3550855, "name": "Sinugboanong Binisaya", "english_name": "Cebuano"}, "hr": {"articles": 171093, "name": "Hrvatski", "english_name": "Croatian"}, "de": {"articles": 2009917, "name": "Deutsch", "english_name": "German"}, "hu": {"articles": 400092, "name": "Magyar", "english_name": "Hungarian"}, "fa": {"articles": 516845, "name": "فارسی", "english_name": "Persian"}, "hi": {"articles": 114434, "name": "हिन्दी", "english_name": "Hindi"}, "fi": {"articles": 405252, "name": "Suomi", "english_name": "Finnish"}, "hy": {"articles": 212809, "name": "Հայերեն", "english_name": "Armenian"}, "da": {"articles": 221877, "name": "Dansk", "english_name": "Danish"}, "ja": {"articles": 1041824, "name": "日本語", "english_name": "Japanese"}, "he": {"articles": 199341, "name": "עברית", "english_name": "Hebrew"}, "ka": {"articles": 111191, "name": "ქართული", "english_name": "Georgian"}, "ms": {"articles": 286242, "name": "Bahasa Melayu", "english_name": "Malay"}, "uz": {"articles": 128746, "name": "O‘zbek", "english_name": "Uzbek"}, "kk": {"articles": 217493, "name": "Қазақша", "english_name": "Kazakh"}, "sr": {"articles": 342597, "name": "Српски / Srpski", "english_name": "Serbian"}, "min": {"articles": 221961, "name": "Minangkabau", "english_name": "Minangkabau"}, "ko": {"articles": 367360, "name": "한국어", "english_name": "Korean"}, "sv": {"articles": 3783545, "name": "Svenska", "english_name": "Swedish"}, "ur": {"articles": 110877, "name": "اردو", "english_name": "Urdu"}, "sk": {"articles": 215374, "name": "Slovenčina", "english_name": "Slovak"}, "sh": {"articles": 436576, "name": "Srpskohrvatski / Српскохрватски", "english_name": "Serbo-Croatian"}, "uk": {"articles": 667399, "name": "Українська", "english_name": "Ukrainian"}, "sl": {"articles": 154006, "name": "Slovenščina", "english_name": "Slovenian"}}, "bing news": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", "it", 
"ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", "vi"], "yahoo news": ["ar", "bg", "zh-chs", "zh-cht", "hr", "cs", "da", "nl", "en", "et", "fi", "fr", "de", "el", "he", "hu", "it", "ja", "ko", "lv", "lt", "no", "pl", "pt", "ro", "ru", "sk", "sl", "es", "sv", "th", "tr"], "swisscows": ["browser", "ar-SA", "es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "en-CA", "fr-CA", "es-CL", "zh-CN", "da-DK", "fi-FI", "fr-FR", "de-DE", "zh-HK", "en-IN", "en-IE", "it-IT", "ja-JP", "ko-KR", "en-MY", "es-MX", "nl-NL", "en-NZ", "nb-NO", "en-PH", "pl-PL", "pt-PT", "ru-RU", "en-ZA", "es-ES", "sv-SE", "de-CH", "fr-CH", "zh-TW", "tr-TR", "en-GB", "en-US", "es-US"], "wikidata": {"el": {"articles": 124415, "name": "Ελληνικά", "english_name": "Greek"}, "eo": {"articles": 235634, "name": "Esperanto", "english_name": "Esperanto"}, "en": {"articles": 5309305, "name": "English", "english_name": "English"}, "zh": {"articles": 915635, "name": "中文", "english_name": "Chinese"}, "simple": {"articles": 121825, "name": "Simple English", "english_name": "Simple English"}, "vi": {"articles": 1151694, "name": "Tiếng Việt", "english_name": "Vietnamese"}, "ca": {"articles": 529048, "name": "Català", "english_name": "Catalan"}, "it": {"articles": 1318160, "name": "Italiano", "english_name": "Italian"}, "ce": {"articles": 159058, "name": "Нохчийн", "english_name": "Chechen"}, "vo": {"articles": 120413, "name": "Volapük", "english_name": "Volapük"}, "eu": {"articles": 261908, "name": "Euskara", "english_name": "Basque"}, "ar": {"articles": 453836, "name": "العربية", "english_name": "Arabic"}, "cs": {"articles": 369249, "name": "Čeština", "english_name": "Czech"}, "et": {"articles": 151639, "name": "Eesti", "english_name": "Estonian"}, "gl": {"articles": 134762, "name": "Galego", "english_name": "Galician"}, "id": {"articles": 390382, "name": "Bahasa Indonesia", "english_name": "Indonesian"}, "es": {"articles": 1302275, "name": "Español", 
"english_name": "Spanish"}, "ru": {"articles": 1359763, "name": "Русский", "english_name": "Russian"}, "az": {"articles": 111556, "name": "Azərbaycanca", "english_name": "Azerbaijani"}, "nl": {"articles": 1886078, "name": "Nederlands", "english_name": "Dutch"}, "pt": {"articles": 949323, "name": "Português", "english_name": "Portuguese"}, "no": {"articles": 458365, "name": "Norsk (Bokmål)", "english_name": "Norwegian (Bokmål)"}, "tr": {"articles": 287474, "name": "Türkçe", "english_name": "Turkish"}, "zh-min-nan": {"articles": 201946, "name": "Bân-lâm-gú", "english_name": "Min Nan"}, "lt": {"articles": 180396, "name": "Lietuvių", "english_name": "Lithuanian"}, "th": {"articles": 113285, "name": "ไทย", "english_name": "Thai"}, "nn": {"articles": 131737, "name": "Nynorsk", "english_name": "Norwegian (Nynorsk)"}, "ro": {"articles": 373101, "name": "Română", "english_name": "Romanian"}, "war": {"articles": 1261974, "name": "Winaray", "english_name": "Waray-Waray"}, "pl": {"articles": 1197910, "name": "Polski", "english_name": "Polish"}, "be": {"articles": 123504, "name": "Беларуская", "english_name": "Belarusian"}, "fr": {"articles": 1823855, "name": "Français", "english_name": "French"}, "bg": {"articles": 223845, "name": "Български", "english_name": "Bulgarian"}, "la": {"articles": 125703, "name": "Latina", "english_name": "Latin"}, "ceb": {"articles": 3550855, "name": "Sinugboanong Binisaya", "english_name": "Cebuano"}, "hr": {"articles": 171093, "name": "Hrvatski", "english_name": "Croatian"}, "de": {"articles": 2009917, "name": "Deutsch", "english_name": "German"}, "hu": {"articles": 400092, "name": "Magyar", "english_name": "Hungarian"}, "fa": {"articles": 516845, "name": "فارسی", "english_name": "Persian"}, "hi": {"articles": 114434, "name": "हिन्दी", "english_name": "Hindi"}, "fi": {"articles": 405252, "name": "Suomi", "english_name": "Finnish"}, "hy": {"articles": 212809, "name": "Հայերեն", "english_name": "Armenian"}, "da": {"articles": 221877, "name": 
"Dansk", "english_name": "Danish"}, "ja": {"articles": 1041824, "name": "日本語", "english_name": "Japanese"}, "he": {"articles": 199341, "name": "עברית", "english_name": "Hebrew"}, "ka": {"articles": 111191, "name": "ქართული", "english_name": "Georgian"}, "ms": {"articles": 286242, "name": "Bahasa Melayu", "english_name": "Malay"}, "uz": {"articles": 128746, "name": "O‘zbek", "english_name": "Uzbek"}, "kk": {"articles": 217493, "name": "Қазақша", "english_name": "Kazakh"}, "sr": {"articles": 342597, "name": "Српски / Srpski", "english_name": "Serbian"}, "min": {"articles": 221961, "name": "Minangkabau", "english_name": "Minangkabau"}, "ko": {"articles": 367360, "name": "한국어", "english_name": "Korean"}, "sv": {"articles": 3783545, "name": "Svenska", "english_name": "Swedish"}, "ur": {"articles": 110877, "name": "اردو", "english_name": "Urdu"}, "sk": {"articles": 215374, "name": "Slovenčina", "english_name": "Slovak"}, "sh": {"articles": 436576, "name": "Srpskohrvatski / Српскохрватски", "english_name": "Serbo-Croatian"}, "uk": {"articles": 667399, "name": "Українська", "english_name": "Ukrainian"}, "sl": {"articles": 154006, "name": "Slovenščina", "english_name": "Slovenian"}}, "ddg definitions": ["da-DK", "vi-VN", "en-SG", "sl-SL", "en-XA", "tzh-HK", "en-UK", "ro-RO", "en-MY", "el-GR", "it-CH", "hu-HU", "fr-FR", "en-PH", "tl-PH", "fr-CA", "fi-FI", "et-EE", "sv-SE", "es-XL", "th-TH", "sk-SK", "es-ES", "en-IE", "es-US", "es-PE", "nl-NL", "en-US", "de-DE", "de-AT", "wt-WT", "no-NO", "tr-TR", "ca-ES", "it-IT", "es-CO", "ru-RU", "ca-CT", "en-ZA", "en-CA", "jp-JP", "es-MX", "id-ID", "es-AR", "he-IL", "kr-KR", "en-AU", "ms-MY", "pl-PL", "lv-LV", "bg-BG", "zh-CN", "en-NZ", "lt-LT", "tzh-TW", "hr-HR", "pt-PT", "fr-BE", "de-CH", "cs-CZ", "en-IN", "nl-BE", "fr-CH", "en-ID", "ar-XA", "pt-BR", "uk-UA", "es-CL"], "bing images": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", 
"it", "ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", "vi"], "yahoo": ["ar", "bg", "zh-chs", "zh-cht", "hr", "cs", "da", "nl", "en", "et", "fi", "fr", "de", "el", "he", "hu", "it", "ja", "ko", "lv", "lt", "no", "pl", "pt", "ro", "ru", "sk", "sl", "es", "sv", "th", "tr"], "gigablast": ["en", "fr", "es", "ru", "tr", "ja", "h_", "tw", "cn", "ko", "de", "nl", "it", "fi", "sv", "no", "pt", "vi", "ar", "he", "id", "el", "th", "hi", "bn", "pl", "tl", "la", "eo", "ca", "bg", "tx", "sr", "hu", "da", "lt", "cs", "gl", "ka", "gd", "go", "ro", "ga", "lv", "hy", "is", "ag", "gv", "io", "fa", "te", "vv", "mg", "ku", "lb", "et"]}+
\ No newline at end of file
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
@@ -20,6 +20,8 @@ from os.path import realpath, dirname
import sys
from flask_babel import gettext
from operator import itemgetter
+from json import loads
+from requests import get
from searx import settings
from searx import logger
from searx.utils import load_module
@@ -33,10 +35,13 @@ engines = {}
categories = {'general': []}
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
+
engine_shortcuts = {}
engine_default_args = {'paging': False,
'categories': ['general'],
'language_support': True,
+ 'supported_languages': [],
'safesearch': False,
'timeout': settings['outgoing']['request_timeout'],
'shortcut': '-',
@@ -85,6 +90,15 @@ def load_engine(engine_data):
.format(engine.name, engine_attr))
sys.exit(1)
+ # assign supported languages from json file
+ if engine_data['name'] in languages:
+ setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+ # assign language fetching method if auxiliary method exists
+ if hasattr(engine, '_fetch_supported_languages'):
+ setattr(engine, 'fetch_supported_languages',
+ lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
engine.stats = {
'result_count': 0,
'search_count': 0,
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
@@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
def locale_to_lang_code(locale):
- if locale.find('_') >= 0:
- locale = locale.split('_')[0]
+ if locale.find('-') >= 0:
+ locale = locale.split('-')[0]
return locale
@@ -95,6 +95,7 @@ main_langs = {
'uk': 'Українська',
'zh': '简体中文'
}
+supported_languages = dict(lang_urls, **main_langs)
# do search-request
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
@@ -21,6 +21,7 @@ from searx.engines.xpath import extract_text
categories = ['general']
paging = True
language_support = True
+supported_languages_url = 'https://www.bing.com/account/general'
# search-url
base_url = 'https://www.bing.com/'
@@ -32,7 +33,7 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] != 'all':
- query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
+ query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
query.decode('utf-8')).encode('utf-8')
search_path = search_string.format(
@@ -81,3 +82,15 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = []
+ dom = html.fromstring(resp.text)
+ options = dom.xpath('//div[@id="limit-languages"]//input')
+ for option in options:
+ code = option.xpath('./@id')[0].replace('_', '-')
+ supported_languages.append(code)
+
+ return supported_languages
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
@@ -19,6 +19,7 @@ from urllib import urlencode
from lxml import html
from json import loads
import re
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['images']
@@ -53,7 +54,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en-US'
else:
- language = params['language'].replace('_', '-')
+ language = params['language']
search_path = search_string.format(
query=urlencode({'q': query}),
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
@@ -17,6 +17,7 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['news']
@@ -74,7 +75,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en-US'
else:
- language = params['language'].replace('_', '-')
+ language = params['language']
params['url'] = _get_url(query, language, offset, params['time_range'])
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
@@ -15,6 +15,7 @@
from urllib import urlencode
from json import loads
from datetime import datetime
+from requests import get
# engine dependent config
categories = ['videos']
@@ -27,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr
embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
+supported_languages_url = 'https://api.dailymotion.com/languages'
+
# do search-request
def request(query, params):
@@ -74,3 +77,22 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = {}
+
+ response_json = loads(resp.text)
+
+ for language in response_json['list']:
+ supported_languages[language['code']] = {}
+
+ name = language['native_name']
+ if name:
+ supported_languages[language['code']]['name'] = name
+ english_name = language['name']
+ if english_name:
+ supported_languages[language['code']]['english_name'] = english_name
+
+ return supported_languages
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
@@ -15,13 +15,15 @@
from urllib import urlencode
from lxml.html import fromstring
+from requests import get
+from json import loads
from searx.engines.xpath import extract_text
-from searx.languages import language_codes
# engine dependent config
categories = ['general']
paging = True
language_support = True
+supported_languages_url = 'https://duckduckgo.com/d2030.js'
time_range_support = True
# search-url
@@ -46,19 +48,31 @@ def request(query, params):
offset = (params['pageno'] - 1) * 30
+ # custom fixes for languages
if params['language'] == 'all':
locale = None
+ elif params['language'][:2] == 'ja':
+ locale = 'jp-jp'
+ elif params['language'][:2] == 'sl':
+ locale = 'sl-sl'
+ elif params['language'] == 'zh-TW':
+ locale = 'tw-tzh'
+ elif params['language'] == 'zh-HK':
+ locale = 'hk-tzh'
+ elif params['language'][-2:] == 'SA':
+ locale = 'xa-' + params['language'].split('-')[0]
+ elif params['language'][-2:] == 'GB':
+ locale = 'uk-' + params['language'].split('-')[0]
else:
- locale = params['language'].split('_')
+ locale = params['language'].split('-')
if len(locale) == 2:
# country code goes first
locale = locale[1].lower() + '-' + locale[0].lower()
else:
# tries to get a country code from language
locale = locale[0].lower()
- lang_codes = [x[0] for x in language_codes]
- for lc in lang_codes:
- lc = lc.split('_')
+ for lc in supported_languages:
+ lc = lc.split('-')
if locale == lc[0]:
locale = lc[1].lower() + '-' + lc[0].lower()
break
@@ -102,3 +116,17 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+
+ # response is a js file with regions as an embedded object
+ response_page = resp.text
+ response_page = response_page[response_page.find('regions:{') + 8:]
+ response_page = response_page[:response_page.find('}') + 1]
+
+ regions_json = loads(response_page)
+ supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+
+ return supported_languages
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
@@ -4,6 +4,7 @@ from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
def request(query, params):
params['url'] = url.format(query=urlencode({'q': query}))
- params['headers']['Accept-Language'] = params['language']
+ params['headers']['Accept-Language'] = params['language'].split('-')[0]
return params
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
@@ -14,6 +14,7 @@ from json import loads
from random import randint
from time import time
from urllib import urlencode
+from lxml.html import fromstring
# engine dependent config
categories = ['general']
@@ -40,6 +41,8 @@ url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'
+supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
+
# do search-request
def request(query, params):
@@ -48,7 +51,9 @@ def request(query, params):
if params['language'] == 'all':
language = 'xx'
else:
- language = params['language'][0:2]
+ language = params['language'].replace('-', '_').lower()
+ if language.split('-')[0] != 'zh':
+ language = language.split('-')[0]
if params['safesearch'] >= 1:
safesearch = 1
@@ -82,3 +87,16 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = []
+ dom = fromstring(resp.text)
+ links = dom.xpath('//span[@id="menu2"]/a')
+ for link in links:
+ code = link.xpath('./@href')[0][-2:]
+ if code != 'xx' and code not in supported_languages:
+ supported_languages.append(code)
+
+ return supported_languages
diff --git a/searx/engines/google.py b/searx/engines/google.py
@@ -103,6 +103,7 @@ map_hostname_start = 'maps.google.'
maps_path = '/maps'
redirect_path = '/url'
images_path = '/images'
+supported_languages_url = 'https://www.google.com/preferences?#languages'
# specific xpath variables
results_xpath = '//div[@class="g"]'
@@ -167,8 +168,12 @@ def request(query, params):
language = 'en'
country = 'US'
url_lang = ''
+ elif params['language'][:2] == 'jv':
+ language = 'jw'
+ country = 'ID'
+ url_lang = 'lang_jw'
else:
- language_array = params['language'].lower().split('_')
+ language_array = params['language'].lower().split('-')
if len(language_array) == 2:
country = language_array[1]
else:
@@ -355,3 +360,16 @@ def attributes_to_html(attributes):
retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
retval = retval + '</table>'
return retval
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = {}
+ dom = html.fromstring(resp.text)
+ options = dom.xpath('//table//td/font/label/span')
+ for option in options:
+ code = option.xpath('./@id')[0][1:]
+ name = option.text.title()
+ supported_languages[code] = {"name": name}
+
+ return supported_languages
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
@@ -12,6 +12,8 @@
from lxml import html
from urllib import urlencode
+from json import loads
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
# search-url
categories = ['news']
@@ -50,7 +52,7 @@ def request(query, params):
search_options=urlencode(search_options))
if params['language'] != 'all':
- language_array = params['language'].lower().split('_')
+ language_array = params['language'].lower().split('-')
params['url'] += '&lr=lang_' + language_array[0]
return params
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
@@ -46,7 +46,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en'
else:
- language = params['language'].split('_')[0]
+ language = params['language'].split('-')[0]
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
if any(x[1] == 'language' for x in format_strings):
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
@@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# list of supported languages
-allowed_languages = ['de', 'en', 'fr', 'it']
+supported_languages = ['de', 'en', 'fr', 'it']
# do search-request
@@ -37,7 +37,7 @@ def request(query, params):
if params['language'] != 'all':
language = params['language'].split('_')[0]
- if language in allowed_languages:
+ if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
@@ -46,7 +46,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
- params['url'] += '&locale=' + params['language'].lower()
+ params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
return params
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
@@ -47,7 +47,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
- params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
+ params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
return params
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
@@ -22,7 +22,7 @@ language = ""
# search-url
url = 'http://www.subtitleseeker.com/'
-search_url = url + 'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}?p={pageno}'
# specific xpath variables
results_xpath = '//div[@class="boxRows"]'
@@ -43,10 +43,16 @@ def response(resp):
search_lang = ""
- if resp.search_params['language'] != 'all':
- search_lang = [lc[1]
+ # dirty fix for languages named differently on their site
+ if resp.search_params['language'][:2] == 'fa':
+ search_lang = 'Farsi'
+ elif resp.search_params['language'] == 'pt-BR':
+ search_lang = 'Brazilian'
+ elif resp.search_params['language'] != 'all':
+ search_lang = [lc[3]
for lc in language_codes
- if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
+ if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
+ search_lang = search_lang[0].split(' (')[0]
# parse results
for result in dom.xpath(results_xpath):
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
@@ -13,6 +13,7 @@
from json import loads
from urllib import urlencode, unquote
import re
+from lxml.html import fromstring
# engine dependent config
categories = ['general', 'images']
@@ -23,6 +24,8 @@ language_support = True
base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}'
+supported_languages_url = base_url
+
# regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -35,9 +38,11 @@ def request(query, params):
if params['language'] == 'all':
ui_language = 'browser'
region = 'browser'
+ elif params['language'].split('-')[0] == 'no':
+ region = 'nb-NO'
else:
- region = params['language'].replace('_', '-')
- ui_language = params['language'].split('_')[0]
+ region = params['language']
+ ui_language = params['language'].split('-')[0]
search_path = search_string.format(
query=urlencode({'query': query,
@@ -106,3 +111,15 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = []
+ dom = fromstring(resp.text)
+ options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
+ for option in options:
+ code = option.xpath('./@data-val')[0]
+ supported_languages.append(code)
+
+ return supported_languages
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
@@ -40,7 +40,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
- params['cookies']['lang'] = params['language'].split('_')[0]
+ params['cookies']['lang'] = params['language'].split('-')[0]
else:
params['cookies']['lang'] = 'en'
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
@@ -14,6 +14,8 @@
from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
+from searx.utils import format_date_by_locale
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from json import loads
from lxml.html import fromstring
@@ -55,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
def request(query, params):
- language = params['language'].split('_')[0]
+ language = params['language'].split('-')[0]
if language == 'all':
language = 'en'
@@ -70,7 +72,7 @@ def response(resp):
html = fromstring(resp.content)
wikidata_ids = html.xpath(wikidata_ids_xpath)
- language = resp.search_params['language'].split('_')[0]
+ language = resp.search_params['language'].split('-')[0]
if language == 'all':
language = 'en'
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
@@ -12,6 +12,8 @@
from json import loads
from urllib import urlencode, quote
+from lxml.html import fromstring
+
# search-url
base_url = 'https://{language}.wikipedia.org/'
@@ -24,14 +26,16 @@ search_postfix = 'w/api.php?'\
'&explaintext'\
'&pithumbsize=300'\
'&redirects'
+supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
# set language in base_url
def url_lang(lang):
- if lang == 'all':
+ lang = lang.split('-')[0]
+ if lang == 'all' or lang not in supported_languages:
language = 'en'
else:
- language = lang.split('_')[0]
+ language = lang
return base_url.format(language=language)
@@ -111,3 +115,24 @@ def response(resp):
'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = {}
+ dom = fromstring(resp.text)
+ tables = dom.xpath('//table[contains(@class,"sortable")]')
+ for table in tables:
+ # exclude header row
+ trs = table.xpath('.//tr')[1:]
+ for tr in trs:
+ td = tr.xpath('./td')
+ code = td[3].xpath('./a')[0].text
+ name = td[2].xpath('./a')[0].text
+ english_name = td[1].xpath('./a')[0].text
+ articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
+ # exclude languages with too few articles
+ if articles >= 100000:
+ supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
+
+ return supported_languages
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
@@ -53,7 +53,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
- params['url'] += '&lr=lang_' + params['language'].split('_')[0]
+ params['url'] += '&lr=lang_' + params['language'].split('-')[0]
return params
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
@@ -27,6 +27,8 @@ base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
+supported_languages_url = 'https://search.yahoo.com/web/advanced'
+
# specific xpath variables
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
url_xpath = './/h3/a/@href'
@@ -72,7 +74,13 @@ def _get_url(query, offset, language, time_range):
def _get_language(params):
if params['language'] == 'all':
return 'en'
- return params['language'].split('_')[0]
+ elif params['language'][:2] == 'zh':
+ if params['language'] == 'zh' or params['language'] == 'zh-CH':
+ return 'szh'
+ else:
+ return 'tzh'
+ else:
+ return params['language'].split('-')[0]
# do search-request
@@ -132,3 +140,15 @@ def response(resp):
# return results
return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+ supported_languages = []
+ dom = html.fromstring(resp.text)
+ options = dom.xpath('//div[@id="yschlang"]/span/label/input')
+ for option in options:
+ code = option.xpath('./@value')[0][5:].replace('_', '-')
+ supported_languages.append(code)
+
+ return supported_languages
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
@@ -12,7 +12,7 @@
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
@@ -22,7 +22,9 @@ language_support = True # TODO
default_tld = 'com'
language_map = {'ru': 'ru',
- 'ua': 'uk',
+ 'ua': 'ua',
+ 'be': 'by',
+ 'kk': 'kz',
'tr': 'com.tr'}
# search-url
@@ -36,7 +38,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
def request(query, params):
- lang = params['language'].split('_')[0]
+ lang = params['language'].split('-')[0]
host = base_url.format(tld=language_map.get(lang) or default_tld)
params['url'] = host + search_url.format(page=params['pageno'] - 1,
query=urlencode({'text': query}))
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
@@ -36,7 +36,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
- params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
+ params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
return params
diff --git a/searx/languages.py b/searx/languages.py
@@ -1,78 +1,131 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-
-(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
-'''
-
+# -*- coding: utf-8 -*-
# list of language codes
+# this file is generated automatically by utils/update_search_languages.py
+
language_codes = (
- ("ar_XA", "Arabic", "Arabia"),
- ("bg_BG", "Bulgarian", "Bulgaria"),
- ("cs_CZ", "Czech", "Czech Republic"),
- ("da_DK", "Danish", "Denmark"),
- ("de_AT", "German", "Austria"),
- ("de_CH", "German", "Switzerland"),
- ("de_DE", "German", "Germany"),
- ("el_GR", "Greek", "Greece"),
- ("en_AU", "English", "Australia"),
- ("en_CA", "English", "Canada"),
- ("en_GB", "English", "United Kingdom"),
- ("en_ID", "English", "Indonesia"),
- ("en_IE", "English", "Ireland"),
- ("en_IN", "English", "India"),
- ("en_MY", "English", "Malaysia"),
- ("en_NZ", "English", "New Zealand"),
- ("en_PH", "English", "Philippines"),
- ("en_SG", "English", "Singapore"),
- ("en_US", "English", "United States"),
- ("en_XA", "English", "Arabia"),
- ("en_ZA", "English", "South Africa"),
- ("es_AR", "Spanish", "Argentina"),
- ("es_CL", "Spanish", "Chile"),
- ("es_ES", "Spanish", "Spain"),
- ("es_MX", "Spanish", "Mexico"),
- ("es_US", "Spanish", "United States"),
- ("es_XL", "Spanish", "Latin America"),
- ("et_EE", "Estonian", "Estonia"),
- ("fi_FI", "Finnish", "Finland"),
- ("fr_BE", "French", "Belgium"),
- ("fr_CA", "French", "Canada"),
- ("fr_CH", "French", "Switzerland"),
- ("fr_FR", "French", "France"),
- ("he_IL", "Hebrew", "Israel"),
- ("hr_HR", "Croatian", "Croatia"),
- ("hu_HU", "Hungarian", "Hungary"),
- ("it_IT", "Italian", "Italy"),
- ("ja_JP", "Japanese", "Japan"),
- ("ko_KR", "Korean", "Korea"),
- ("lt_LT", "Lithuanian", "Lithuania"),
- ("lv_LV", "Latvian", "Latvia"),
- ("nb_NO", "Norwegian", "Norway"),
- ("nl_BE", "Dutch", "Belgium"),
- ("nl_NL", "Dutch", "Netherlands"),
- ("oc_OC", "Occitan", "Occitan"),
- ("pl_PL", "Polish", "Poland"),
- ("pt_BR", "Portuguese", "Brazil"),
- ("pt_PT", "Portuguese", "Portugal"),
- ("ro_RO", "Romanian", "Romania"),
- ("ru_RU", "Russian", "Russia"),
- ("sk_SK", "Slovak", "Slovak Republic"),
- ("sl_SL", "Slovenian", "Slovenia"),
- ("sv_SE", "Swedish", "Sweden"),
- ("th_TH", "Thai", "Thailand"),
- ("tr_TR", "Turkish", "Turkey"),
- ("uk_UA", "Ukrainian", "Ukraine"),
- ("zh_CN", "Chinese", "China"),
- ("zh_HK", "Chinese", "Hong Kong SAR"),
- ("zh_TW", "Chinese", "Taiwan"))
+ (u"af", u"Afrikaans", u"", u""),
+ (u"am", u"አማርኛ", u"", u"Amharic"),
+ (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
+ (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
+ (u"be", u"Беларуская", u"", u"Belarusian"),
+ (u"bg-BG", u"Български", u"България", u"Bulgarian"),
+ (u"bn", u"বাংলা", u"", u"Bengali"),
+ (u"br", u"Brezhoneg", u"", u"Breton"),
+ (u"bs", u"Bosnian", u"", u"Bosnian"),
+ (u"ca", u"Català", u"", u"Catalan"),
+ (u"ca-CT", u"Català", u"", u"Catalan"),
+ (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
+ (u"ce", u"Нохчийн", u"", u"Chechen"),
+ (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
+ (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
+ (u"cy", u"Cymraeg", u"", u"Welsh"),
+ (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
+ (u"de", u"Deutsch", u"", u"German"),
+ (u"de-AT", u"Deutsch", u"Österreich", u"German"),
+ (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
+ (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
+ (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
+ (u"en", u"English", u"", u"English"),
+ (u"en-AU", u"English", u"Australia", u"English"),
+ (u"en-CA", u"English", u"Canada", u"English"),
+ (u"en-GB", u"English", u"United Kingdom", u"English"),
+ (u"en-ID", u"English", u"Indonesia", u"English"),
+ (u"en-IE", u"English", u"Ireland", u"English"),
+ (u"en-IN", u"English", u"India", u"English"),
+ (u"en-MY", u"English", u"Malaysia", u"English"),
+ (u"en-NZ", u"English", u"New Zealand", u"English"),
+ (u"en-PH", u"English", u"Philippines", u"English"),
+ (u"en-SG", u"English", u"Singapore", u"English"),
+ (u"en-US", u"English", u"United States", u"English"),
+ (u"en-ZA", u"English", u"South Africa", u"English"),
+ (u"eo", u"Esperanto", u"", u"Esperanto"),
+ (u"es", u"Español", u"", u"Spanish"),
+ (u"es-AR", u"Español", u"Argentina", u"Spanish"),
+ (u"es-CL", u"Español", u"Chile", u"Spanish"),
+ (u"es-CO", u"Español", u"Colombia", u"Spanish"),
+ (u"es-ES", u"Español", u"España", u"Spanish"),
+ (u"es-MX", u"Español", u"México", u"Spanish"),
+ (u"es-PE", u"Español", u"Perú", u"Spanish"),
+ (u"es-US", u"Español", u"Estados Unidos", u"Spanish"),
+ (u"et-EE", u"Eesti", u"Eesti", u"Estonian"),
+ (u"eu", u"Euskara", u"", u"Basque"),
+ (u"fa", u"فارسی", u"", u"Persian"),
+ (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
+ (u"fr", u"Français", u"", u"French"),
+ (u"fr-BE", u"Français", u"Belgique", u"French"),
+ (u"fr-CA", u"Français", u"Canada", u"French"),
+ (u"fr-CH", u"Français", u"Suisse", u"French"),
+ (u"fr-FR", u"Français", u"France", u"French"),
+ (u"ga", u"Gaeilge", u"", u"Irish"),
+ (u"gl", u"Galego", u"", u"Galician"),
+ (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
+ (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
+ (u"hi", u"हिन्दी", u"", u"Hindi"),
+ (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
+ (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
+ (u"hy", u"Հայերեն", u"", u"Armenian"),
+ (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
+ (u"is", u"Íslenska", u"", u""),
+ (u"it", u"Italiano", u"", u"Italian"),
+ (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
+ (u"it-IT", u"Italiano", u"Italia", u"Italian"),
+ (u"iw", u"עברית", u"", u""),
+ (u"ja-JP", u"日本語", u"日本", u"Japanese"),
+ (u"ka", u"ქართული", u"", u"Georgian"),
+ (u"kk", u"Қазақша", u"", u"Kazakh"),
+ (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
+ (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
+ (u"la", u"Latina", u"", u"Latin"),
+ (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
+ (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
+ (u"mi", u"Reo Māori", u"", u"Maori"),
+ (u"min", u"Minangkabau", u"", u"Minangkabau"),
+ (u"mk", u"Македонски", u"", u"Macedonian"),
+ (u"mn", u"Монгол", u"", u"Mongolian"),
+ (u"mr", u"मराठी", u"", u"Marathi"),
+ (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
+ (u"mt", u"Malti", u"", u"Maltese"),
+ (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
+ (u"nl", u"Nederlands", u"", u"Dutch"),
+ (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
+ (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
+ (u"nn", u"Nynorsk", u"", u"Norwegian"),
+ (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
+ (u"oc", u"Occitan", u"", u"Occitan"),
+ (u"or", u"Oriya", u"", u"Oriya"),
+ (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
+ (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
+ (u"ps", u"Pushto", u"", u"Pushto"),
+ (u"pt", u"Português", u"", u"Portuguese"),
+ (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
+ (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
+ (u"ro-RO", u"Română", u"România", u"Romanian"),
+ (u"ru-RU", u"Русский", u"Россия", u"Russian"),
+ (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
+ (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
+ (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
+ (u"sl", u"Slovenščina", u"", u"Slovenian"),
+ (u"sr", u"Српски / Srpski", u"", u"Serbian"),
+ (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
+ (u"sw", u"Kiswahili", u"", u""),
+ (u"ta", u"தமிழ்", u"", u"Tamil"),
+ (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
+ (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
+ (u"tl-PH", u"Filipino", u"Pilipinas", u""),
+ (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
+ (u"tt", u"Татарча", u"", u"Tatar"),
+ (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
+ (u"ur", u"اردو", u"", u"Urdu"),
+ (u"uz", u"O‘zbek", u"", u"Uzbek"),
+ (u"ve", u"Venda", u"", u"Venda"),
+ (u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"),
+ (u"vo", u"Volapük", u"", u"Volapük"),
+ (u"wa", u"Walon", u"", u"Walloon"),
+ (u"war", u"Winaray", u"", u"Waray-Waray"),
+ (u"xh", u"Xhosa", u"", u"Xhosa"),
+ (u"zh", u"中文", u"", u"Chinese"),
+ (u"zh-CN", u"中文", u"中国", u"Chinese"),
+ (u"zh-HK", u"中文", u"香港", u"Chinese"),
+ (u"zh-TW", u"中文", u"台湾", u"Chinese"),
+ (u"zu", u"Isi-Zulu", u"", u"Zulu")
+)
diff --git a/searx/preferences.py b/searx/preferences.py
@@ -95,6 +95,25 @@ class MultipleChoiceSetting(EnumStringSetting):
resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
+class SearchLanguageSetting(EnumStringSetting):
+ """Available choices may change, so user's value may not be in choices anymore"""
+
+ def parse(self, data):
+ if data not in self.choices and data != self.value:
+ # hack to give some backwards compatibility with old language cookies
+ data = str(data).replace('_', '-')
+ lang = data.split('-')[0]
+ if data in self.choices:
+ pass
+ elif lang in self.choices:
+ data = lang
+ elif data == 'ar-XA':
+ data = 'ar-SA'
+ else:
+ data = self.value
+ self.value = data
+
+
class MapSetting(Setting):
"""Setting of a value that has to be translated in order to be storable"""
@@ -216,8 +235,8 @@ class Preferences(object):
super(Preferences, self).__init__()
self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories),
- 'language': EnumStringSetting(settings['search']['language'],
- choices=LANGUAGE_CODES),
+ 'language': SearchLanguageSetting(settings['search']['language'],
+ choices=LANGUAGE_CODES),
'locale': EnumStringSetting(settings['ui']['default_locale'],
choices=settings['locales'].keys() + ['']),
'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
diff --git a/searx/query.py b/searx/query.py
@@ -71,21 +71,24 @@ class RawTextQuery(object):
# check if any language-code is equal with
# declared language-codes
for lc in language_codes:
- lang_id, lang_name, country = map(str.lower, lc)
+ lang_id, lang_name, country, english_name = map(unicode.lower, lc)
# if correct language-code is found
# set it as new search-language
if lang == lang_id\
or lang_id.startswith(lang)\
or lang == lang_name\
+ or lang == english_name\
or lang.replace('_', ' ') == country:
parse_next = True
- self.languages.append(lang)
- break
+ self.languages.append(lang_id)
+ # to ensure best match (first match is not necessarily the best one)
+ if lang == lang_id:
+ break
# this force a engine or category
if query_part[0] == '!' or query_part[0] == '?':
- prefix = query_part[1:].replace('_', ' ')
+ prefix = query_part[1:].replace('-', ' ')
# check if prefix is equal with engine shortcut
if prefix in engine_shortcuts:
diff --git a/searx/search.py b/searx/search.py
@@ -211,10 +211,14 @@ def get_search_query_from_webapp(preferences, form):
# set query
query = raw_text_query.getSearchQuery()
- # get last selected language in query, if possible
+ # set specific language if set on request, query or preferences
# TODO support search with multiple languages
if len(raw_text_query.languages):
query_lang = raw_text_query.languages[-1]
+ elif 'language' in form:
+ query_lang = form.get('language')
+ else:
+ query_lang = preferences.get_value('language')
query_time_range = form.get('time_range')
diff --git a/searx/static/plugins/js/search_on_category_select.js b/searx/static/plugins/js/search_on_category_select.js
@@ -15,5 +15,10 @@ $(document).ready(function() {
$('#search_form').submit();
}
});
+ $('#language').change(function(e) {
+ if($('#q').val()) {
+ $('#search_form').submit();
+ }
+ });
}
});
diff --git a/searx/templates/courgette/preferences.html b/searx/templates/courgette/preferences.html
@@ -13,9 +13,9 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
- <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
- {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
- <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+ <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+ {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+ <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
</select>
</p>
diff --git a/searx/templates/legacy/preferences.html b/searx/templates/legacy/preferences.html
@@ -14,9 +14,9 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
- <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
- {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
- <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+ <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+ {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+ <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
</select>
</p>
diff --git a/searx/templates/oscar/advanced.html b/searx/templates/oscar/advanced.html
@@ -6,4 +6,5 @@
<div id="advanced-search-container">
{% include 'oscar/categories.html' %}
{% include 'oscar/time-range.html' %}
+ {% include 'oscar/languages.html' %}
</div>
diff --git a/searx/templates/oscar/languages.html b/searx/templates/oscar/languages.html
@@ -0,0 +1,12 @@
+{% if preferences %}
+<select class="form-control" name='language'>
+{% else %}
+<select class="time_range" id='language' name='language'>
+{% endif %}
+ <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+ {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+ <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
+ {{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
+ </option>
+ {% endfor %}
+</select>
diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html
@@ -40,12 +40,7 @@
{% set language_label = _('Search language') %}
{% set language_info = _('What language do you prefer for search?') %}
{{ preferences_item_header(language_info, language_label, rtl) }}
- <select class="form-control" name='language'>
- <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
- {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
- <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
- {% endfor %}
- </select>
+ {% include 'oscar/languages.html' %}
{{ preferences_item_footer(language_info, language_label, rtl) }}
{% set locale_label = _('Interface language') %}
@@ -153,6 +148,7 @@
<th>{{ _("Allow") }}</th>
<th>{{ _("Engine name") }}</th>
<th>{{ _("Shortcut") }}</th>
+ <th>{{ _("Language support") }}</th>
<th>{{ _("SafeSearch") }}</th>
<th>{{ _("Time range") }}</th>
<th>{{ _("Avg. time") }}</th>
@@ -161,6 +157,7 @@
<th>{{ _("Max time") }}</th>
<th>{{ _("Avg. time") }}</th>
<th>{{ _("SafeSearch") }}</th>
+ <th>{{ _("Language support") }}</th>
<th>{{ _("Shortcut") }}</th>
<th>{{ _("Engine name") }}</th>
<th>{{ _("Allow") }}</th>
@@ -175,6 +172,7 @@
</td>
<th>{{ search_engine.name }}</th>
<td>{{ shortcuts[search_engine.name] }}</td>
+ <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
@@ -183,6 +181,7 @@
<td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
<td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
+ <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
<td>{{ shortcuts[search_engine.name] }}</td>
<th>{{ search_engine.name }}</th>
<td class="onoff-checkbox">
diff --git a/searx/templates/pix-art/preferences.html b/searx/templates/pix-art/preferences.html
@@ -9,9 +9,9 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
- <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
- {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
- <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+ <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+ {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+ <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
</select>
</p>
diff --git a/searx/webapp.py b/searx/webapp.py
@@ -330,6 +330,10 @@ def render(template_name, override_theme=None, **kwargs):
kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
+ kwargs['language_codes'] = language_codes
+ if 'current_language' not in kwargs:
+ kwargs['current_language'] = request.preferences.get_value('language')
+
# override url_for function in templates
kwargs['url_for'] = url_for_theme
@@ -510,6 +514,7 @@ def index():
answers=result_container.answers,
infoboxes=result_container.infoboxes,
paging=result_container.paging,
+ current_language=search_query.lang,
base_url=get_base_url(),
theme=get_current_theme_name(),
favicons=global_favicons[themes.index(get_current_theme_name())]
@@ -552,7 +557,7 @@ def autocompleter():
if not language or language == 'all':
language = 'en'
else:
- language = language.split('_')[0]
+ language = language.split('-')[0]
# run autocompletion
raw_results.extend(completer(raw_text_query.getSearchQuery(), language))
@@ -615,9 +620,7 @@ def preferences():
return render('preferences.html',
locales=settings['locales'],
current_locale=get_locale(),
- current_language=lang,
image_proxy=image_proxy,
- language_codes=language_codes,
engines_by_category=categories,
stats=stats,
answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
@@ -627,7 +630,8 @@ def preferences():
themes=themes,
plugins=plugins,
allowed_plugins=allowed_plugins,
- theme=get_current_theme_name())
+ theme=get_current_theme_name(),
+ preferences=True)
@app.route('/image_proxy', methods=['GET'])
diff --git a/tests/robot/test_basic.robot b/tests/robot/test_basic.robot
@@ -101,11 +101,11 @@ Change search language
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
- List Selection Should Be language Automatic
- Select From List language Turkish (Turkey) - tr_TR
+ List Selection Should Be language Default language
+ Select From List language Türkçe (Türkiye) - tr-TR
Submit Preferences
Go To http://localhost:11111/preferences
- List Selection Should Be language Turkish (Turkey) - tr_TR
+ List Selection Should Be language Türkçe (Türkiye) - tr-TR
Change autocomplete
Page Should Contain about
diff --git a/tests/unit/engines/test_bing.py b/tests/unit/engines/test_bing.py
@@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This should be the title')
self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
self.assertEqual(results[0]['content'], 'This should be the content.')
+
+ def test_fetch_supported_languages(self):
+ html = """<html></html>"""
+ response = mock.Mock(text=html)
+ results = bing._fetch_supported_languages(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
+
+ html = """
+ <html>
+ <body>
+ <form>
+ <div id="limit-languages">
+ <div>
+ <div><input id="es" value="es"></input></div>
+ </div>
+ <div>
+ <div><input id="pt_BR" value="pt_BR"></input></div>
+ <div><input id="pt_PT" value="pt_PT"></input></div>
+ </div>
+ </div>
+ </form>
+ </body>
+ </html>
+ """
+ response = mock.Mock(text=html)
+ languages = bing._fetch_supported_languages(response)
+ self.assertEqual(type(languages), list)
+ self.assertEqual(len(languages), 3)
+ self.assertIn('es', languages)
+ self.assertIn('pt-BR', languages)
+ self.assertIn('pt-PT', languages)
diff --git a/tests/unit/engines/test_dailymotion.py b/tests/unit/engines/test_dailymotion.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import dailymotion
@@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
results = dailymotion.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
+
+ def test_fetch_supported_languages(self):
+ json = r"""
+ {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
+ "localized_name":"Afrikaans","display_name":"Afrikaans"},
+ {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
+ "localized_name":"Arabic","display_name":"Arabic"},
+ {"code":"la","name":"Latin","native_name":null,
+ "localized_name":"Latin","display_name":"Latin"}
+ ]}
+ """
+ response = mock.Mock(text=json)
+ languages = dailymotion._fetch_supported_languages(response)
+ self.assertEqual(type(languages), dict)
+ self.assertEqual(len(languages), 3)
+ self.assertIn('af', languages)
+ self.assertIn('ar', languages)
+ self.assertIn('la', languages)
+
+ self.assertEqual(type(languages['af']), dict)
+ self.assertEqual(type(languages['ar']), dict)
+ self.assertEqual(type(languages['la']), dict)
+
+ self.assertIn('name', languages['af'])
+ self.assertIn('name', languages['ar'])
+ self.assertNotIn('name', languages['la'])
+
+ self.assertIn('english_name', languages['af'])
+ self.assertIn('english_name', languages['ar'])
+ self.assertIn('english_name', languages['la'])
+
+ self.assertEqual(languages['af']['name'], 'Afrikaans')
+ self.assertEqual(languages['af']['english_name'], 'Afrikaans')
+ self.assertEqual(languages['ar']['name'], u'العربية')
+ self.assertEqual(languages['ar']['english_name'], 'Arabic')
+ self.assertEqual(languages['la']['english_name'], 'Latin')
diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py
@@ -11,7 +11,7 @@ class TestDuckduckgoEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
- dicto['language'] = 'de_CH'
+ dicto['language'] = 'de-CH'
dicto['time_range'] = ''
params = duckduckgo.request(query, dicto)
self.assertIn('url', params)
@@ -19,6 +19,17 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertIn('duckduckgo.com', params['url'])
self.assertIn('ch-de', params['url'])
+ # when ddg uses non standard code
+ dicto['language'] = 'en-GB'
+ params = duckduckgo.request(query, dicto)
+ self.assertIn('uk-en', params['url'])
+
+ # no country given
+ duckduckgo.supported_languages = ['de-CH', 'en-US']
+ dicto['language'] = 'de'
+ params = duckduckgo.request(query, dicto)
+ self.assertIn('ch-de', params['url'])
+
def test_no_url_in_request_year_time_range(self):
dicto = defaultdict(dict)
query = 'test_query'
@@ -73,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
self.assertEqual(results[0]['content'], 'This should be the content.')
+
+ def test_fetch_supported_languages(self):
+ js = """some code...regions:{
+ "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
+ }some more code..."""
+ response = mock.Mock(text=js)
+ languages = duckduckgo._fetch_supported_languages(response)
+ self.assertEqual(type(languages), list)
+ self.assertEqual(len(languages), 5)
+ self.assertIn('wt-WT', languages)
+ self.assertIn('es-AR', languages)
+ self.assertIn('en-AU', languages)
+ self.assertIn('de-AT', languages)
+ self.assertIn('fr-BE', languages)
diff --git a/tests/unit/engines/test_duckduckgo_definitions.py b/tests/unit/engines/test_duckduckgo_definitions.py
@@ -21,10 +21,14 @@ class TestDDGDefinitionsEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
+ dicto['language'] = 'es'
params = duckduckgo_definitions.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('duckduckgo.com', params['url'])
+ self.assertIn('headers', params)
+ self.assertIn('Accept-Language', params['headers'])
+ self.assertIn('es', params['headers']['Accept-Language'])
def test_response(self):
self.assertRaises(AttributeError, duckduckgo_definitions.response, None)
diff --git a/tests/unit/engines/test_gigablast.py b/tests/unit/engines/test_gigablast.py
@@ -15,6 +15,12 @@ class TestGigablastEngine(SearxTestCase):
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
self.assertTrue('gigablast.com' in params['url'])
+ self.assertTrue('xx' in params['url'])
+
+ dicto['language'] = 'en-US'
+ params = gigablast.request(query, dicto)
+ self.assertTrue('en' in params['url'])
+ self.assertFalse('en-US' in params['url'])
def test_response(self):
self.assertRaises(AttributeError, gigablast.response, None)
@@ -83,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'South by Southwest 2016')
self.assertEqual(results[0]['url'], 'www.sxsw.com')
self.assertEqual(results[0]['content'], 'This should be the content.')
+
+ def test_fetch_supported_languages(self):
+ html = """<html></html>"""
+ response = mock.Mock(text=html)
+ results = gigablast._fetch_supported_languages(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
+
+ html = """
+ <html>
+ <body>
+ <span id="menu2">
+ <a href="/search?&rxikd=1&qlang=xx"></a>
+ <a href="/search?&rxikd=1&qlang=en"></a>
+ <a href="/search?&rxikd=1&qlang=fr"></a>
+ </span>
+ </body>
+ </html>
+ """
+ response = mock.Mock(text=html)
+ languages = gigablast._fetch_supported_languages(response)
+ self.assertEqual(type(languages), list)
+ self.assertEqual(len(languages), 2)
+ self.assertIn('en', languages)
+ self.assertIn('fr', languages)
diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py
@@ -18,7 +18,7 @@ class TestGoogleEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
- dicto['language'] = 'fr_FR'
+ dicto['language'] = 'fr-FR'
dicto['time_range'] = ''
params = google.request(query, dicto)
self.assertIn('url', params)
@@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(results[0]['title'], '')
self.assertEqual(results[0]['content'], '')
self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
+
+ def test_fetch_supported_languages(self):
+ html = """<html></html>"""
+ response = mock.Mock(text=html)
+ languages = google._fetch_supported_languages(response)
+ self.assertEqual(type(languages), dict)
+ self.assertEqual(len(languages), 0)
+
+ html = u"""
+ <html>
+ <body>
+ <table>
+ <tbody>
+ <tr>
+ <td>
+ <font>
+ <label>
+ <span id="ten">English</span>
+ </label>
+ </font>
+ </td>
+ <td>
+ <font>
+ <label>
+ <span id="tzh-CN">中文 (简体)</span>
+ </label>
+ <label>
+ <span id="tzh-TW">中文 (繁體)</span>
+ </label>
+ </font>
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </body>
+ </html>
+ """
+ response = mock.Mock(text=html)
+ languages = google._fetch_supported_languages(response)
+ self.assertEqual(type(languages), dict)
+ self.assertEqual(len(languages), 3)
+
+ self.assertIn('en', languages)
+ self.assertIn('zh-CN', languages)
+ self.assertIn('zh-TW', languages)
+
+ self.assertEquals(type(languages['en']), dict)
+ self.assertEquals(type(languages['zh-CN']), dict)
+ self.assertEquals(type(languages['zh-TW']), dict)
+
+ self.assertIn('name', languages['en'])
+ self.assertIn('name', languages['zh-CN'])
+ self.assertIn('name', languages['zh-TW'])
+
+ self.assertEquals(languages['en']['name'], 'English')
+ self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
+ self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')
diff --git a/tests/unit/engines/test_qwant.py b/tests/unit/engines/test_qwant.py
@@ -10,7 +10,7 @@ class TestQwantEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
- dicto['language'] = 'fr_FR'
+ dicto['language'] = 'fr-FR'
qwant.categories = ['']
params = qwant.request(query, dicto)
self.assertIn('url', params)
diff --git a/tests/unit/engines/test_subtitleseeker.py b/tests/unit/engines/test_subtitleseeker.py
@@ -10,6 +10,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
+ dicto['language'] = 'fr-FR'
params = subtitleseeker.request(query, dicto)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
@@ -17,7 +18,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
def test_response(self):
dicto = defaultdict(dict)
- dicto['language'] = 'fr_FR'
+ dicto['language'] = 'fr-FR'
response = mock.Mock(search_params=dicto)
self.assertRaises(AttributeError, subtitleseeker.response, None)
@@ -68,6 +69,10 @@ class TestSubtitleseekerEngine(SearxTestCase):
self.assertIn('1039 Subs', results[0]['content'])
self.assertIn('Alternative Title', results[0]['content'])
+ dicto['language'] = 'pt-BR'
+ results = subtitleseeker.response(response)
+ self.assertEqual(results[0]['url'], 'http://this.is.the.url/Brazilian/')
+
html = """
<div class="boxRows">
<div class="boxRowsInner" style="width:600px;">
diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py
@@ -10,7 +10,7 @@ class TestSwisscowsEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
- dicto['language'] = 'de_DE'
+ dicto['language'] = 'de-DE'
params = swisscows.request(query, dicto)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
@@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
self.assertEqual(results[2]['template'], 'images.html')
+
+ def test_fetch_supported_languages(self):
+ html = """<html></html>"""
+ response = mock.Mock(text=html)
+ languages = swisscows._fetch_supported_languages(response)
+ self.assertEqual(type(languages), list)
+ self.assertEqual(len(languages), 0)
+
+ html = """
+ <html>
+ <div id="regions-popup">
+ <div>
+ <ul>
+ <li><a data-val="browser"></a></li>
+ <li><a data-val="de-CH"></a></li>
+ <li><a data-val="fr-CH"></a></li>
+ </ul>
+ </div>
+ </div>
+ </html>
+ """
+ response = mock.Mock(text=html)
+ languages = swisscows._fetch_supported_languages(response)
+ self.assertEqual(type(languages), list)
+ self.assertEqual(len(languages), 3)
+ self.assertIn('de-CH', languages)
+ self.assertIn('fr-CH', languages)
diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py
@@ -8,9 +8,11 @@ from searx.testing import SearxTestCase
class TestWikipediaEngine(SearxTestCase):
def test_request(self):
+ wikipedia.supported_languages = ['fr', 'en']
+
query = 'test_query'
dicto = defaultdict(dict)
- dicto['language'] = 'fr_FR'
+ dicto['language'] = 'fr-FR'
params = wikipedia.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
@@ -27,6 +29,10 @@ class TestWikipediaEngine(SearxTestCase):
params = wikipedia.request(query, dicto)
self.assertIn('en', params['url'])
+ dicto['language'] = 'xx'
+ params = wikipedia.request(query, dicto)
+ self.assertIn('en', params['url'])
+
def test_response(self):
dicto = defaultdict(dict)
dicto['language'] = 'fr'
@@ -158,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
self.assertEqual(len(results), 2)
self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
self.assertIn(u'披头士乐队...', results[1]['content'])
+
+ def test_fetch_supported_languages(self):
+ html = u"""<html></html>"""
+ response = mock.Mock(text=html)
+ languages = wikipedia._fetch_supported_languages(response)
+ self.assertEqual(type(languages), dict)
+ self.assertEqual(len(languages), 0)
+
+ html = u"""
+ <html>
+ <body>
+ <div>
+ <div>
+ <h3>Table header</h3>
+ <table class="sortable jquery-tablesorter">
+ <thead>
+ <tr>
+ <th>N</th>
+ <th>Language</th>
+ <th>Language (local)</th>
+ <th>Wiki</th>
+ <th>Articles</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>2</td>
+ <td><a>Swedish</a></td>
+ <td><a>Svenska</a></td>
+ <td><a>sv</a></td>
+ <td><a><b>3000000</b></a></td>
+ </tr>
+ <tr>
+ <td>3</td>
+ <td><a>Cebuano</a></td>
+ <td><a>Sinugboanong Binisaya</a></td>
+ <td><a>ceb</a></td>
+ <td><a><b>3000000</b></a></td>
+ </tr>
+ </tbody>
+ </table>
+ <h3>Table header</h3>
+ <table class="sortable jquery-tablesorter">
+ <thead>
+ <tr>
+ <th>N</th>
+ <th>Language</th>
+ <th>Language (local)</th>
+ <th>Wiki</th>
+ <th>Articles</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>2</td>
+ <td><a>Norwegian (Bokmål)</a></td>
+ <td><a>Norsk (Bokmål)</a></td>
+ <td><a>no</a></td>
+ <td><a><b>100000</b></a></td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ </div>
+ </body>
+ </html>
+ """
+ response = mock.Mock(text=html)
+ languages = wikipedia._fetch_supported_languages(response)
+ self.assertEqual(type(languages), dict)
+ self.assertEqual(len(languages), 3)
+
+ self.assertIn('sv', languages)
+ self.assertIn('ceb', languages)
+ self.assertIn('no', languages)
+
+ self.assertEqual(type(languages['sv']), dict)
+ self.assertEqual(type(languages['ceb']), dict)
+ self.assertEqual(type(languages['no']), dict)
+
+ self.assertIn('name', languages['sv'])
+ self.assertIn('english_name', languages['sv'])
+ self.assertIn('articles', languages['sv'])
+
+ self.assertEqual(languages['sv']['name'], 'Svenska')
+ self.assertEqual(languages['sv']['english_name'], 'Swedish')
+ self.assertEqual(languages['sv']['articles'], 3000000)
+ self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
+ self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
+ self.assertEqual(languages['ceb']['articles'], 3000000)
+ self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
+ self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
+ self.assertEqual(languages['no']['articles'], 100000)
diff --git a/tests/unit/engines/test_yahoo.py b/tests/unit/engines/test_yahoo.py
@@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
results = yahoo.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
+
+ def test_fetch_supported_languages(self):
+ html = """<html></html>"""
+ response = mock.Mock(text=html)
+ results = yahoo._fetch_supported_languages(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
+
+ html = """
+ <html>
+ <div>
+ <div id="yschlang">
+ <span>
+ <label><input value="lang_ar"></input></label>
+ </span>
+ <span>
+ <label><input value="lang_zh_chs"></input></label>
+ <label><input value="lang_zh_cht"></input></label>
+ </span>
+ </div>
+ </div>
+ </html>
+ """
+ response = mock.Mock(text=html)
+ languages = yahoo._fetch_supported_languages(response)
+ self.assertEqual(type(languages), list)
+ self.assertEqual(len(languages), 3)
+ self.assertIn('ar', languages)
+ self.assertIn('zh-chs', languages)
+ self.assertIn('zh-cht', languages)
diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py
@@ -1,4 +1,4 @@
-from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException,
+from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting,
MultipleChoiceSetting, PluginsSetting, ValidationException)
from searx.testing import SearxTestCase
@@ -88,6 +88,27 @@ class TestSettings(SearxTestCase):
setting.parse('2')
self.assertEquals(setting.get_value(), ['2'])
+ # search language settings
+ def test_lang_setting_valid_choice(self):
+ setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
+ setting.parse('de')
+ self.assertEquals(setting.get_value(), 'de')
+
+ def test_lang_setting_invalid_choice(self):
+ setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
+ setting.parse('xx')
+ self.assertEquals(setting.get_value(), 'all')
+
+ def test_lang_setting_old_cookie_choice(self):
+ setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
+ setting.parse('es_XA')
+ self.assertEquals(setting.get_value(), 'es')
+
+ def test_lang_setting_old_cookie_format(self):
+ setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
+ setting.parse('es_ES')
+ self.assertEquals(setting.get_value(), 'es-ES')
+
# plugins settings
def test_plugins_setting_all_default_enabled(self):
plugin1 = PluginStub('plugin1', True)
diff --git a/utils/fetch_languages.py b/utils/fetch_languages.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+
+# This script generates languages.py by joining each engine's supported languages.
+#
+# The country names are obtained from http://api.geonames.org which requires registering as a user.
+#
+# Output files (engines_languages.json and languages.py)
+# are written in current directory to avoid overwriting in case something goes wrong.
+
+from requests import get
+from urllib import urlencode
+from lxml.html import fromstring
+from json import loads, dumps
+import io
+from sys import path
+path.append('../searx') # noqa
+from searx.engines import engines
+
+# Geonames API for country names.
+# While the user name is left empty, get_country_name() returns '' without sending a request.
+geonames_user = '' # ADD USER NAME HERE
+country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}'
+
+# Output files.
+engines_languages_file = 'engines_languages.json'
+languages_file = 'languages.py'
+
+# engine name -> whatever that engine's fetch_supported_languages() returned
+engines_languages = {}
+# language code -> dict of attributes such as 'name', 'english_name' and 'country'
+languages = {}
+
+
+# To filter out invalid codes and dialects.
+def valid_code(lang_code):
+    """Tell whether lang_code should be kept.
+
+    A code is rejected when it starts with 'xx', is one of the known bad
+    codes, ends with one of the known bad country suffixes, or is a dialect
+    according to is_dialect().
+    """
+    # sl-SL is technically not invalid, but still a mistake
+    rejected_codes = ('sl-SL', 'wt-WT', 'jw')
+    rejected_countries = ('UK', 'XA', 'XL')
+
+    if lang_code[:2] == 'xx':
+        return False
+    if lang_code in rejected_codes:
+        return False
+    if lang_code[-2:] in rejected_countries:
+        return False
+    return not is_dialect(lang_code)
+
+
+# Language codes with any additional tags other than language and country.
+def is_dialect(lang_code):
+    """Tell whether lang_code carries more than a plain language and country.
+
+    True when the code has more than two '-' separated parts, when its first
+    part is longer than three characters, or when its second part is longer
+    than two characters.
+    """
+    parts = lang_code.split('-')
+    if len(parts) > 2 or len(parts[0]) > 3:
+        return True
+    return len(parts) == 2 and len(parts[1]) > 2
+
+
+# Get country name in specified language.
+def get_country_name(locale):
+    """Return the name of the locale's country as reported by the Geonames API.
+
+    locale is a 'language-COUNTRY' code such as 'pt-BR'; the language part is
+    sent as the 'lang' parameter of the request and the country part as
+    'country'. An empty string is returned when no Geonames user name is
+    configured, when the locale does not consist of exactly two parts, or
+    when the answer does not hold exactly one country.
+    """
+    # test the value, not the identity: "is ''" only happens to work
+    # because the interpreter may share one object for equal strings
+    if not geonames_user:
+        return ''
+
+    locale = locale.split('-')
+    if len(locale) != 2:
+        return ''
+
+    url = country_names_url.format(parameters=urlencode({'lang': locale[0],
+                                                         'country': locale[1],
+                                                         'username': geonames_user}))
+    response = get(url)
+    answer = loads(response.text)
+    content = answer.get('geonames', None)
+    if content is None or len(content) != 1:
+        print "No country name found for " + locale[0] + "-" + locale[1]
+        return ''
+
+    return content[0].get('countryName', '')
+
+
+# Fetches the supported languages of each engine and writes them to a json file.
+def fetch_supported_languages():
+    """Collect the languages of every engine able to report them and dump them as json.
+
+    The answers are stored in the module level engines_languages dict, keyed
+    by engine name. An engine whose fetch raises is skipped after printing
+    the exception.
+    """
+    for engine_name in engines:
+        engine = engines[engine_name]
+        if not hasattr(engine, 'fetch_supported_languages'):
+            continue
+        try:
+            engines_languages[engine_name] = engine.fetch_supported_languages()
+        except Exception as e:
+            print e
+
+    # write json file
+    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
+        serialized = dumps(engines_languages, ensure_ascii=False, encoding="utf-8")
+        f.write(unicode(serialized))
+
+
+# Join all language lists.
+# Iterate all languages supported by each engine.
+def join_language_lists():
+    """Merge the languages of all engines into the module level languages dict.
+
+    Codes rejected by valid_code() are ignored. Afterwards every locale that
+    still has no name takes the name of its base language, or is removed when
+    the base language has none; missing english names and country names are
+    filled in the same pass.
+    """
+    # include wikipedia first for more accurate language names
+    # (its entry is absent when the fetch failed, so do not index it directly)
+    languages.update({code: lang for code, lang
+                      in engines_languages.get('wikipedia', {}).items()
+                      if valid_code(code)})
+
+    for engine_name in engines_languages:
+        engine_languages = engines_languages[engine_name]
+        for locale in engine_languages:
+            if not valid_code(locale):
+                continue
+
+            # if language is not on list or if it has no name yet
+            if locale not in languages or not languages[locale].get('name'):
+                if isinstance(engine_languages, dict):
+                    languages[locale] = engine_languages[locale]
+                else:
+                    languages[locale] = {}
+
+    # get locales that have no name or country yet
+    # (walk a snapshot of the keys because entries are deleted on the way)
+    for locale in list(languages):
+        base = languages.get(locale.split('-')[0], {})
+
+        # try to get language names
+        if not languages[locale].get('name'):
+            name = base.get('name', None)
+            if name:
+                languages[locale]['name'] = name
+            else:
+                # filter out locales with no name
+                del languages[locale]
+                continue
+
+        # try to get language name in english
+        if not languages[locale].get('english_name'):
+            languages[locale]['english_name'] = base.get('english_name', '')
+
+        # try to get country name
+        if locale.find('-') > 0 and not languages[locale].get('country'):
+            languages[locale]['country'] = get_country_name(locale) or ''
+
+
+# Remove countryless language if language is featured in only one country.
+def filter_single_country_languages():
+    """Drop a bare language code when exactly one country variant of it is listed.
+
+    E.g. if 'tr' and 'tr-TR' are the only Turkish entries, 'tr' is removed
+    from the module level languages dict and 'tr-TR' is kept.
+    """
+    # count the country variants of every language in one pass, so that no
+    # language is missed regardless of its position in the sort order
+    countries = {}
+    for code in languages:
+        lang, separator, _country = code.partition('-')
+        if separator:
+            countries[lang] = countries.get(lang, 0) + 1
+
+    for lang, count in countries.items():
+        if count == 1:
+            # pop() instead of del: the bare code may not be listed at all
+            languages.pop(lang, None)
+
+
+# Write languages.py.
+def write_languages_file():
+    """Write languages.py with a language_codes tuple built from the languages dict.
+
+    Each entry is a (code, name, country, english name) tuple; entries are
+    sorted by code and anything from ' (' onwards is cut off both names.
+    """
+    rows = []
+    for code in sorted(languages):
+        lang = languages[code]
+        rows.append('\n (u"' + code + '"'
+                    + ', u"' + lang['name'].split(' (')[0] + '"'
+                    + ', u"' + lang.get('country', '') + '"'
+                    + ', u"' + lang.get('english_name', '').split(' (')[0] + '")')
+
+    # joining with ',' leaves no trailing comma and keeps the opening
+    # parenthesis intact when there are no rows at all
+    file_content = '# -*- coding: utf-8 -*-\n'\
+        + '# list of language codes\n'\
+        + '# this file is generated automatically by utils/fetch_languages.py\n'\
+        + '\nlanguage_codes = ('\
+        + ','.join(rows)\
+        + '\n)\n'
+
+    # the content is complete before the target is opened, and the context
+    # manager closes the file even if encoding or writing fails
+    with open(languages_file, 'w') as new_file:
+        new_file.write(file_content.encode('utf8'))
+
+
+if __name__ == "__main__":
+    # the steps communicate through the module level dicts, so order matters
+    # 1. fill engines_languages and dump it to the json file
+    fetch_supported_languages()
+    # 2. merge engines_languages into languages
+    join_language_lists()
+    # 3. prune entries from languages
+    filter_single_country_languages()
+    # 4. generate languages.py from languages
+    write_languages_file()