Browse Source

[mod] fetch supported languages for several engines
utils/fetch_languages.py gets languages supported by each engine and
generates engines_languages.json with each engine's supported language.

marc 8 years ago
parent
commit
f62ce21f50

+ 3256 - 0
searx/data/engines_languages.json

@@ -0,0 +1,3256 @@
+{
+    "google news": {
+        "gu": {
+            "name": "ગુજરાતી"
+        }, 
+        "mfe": {
+            "name": "Kreol Morisien"
+        }, 
+        "gd": {
+            "name": "Gàidhlig"
+        }, 
+        "ga": {
+            "name": "Gaeilge"
+        }, 
+        "gn": {
+            "name": "Guarani"
+        }, 
+        "gl": {
+            "name": "Galego"
+        }, 
+        "lg": {
+            "name": "Luganda"
+        }, 
+        "la": {
+            "name": "Latin"
+        }, 
+        "ln": {
+            "name": "Lingála"
+        }, 
+        "tw": {
+            "name": "Twi"
+        }, 
+        "tt": {
+            "name": "Tatar"
+        }, 
+        "tr": {
+            "name": "Türkçe"
+        }, 
+        "lv": {
+            "name": "Latviešu"
+        }, 
+        "to": {
+            "name": "Lea Fakatonga"
+        }, 
+        "lt": {
+            "name": "Lietuvių"
+        }, 
+        "tk": {
+            "name": "Turkmen"
+        }, 
+        "th": {
+            "name": "ไทย"
+        }, 
+        "pcm": {
+            "name": "Nigerian Pidgin"
+        }, 
+        "tg": {
+            "name": "Tajik"
+        }, 
+        "te": {
+            "name": "తెలుగు"
+        }, 
+        "haw": {
+            "name": "ʻŌlelo HawaiʻI"
+        }, 
+        "yi": {
+            "name": "ייִדיש"
+        }, 
+        "ceb": {
+            "name": "Cebuano"
+        }, 
+        "yo": {
+            "name": "Èdè Yorùbá"
+        }, 
+        "de": {
+            "name": "Deutsch"
+        }, 
+        "ko": {
+            "name": "한국어"
+        }, 
+        "da": {
+            "name": "Dansk"
+        }, 
+        "crs": {
+            "name": "Seychellois Creole"
+        }, 
+        "qu": {
+            "name": "Runasimi"
+        }, 
+        "bem": {
+            "name": "Ichibemba"
+        }, 
+        "xx": {
+            "name": "Pirate"
+        }, 
+        "ban": {
+            "name": "Balinese"
+        }, 
+        "el": {
+            "name": "Ελληνικά"
+        }, 
+        "eo": {
+            "name": "Esperanto"
+        }, 
+        "en": {
+            "name": "English"
+        }, 
+        "zh": {
+            "name": "中文 (繁體)"
+        }, 
+        "ee": {
+            "name": "Eʋegbe"
+        }, 
+        "eu": {
+            "name": "Euskara"
+        }, 
+        "et": {
+            "name": "Eesti"
+        }, 
+        "es": {
+            "name": "Español (Latinoamérica)"
+        }, 
+        "ru": {
+            "name": "Русский"
+        }, 
+        "rw": {
+            "name": "Kinyarwanda"
+        }, 
+        "lua": {
+            "name": "Luba-Lulua"
+        }, 
+        "rm": {
+            "name": "Rumantsch"
+        }, 
+        "rn": {
+            "name": "Ikirundi"
+        }, 
+        "ro": {
+            "name": "Română"
+        }, 
+        "be": {
+            "name": "Беларуская"
+        }, 
+        "bg": {
+            "name": "Български"
+        }, 
+        "uk": {
+            "name": "Українська"
+        }, 
+        "ps": {
+            "name": "پښتو"
+        }, 
+        "wo": {
+            "name": "Wolof"
+        }, 
+        "bn": {
+            "name": "বাংলা"
+        }, 
+        "jw": {
+            "name": "Javanese"
+        }, 
+        "tum": {
+            "name": "Tumbuka"
+        }, 
+        "br": {
+            "name": "Brezhoneg"
+        }, 
+        "bs": {
+            "name": "Bosanski"
+        }, 
+        "ja": {
+            "name": "日本語"
+        }, 
+        "om": {
+            "name": "Oromoo"
+        }, 
+        "ach": {
+            "name": "Acoli"
+        }, 
+        "oc": {
+            "name": "Occitan"
+        }, 
+        "kri": {
+            "name": "Krio (Sierra Leone)"
+        }, 
+        "lo": {
+            "name": "ລາວ"
+        }, 
+        "tlh": {
+            "name": "Klingon"
+        }, 
+        "or": {
+            "name": "ଓଡ଼ିଆ"
+        }, 
+        "xh": {
+            "name": "Xhosa"
+        }, 
+        "co": {
+            "name": "Corsican"
+        }, 
+        "nso": {
+            "name": "Northern Sotho"
+        }, 
+        "ca": {
+            "name": "Català"
+        }, 
+        "cy": {
+            "name": "Cymraeg"
+        }, 
+        "cs": {
+            "name": "Čeština"
+        }, 
+        "tn": {
+            "name": "Tswana"
+        }, 
+        "pt": {
+            "name": "Português (Portugal)"
+        }, 
+        "tl": {
+            "name": "Filipino"
+        }, 
+        "chr": {
+            "name": "ᏣᎳᎩ"
+        }, 
+        "pa": {
+            "name": "ਪੰਜਾਬੀ"
+        }, 
+        "loz": {
+            "name": "Lozi"
+        }, 
+        "is": {
+            "name": "Íslenska"
+        }, 
+        "pl": {
+            "name": "Polski"
+        }, 
+        "hy": {
+            "name": "Հայերեն"
+        }, 
+        "hr": {
+            "name": "Hrvatski"
+        }, 
+        "ti": {
+            "name": "ትግርኛ"
+        }, 
+        "ht": {
+            "name": "Haitian Creole"
+        }, 
+        "hu": {
+            "name": "Magyar"
+        }, 
+        "hi": {
+            "name": "हिन्दी"
+        }, 
+        "ha": {
+            "name": "Hausa"
+        }, 
+        "gaa": {
+            "name": "Ga"
+        }, 
+        "mg": {
+            "name": "Malagasy"
+        }, 
+        "uz": {
+            "name": "O‘Zbek"
+        }, 
+        "ml": {
+            "name": "മലയാളം"
+        }, 
+        "mn": {
+            "name": "Монгол"
+        }, 
+        "mi": {
+            "name": "Maori"
+        }, 
+        "mk": {
+            "name": "Македонски"
+        }, 
+        "ur": {
+            "name": "اردو"
+        }, 
+        "mt": {
+            "name": "Malti"
+        }, 
+        "ms": {
+            "name": "Bahasa Melayu"
+        }, 
+        "mr": {
+            "name": "मराठी"
+        }, 
+        "ug": {
+            "name": "ئۇيغۇرچە"
+        }, 
+        "ta": {
+            "name": "தமிழ்"
+        }, 
+        "my": {
+            "name": "ဗမာ"
+        }, 
+        "af": {
+            "name": "Afrikaans"
+        }, 
+        "vi": {
+            "name": "Tiếng Việt"
+        }, 
+        "ak": {
+            "name": "Akan"
+        }, 
+        "am": {
+            "name": "አማርኛ"
+        }, 
+        "it": {
+            "name": "Italiano"
+        }, 
+        "iw": {
+            "name": "עברית"
+        }, 
+        "kn": {
+            "name": "ಕನ್ನಡ"
+        }, 
+        "ar": {
+            "name": "العربية"
+        }, 
+        "km": {
+            "name": "ខ្មែរ"
+        }, 
+        "zu": {
+            "name": "Isizulu"
+        }, 
+        "ia": {
+            "name": "Interlingua"
+        }, 
+        "az": {
+            "name": "Azərbaycan Dili"
+        }, 
+        "id": {
+            "name": "Indonesia"
+        }, 
+        "ig": {
+            "name": "Igbo"
+        }, 
+        "nl": {
+            "name": "Nederlands"
+        }, 
+        "nn": {
+            "name": "Nynorsk"
+        }, 
+        "no": {
+            "name": "Norsk"
+        }, 
+        "ne": {
+            "name": "नेपाली"
+        }, 
+        "ny": {
+            "name": "Nyanja"
+        }, 
+        "nyn": {
+            "name": "Runyankore"
+        }, 
+        "fr": {
+            "name": "Français"
+        }, 
+        "fy": {
+            "name": "West-Frysk"
+        }, 
+        "fa": {
+            "name": "فارسی"
+        }, 
+        "fi": {
+            "name": "Suomi"
+        }, 
+        "fo": {
+            "name": "Føroyskt"
+        }, 
+        "ka": {
+            "name": "ქართული"
+        }, 
+        "kg": {
+            "name": "Kongo"
+        }, 
+        "ckb": {
+            "name": "Central Kurdish"
+        }, 
+        "kk": {
+            "name": "Қазақ Тілі"
+        }, 
+        "sr": {
+            "name": "Српски"
+        }, 
+        "sq": {
+            "name": "Shqip"
+        }, 
+        "sw": {
+            "name": "Kiswahili"
+        }, 
+        "sv": {
+            "name": "Svenska"
+        }, 
+        "su": {
+            "name": "Sundanese"
+        }, 
+        "st": {
+            "name": "Southern Sotho"
+        }, 
+        "sk": {
+            "name": "Slovenčina"
+        }, 
+        "si": {
+            "name": "සිංහල"
+        }, 
+        "so": {
+            "name": "Soomaali"
+        }, 
+        "sn": {
+            "name": "Chishona"
+        }, 
+        "sl": {
+            "name": "Slovenščina"
+        }, 
+        "ky": {
+            "name": "Кыргызча"
+        }, 
+        "sd": {
+            "name": "Sindhi"
+        }
+    }, 
+    "dailymotion": {
+        "gv": {
+            "english_name": "Manx"
+        }, 
+        "gu": {
+            "name": "ગુજરાતી", 
+            "english_name": "Gujarati"
+        }, 
+        "gd": {
+            "english_name": "Gaelic, Scottish"
+        }, 
+        "ga": {
+            "name": "Gaeilge", 
+            "english_name": "Irish"
+        }, 
+        "gn": {
+            "english_name": "Guarani"
+        }, 
+        "gl": {
+            "name": "Galego", 
+            "english_name": "Galician"
+        }, 
+        "lg": {
+            "english_name": "Ganda"
+        }, 
+        "lb": {
+            "english_name": "Luxembourgish"
+        }, 
+        "la": {
+            "english_name": "Latin"
+        }, 
+        "ln": {
+            "english_name": "Lingala"
+        }, 
+        "lo": {
+            "english_name": "Lao"
+        }, 
+        "tt": {
+            "name": "Татарча", 
+            "english_name": "Tatar"
+        }, 
+        "tr": {
+            "name": "Türkçe", 
+            "english_name": "Turkish"
+        }, 
+        "ts": {
+            "english_name": "Tsonga"
+        }, 
+        "li": {
+            "english_name": "Limburgan"
+        }, 
+        "lv": {
+            "name": "Latviešu", 
+            "english_name": "Latvian"
+        }, 
+        "to": {
+            "english_name": "Tonga (Tonga Islands)"
+        }, 
+        "lt": {
+            "name": "Lietuvių", 
+            "english_name": "Lithuanian"
+        }, 
+        "lu": {
+            "english_name": "Luba-Katanga"
+        }, 
+        "tk": {
+            "english_name": "Turkmen"
+        }, 
+        "th": {
+            "name": "ไทย", 
+            "english_name": "Thai"
+        }, 
+        "ti": {
+            "name": "ትግርኛ", 
+            "english_name": "Tigrinya"
+        }, 
+        "tg": {
+            "english_name": "Tajik"
+        }, 
+        "te": {
+            "english_name": "Telugu"
+        }, 
+        "ta": {
+            "name": "தமிழ்", 
+            "english_name": "Tamil"
+        }, 
+        "yi": {
+            "english_name": "Yiddish"
+        }, 
+        "yo": {
+            "english_name": "Yoruba"
+        }, 
+        "de": {
+            "name": "Deutsch", 
+            "english_name": "German"
+        }, 
+        "da": {
+            "name": "Dansk", 
+            "english_name": "Danish"
+        }, 
+        "dz": {
+            "english_name": "Dzongkha"
+        }, 
+        "st": {
+            "english_name": "Sotho, Southern"
+        }, 
+        "dv": {
+            "english_name": "Dhivehi"
+        }, 
+        "qu": {
+            "english_name": "Quechua"
+        }, 
+        "el": {
+            "name": "Ελληνικά", 
+            "english_name": "Greek, Modern (1453-)"
+        }, 
+        "eo": {
+            "name": "Esperanto", 
+            "english_name": "Esperanto"
+        }, 
+        "en": {
+            "english_name": "English"
+        }, 
+        "zh": {
+            "name": "中文", 
+            "english_name": "Chinese"
+        }, 
+        "ee": {
+            "english_name": "Ewe"
+        }, 
+        "za": {
+            "english_name": "Zhuang"
+        }, 
+        "mh": {
+            "english_name": "Marshallese"
+        }, 
+        "uk": {
+            "name": "українська", 
+            "english_name": "Ukrainian"
+        }, 
+        "eu": {
+            "name": "Euskara", 
+            "english_name": "Basque"
+        }, 
+        "et": {
+            "name": "Eesti", 
+            "english_name": "Estonian"
+        }, 
+        "es": {
+            "name": "Español", 
+            "english_name": "Spanish"
+        }, 
+        "ru": {
+            "name": "русский", 
+            "english_name": "Russian"
+        }, 
+        "rw": {
+            "name": "Ikinyarwanda", 
+            "english_name": "Kinyarwanda"
+        }, 
+        "rm": {
+            "english_name": "Romansh"
+        }, 
+        "rn": {
+            "english_name": "Rundi"
+        }, 
+        "ro": {
+            "name": "Română", 
+            "english_name": "Romanian"
+        }, 
+        "bn": {
+            "name": "বাংলা", 
+            "english_name": "Bengali"
+        }, 
+        "be": {
+            "english_name": "Belarusian"
+        }, 
+        "bg": {
+            "name": "Български", 
+            "english_name": "Bulgarian"
+        }, 
+        "ba": {
+            "english_name": "Bashkir"
+        }, 
+        "wa": {
+            "name": "Walon", 
+            "english_name": "Walloon"
+        }, 
+        "wo": {
+            "english_name": "Wolof"
+        }, 
+        "bm": {
+            "english_name": "Bambara"
+        }, 
+        "jv": {
+            "english_name": "Javanese"
+        }, 
+        "bo": {
+            "english_name": "Tibetan"
+        }, 
+        "bi": {
+            "english_name": "Bislama"
+        }, 
+        "br": {
+            "name": "Brezhoneg", 
+            "english_name": "Breton"
+        }, 
+        "bs": {
+            "name": "Bosnian", 
+            "english_name": "Bosnian"
+        }, 
+        "ja": {
+            "name": "日本語", 
+            "english_name": "Japanese"
+        }, 
+        "om": {
+            "english_name": "Oromo"
+        }, 
+        "oj": {
+            "english_name": "Ojibwa"
+        }, 
+        "ty": {
+            "english_name": "Tahitian"
+        }, 
+        "oc": {
+            "name": "Occitan", 
+            "english_name": "Occitan"
+        }, 
+        "tw": {
+            "english_name": "Twi"
+        }, 
+        "os": {
+            "english_name": "Ossetian"
+        }, 
+        "or": {
+            "name": "Oriya", 
+            "english_name": "Oriya"
+        }, 
+        "xh": {
+            "name": "Xhosa", 
+            "english_name": "Xhosa"
+        }, 
+        "ch": {
+            "english_name": "Chamorro"
+        }, 
+        "co": {
+            "english_name": "Corsican"
+        }, 
+        "ca": {
+            "name": "Català", 
+            "english_name": "Catalan"
+        }, 
+        "ce": {
+            "english_name": "Chechen"
+        }, 
+        "cy": {
+            "name": "Cymraeg", 
+            "english_name": "Welsh"
+        }, 
+        "cs": {
+            "name": "čeština", 
+            "english_name": "Czech"
+        }, 
+        "cr": {
+            "english_name": "Cree"
+        }, 
+        "cv": {
+            "english_name": "Chuvash"
+        }, 
+        "cu": {
+            "english_name": "Slavic, Church"
+        }, 
+        "ve": {
+            "name": "Venda", 
+            "english_name": "Venda"
+        }, 
+        "ps": {
+            "name": "Pushto", 
+            "english_name": "Pushto"
+        }, 
+        "pt": {
+            "name": "Português", 
+            "english_name": "Portuguese"
+        }, 
+        "tl": {
+            "english_name": "Tagalog"
+        }, 
+        "pa": {
+            "name": "ਪੰਜਾਬੀ", 
+            "english_name": "Panjabi"
+        }, 
+        "vi": {
+            "name": "Tiếng Việt", 
+            "english_name": "Vietnamese"
+        }, 
+        "pi": {
+            "english_name": "Pali"
+        }, 
+        "is": {
+            "name": "Íslenska", 
+            "english_name": "Icelandic"
+        }, 
+        "pl": {
+            "name": "polski", 
+            "english_name": "Polish"
+        }, 
+        "hz": {
+            "english_name": "Herero"
+        }, 
+        "hy": {
+            "english_name": "Armenian"
+        }, 
+        "hr": {
+            "name": "hrvatski", 
+            "english_name": "Croatian"
+        }, 
+        "iu": {
+            "english_name": "Inuktitut"
+        }, 
+        "ht": {
+            "english_name": "Haitian"
+        }, 
+        "hu": {
+            "name": "magyar", 
+            "english_name": "Hungarian"
+        }, 
+        "hi": {
+            "name": "हिंदी", 
+            "english_name": "Hindi"
+        }, 
+        "ho": {
+            "english_name": "Hiri Motu"
+        }, 
+        "ha": {
+            "english_name": "Hausa"
+        }, 
+        "he": {
+            "name": "עברית", 
+            "english_name": "Hebrew"
+        }, 
+        "mg": {
+            "english_name": "Malagasy"
+        }, 
+        "uz": {
+            "english_name": "Uzbek"
+        }, 
+        "ml": {
+            "english_name": "Malayalam"
+        }, 
+        "mn": {
+            "name": "Монгол", 
+            "english_name": "Mongolian"
+        }, 
+        "mi": {
+            "name": "Reo Māori", 
+            "english_name": "Maori"
+        }, 
+        "ik": {
+            "english_name": "Inupiaq"
+        }, 
+        "mk": {
+            "name": "Македонски", 
+            "english_name": "Macedonian"
+        }, 
+        "ur": {
+            "english_name": "Urdu"
+        }, 
+        "mt": {
+            "name": "Malti", 
+            "english_name": "Maltese"
+        }, 
+        "ms": {
+            "name": "Malay", 
+            "english_name": "Malay"
+        }, 
+        "mr": {
+            "name": "मराठी", 
+            "english_name": "Marathi"
+        }, 
+        "ug": {
+            "english_name": "Uighur"
+        }, 
+        "my": {
+            "english_name": "Burmese"
+        }, 
+        "sq": {
+            "english_name": "Albanian"
+        }, 
+        "ae": {
+            "english_name": "Avestan"
+        }, 
+        "ss": {
+            "english_name": "Swati"
+        }, 
+        "af": {
+            "name": "Afrikaans", 
+            "english_name": "Afrikaans"
+        }, 
+        "tn": {
+            "english_name": "Tswana"
+        }, 
+        "sw": {
+            "english_name": "Swahili (macrolanguage)"
+        }, 
+        "ak": {
+            "english_name": "Akan"
+        }, 
+        "am": {
+            "name": "አማርኛ", 
+            "english_name": "Amharic"
+        }, 
+        "it": {
+            "name": "Italiano", 
+            "english_name": "Italian"
+        }, 
+        "an": {
+            "english_name": "Aragonese"
+        }, 
+        "ii": {
+            "english_name": "Yi, Sichuan"
+        }, 
+        "ia": {
+            "english_name": "Interlingua"
+        }, 
+        "as": {
+            "english_name": "Assamese"
+        }, 
+        "ar": {
+            "name": "العربية", 
+            "english_name": "Arabic"
+        }, 
+        "su": {
+            "english_name": "Sundanese"
+        }, 
+        "io": {
+            "english_name": "Ido"
+        }, 
+        "av": {
+            "english_name": "Avaric"
+        }, 
+        "ay": {
+            "english_name": "Aymara"
+        }, 
+        "az": {
+            "name": "Azerbaijani", 
+            "english_name": "Azerbaijani"
+        }, 
+        "ie": {
+            "english_name": "Interlingue"
+        }, 
+        "id": {
+            "name": "Indonesian", 
+            "english_name": "Indonesian"
+        }, 
+        "ig": {
+            "english_name": "Igbo"
+        }, 
+        "sk": {
+            "name": "Slovenský", 
+            "english_name": "Slovak"
+        }, 
+        "sr": {
+            "name": "српски", 
+            "english_name": "Serbian"
+        }, 
+        "nl": {
+            "name": "Nederlands", 
+            "english_name": "Dutch"
+        }, 
+        "nn": {
+            "name": "Norwegian Nynorsk", 
+            "english_name": "Norwegian Nynorsk"
+        }, 
+        "no": {
+            "english_name": "Norwegian"
+        }, 
+        "na": {
+            "english_name": "Nauru"
+        }, 
+        "nb": {
+            "name": "Norwegian Bokmål", 
+            "english_name": "Norwegian Bokmål"
+        }, 
+        "nd": {
+            "english_name": "Ndebele, North"
+        }, 
+        "ne": {
+            "english_name": "Nepali (macrolanguage)"
+        }, 
+        "ng": {
+            "english_name": "Ndonga"
+        }, 
+        "ny": {
+            "english_name": "Nyanja"
+        }, 
+        "vo": {
+            "english_name": "Volapük"
+        }, 
+        "zu": {
+            "name": "Isi-Zulu", 
+            "english_name": "Zulu"
+        }, 
+        "so": {
+            "english_name": "Somali"
+        }, 
+        "nr": {
+            "english_name": "Ndebele, South"
+        }, 
+        "nv": {
+            "english_name": "Navajo"
+        }, 
+        "sn": {
+            "english_name": "Shona"
+        }, 
+        "fr": {
+            "name": "français", 
+            "english_name": "French"
+        }, 
+        "sm": {
+            "english_name": "Samoan"
+        }, 
+        "fy": {
+            "english_name": "Frisian, Western"
+        }, 
+        "sv": {
+            "name": "Svenska", 
+            "english_name": "Swedish"
+        }, 
+        "fa": {
+            "name": "فارسی", 
+            "english_name": "Persian"
+        }, 
+        "ff": {
+            "english_name": "Fulah"
+        }, 
+        "fi": {
+            "name": "suomi", 
+            "english_name": "Finnish"
+        }, 
+        "fj": {
+            "english_name": "Fijian"
+        }, 
+        "sa": {
+            "english_name": "Sanskrit"
+        }, 
+        "fo": {
+            "english_name": "Faroese"
+        }, 
+        "ka": {
+            "english_name": "Georgian"
+        }, 
+        "kg": {
+            "english_name": "Kongo"
+        }, 
+        "kk": {
+            "english_name": "Kazakh"
+        }, 
+        "kj": {
+            "english_name": "Kuanyama"
+        }, 
+        "ki": {
+            "english_name": "Kikuyu"
+        }, 
+        "ko": {
+            "name": "한국어", 
+            "english_name": "Korean"
+        }, 
+        "kn": {
+            "name": "ಕನ್ನಡ", 
+            "english_name": "Kannada"
+        }, 
+        "km": {
+            "english_name": "Khmer, Central"
+        }, 
+        "kl": {
+            "english_name": "Kalaallisut"
+        }, 
+        "ks": {
+            "english_name": "Kashmiri"
+        }, 
+        "kr": {
+            "english_name": "Kanuri"
+        }, 
+        "si": {
+            "english_name": "Sinhala"
+        }, 
+        "sh": {
+            "name": "Serbo-Croatian", 
+            "english_name": "Serbo-Croatian"
+        }, 
+        "kw": {
+            "english_name": "Cornish"
+        }, 
+        "kv": {
+            "english_name": "Komi"
+        }, 
+        "ku": {
+            "english_name": "Kurdish"
+        }, 
+        "sl": {
+            "name": "slovenščina", 
+            "english_name": "Slovenian"
+        }, 
+        "sc": {
+            "english_name": "Sardinian"
+        }, 
+        "ky": {
+            "english_name": "Kirghiz"
+        }, 
+        "sg": {
+            "english_name": "Sango"
+        }, 
+        "se": {
+            "english_name": "Sami, Northern"
+        }, 
+        "sd": {
+            "english_name": "Sindhi"
+        }
+    }, 
+    "google": {
+        "gu": {
+            "name": "ગુજરાતી"
+        }, 
+        "mfe": {
+            "name": "Kreol Morisien"
+        }, 
+        "gd": {
+            "name": "Gàidhlig"
+        }, 
+        "ga": {
+            "name": "Gaeilge"
+        }, 
+        "gn": {
+            "name": "Guarani"
+        }, 
+        "gl": {
+            "name": "Galego"
+        }, 
+        "lg": {
+            "name": "Luganda"
+        }, 
+        "la": {
+            "name": "Latin"
+        }, 
+        "ln": {
+            "name": "Lingála"
+        }, 
+        "tw": {
+            "name": "Twi"
+        }, 
+        "tt": {
+            "name": "Tatar"
+        }, 
+        "tr": {
+            "name": "Türkçe"
+        }, 
+        "lv": {
+            "name": "Latviešu"
+        }, 
+        "to": {
+            "name": "Lea Fakatonga"
+        }, 
+        "lt": {
+            "name": "Lietuvių"
+        }, 
+        "tk": {
+            "name": "Turkmen"
+        }, 
+        "th": {
+            "name": "ไทย"
+        }, 
+        "pcm": {
+            "name": "Nigerian Pidgin"
+        }, 
+        "tg": {
+            "name": "Tajik"
+        }, 
+        "te": {
+            "name": "తెలుగు"
+        }, 
+        "haw": {
+            "name": "ʻŌlelo HawaiʻI"
+        }, 
+        "yi": {
+            "name": "ייִדיש"
+        }, 
+        "ceb": {
+            "name": "Cebuano"
+        }, 
+        "yo": {
+            "name": "Èdè Yorùbá"
+        }, 
+        "de": {
+            "name": "Deutsch"
+        }, 
+        "ko": {
+            "name": "한국어"
+        }, 
+        "da": {
+            "name": "Dansk"
+        }, 
+        "crs": {
+            "name": "Seychellois Creole"
+        }, 
+        "qu": {
+            "name": "Runasimi"
+        }, 
+        "bem": {
+            "name": "Ichibemba"
+        }, 
+        "xx": {
+            "name": "Pirate"
+        }, 
+        "ban": {
+            "name": "Balinese"
+        }, 
+        "el": {
+            "name": "Ελληνικά"
+        }, 
+        "eo": {
+            "name": "Esperanto"
+        }, 
+        "en": {
+            "name": "English"
+        }, 
+        "zh": {
+            "name": "中文 (繁體)"
+        }, 
+        "ee": {
+            "name": "Eʋegbe"
+        }, 
+        "eu": {
+            "name": "Euskara"
+        }, 
+        "et": {
+            "name": "Eesti"
+        }, 
+        "es": {
+            "name": "Español (Latinoamérica)"
+        }, 
+        "ru": {
+            "name": "Русский"
+        }, 
+        "rw": {
+            "name": "Kinyarwanda"
+        }, 
+        "lua": {
+            "name": "Luba-Lulua"
+        }, 
+        "rm": {
+            "name": "Rumantsch"
+        }, 
+        "rn": {
+            "name": "Ikirundi"
+        }, 
+        "ro": {
+            "name": "Română"
+        }, 
+        "be": {
+            "name": "Беларуская"
+        }, 
+        "bg": {
+            "name": "Български"
+        }, 
+        "uk": {
+            "name": "Українська"
+        }, 
+        "ps": {
+            "name": "پښتو"
+        }, 
+        "wo": {
+            "name": "Wolof"
+        }, 
+        "bn": {
+            "name": "বাংলা"
+        }, 
+        "jw": {
+            "name": "Javanese"
+        }, 
+        "tum": {
+            "name": "Tumbuka"
+        }, 
+        "br": {
+            "name": "Brezhoneg"
+        }, 
+        "bs": {
+            "name": "Bosanski"
+        }, 
+        "ja": {
+            "name": "日本語"
+        }, 
+        "om": {
+            "name": "Oromoo"
+        }, 
+        "ach": {
+            "name": "Acoli"
+        }, 
+        "oc": {
+            "name": "Occitan"
+        }, 
+        "kri": {
+            "name": "Krio (Sierra Leone)"
+        }, 
+        "lo": {
+            "name": "ລາວ"
+        }, 
+        "tlh": {
+            "name": "Klingon"
+        }, 
+        "or": {
+            "name": "ଓଡ଼ିଆ"
+        }, 
+        "xh": {
+            "name": "Xhosa"
+        }, 
+        "co": {
+            "name": "Corsican"
+        }, 
+        "nso": {
+            "name": "Northern Sotho"
+        }, 
+        "ca": {
+            "name": "Català"
+        }, 
+        "cy": {
+            "name": "Cymraeg"
+        }, 
+        "cs": {
+            "name": "Čeština"
+        }, 
+        "tn": {
+            "name": "Tswana"
+        }, 
+        "pt": {
+            "name": "Português (Portugal)"
+        }, 
+        "tl": {
+            "name": "Filipino"
+        }, 
+        "chr": {
+            "name": "ᏣᎳᎩ"
+        }, 
+        "pa": {
+            "name": "ਪੰਜਾਬੀ"
+        }, 
+        "loz": {
+            "name": "Lozi"
+        }, 
+        "is": {
+            "name": "Íslenska"
+        }, 
+        "pl": {
+            "name": "Polski"
+        }, 
+        "hy": {
+            "name": "Հայերեն"
+        }, 
+        "hr": {
+            "name": "Hrvatski"
+        }, 
+        "ti": {
+            "name": "ትግርኛ"
+        }, 
+        "ht": {
+            "name": "Haitian Creole"
+        }, 
+        "hu": {
+            "name": "Magyar"
+        }, 
+        "hi": {
+            "name": "हिन्दी"
+        }, 
+        "ha": {
+            "name": "Hausa"
+        }, 
+        "gaa": {
+            "name": "Ga"
+        }, 
+        "mg": {
+            "name": "Malagasy"
+        }, 
+        "uz": {
+            "name": "O‘Zbek"
+        }, 
+        "ml": {
+            "name": "മലയാളം"
+        }, 
+        "mn": {
+            "name": "Монгол"
+        }, 
+        "mi": {
+            "name": "Maori"
+        }, 
+        "mk": {
+            "name": "Македонски"
+        }, 
+        "ur": {
+            "name": "اردو"
+        }, 
+        "mt": {
+            "name": "Malti"
+        }, 
+        "ms": {
+            "name": "Bahasa Melayu"
+        }, 
+        "mr": {
+            "name": "मराठी"
+        }, 
+        "ug": {
+            "name": "ئۇيغۇرچە"
+        }, 
+        "ta": {
+            "name": "தமிழ்"
+        }, 
+        "my": {
+            "name": "ဗမာ"
+        }, 
+        "af": {
+            "name": "Afrikaans"
+        }, 
+        "vi": {
+            "name": "Tiếng Việt"
+        }, 
+        "ak": {
+            "name": "Akan"
+        }, 
+        "am": {
+            "name": "አማርኛ"
+        }, 
+        "it": {
+            "name": "Italiano"
+        }, 
+        "iw": {
+            "name": "עברית"
+        }, 
+        "kn": {
+            "name": "ಕನ್ನಡ"
+        }, 
+        "ar": {
+            "name": "العربية"
+        }, 
+        "km": {
+            "name": "ខ្មែរ"
+        }, 
+        "zu": {
+            "name": "Isizulu"
+        }, 
+        "ia": {
+            "name": "Interlingua"
+        }, 
+        "az": {
+            "name": "Azərbaycan Dili"
+        }, 
+        "id": {
+            "name": "Indonesia"
+        }, 
+        "ig": {
+            "name": "Igbo"
+        }, 
+        "nl": {
+            "name": "Nederlands"
+        }, 
+        "nn": {
+            "name": "Nynorsk"
+        }, 
+        "no": {
+            "name": "Norsk"
+        }, 
+        "ne": {
+            "name": "नेपाली"
+        }, 
+        "ny": {
+            "name": "Nyanja"
+        }, 
+        "nyn": {
+            "name": "Runyankore"
+        }, 
+        "fr": {
+            "name": "Français"
+        }, 
+        "fy": {
+            "name": "West-Frysk"
+        }, 
+        "fa": {
+            "name": "فارسی"
+        }, 
+        "fi": {
+            "name": "Suomi"
+        }, 
+        "fo": {
+            "name": "Føroyskt"
+        }, 
+        "ka": {
+            "name": "ქართული"
+        }, 
+        "kg": {
+            "name": "Kongo"
+        }, 
+        "ckb": {
+            "name": "Central Kurdish"
+        }, 
+        "kk": {
+            "name": "Қазақ Тілі"
+        }, 
+        "sr": {
+            "name": "Српски"
+        }, 
+        "sq": {
+            "name": "Shqip"
+        }, 
+        "sw": {
+            "name": "Kiswahili"
+        }, 
+        "sv": {
+            "name": "Svenska"
+        }, 
+        "su": {
+            "name": "Sundanese"
+        }, 
+        "st": {
+            "name": "Southern Sotho"
+        }, 
+        "sk": {
+            "name": "Slovenčina"
+        }, 
+        "si": {
+            "name": "සිංහල"
+        }, 
+        "so": {
+            "name": "Soomaali"
+        }, 
+        "sn": {
+            "name": "Chishona"
+        }, 
+        "sl": {
+            "name": "Slovenščina"
+        }, 
+        "ky": {
+            "name": "Кыргызча"
+        }, 
+        "sd": {
+            "name": "Sindhi"
+        }
+    }, 
+    "duckduckgo": [
+        "da-DK", 
+        "vi-VN", 
+        "en-SG", 
+        "sl-SL", 
+        "en-XA", 
+        "tzh-HK", 
+        "en-UK", 
+        "ro-RO", 
+        "en-MY", 
+        "el-GR", 
+        "it-CH", 
+        "hu-HU", 
+        "fr-FR", 
+        "en-PH", 
+        "tl-PH", 
+        "fr-CA", 
+        "fi-FI", 
+        "et-EE", 
+        "sv-SE", 
+        "es-XL", 
+        "th-TH", 
+        "sk-SK", 
+        "es-ES", 
+        "en-IE", 
+        "es-US", 
+        "es-PE", 
+        "nl-NL", 
+        "en-US", 
+        "de-DE", 
+        "de-AT", 
+        "wt-WT", 
+        "no-NO", 
+        "tr-TR", 
+        "ca-ES", 
+        "it-IT", 
+        "es-CO", 
+        "ru-RU", 
+        "ca-CT", 
+        "en-ZA", 
+        "en-CA", 
+        "jp-JP", 
+        "es-MX", 
+        "id-ID", 
+        "es-AR", 
+        "he-IL", 
+        "kr-KR", 
+        "en-AU", 
+        "ms-MY", 
+        "pl-PL", 
+        "lv-LV", 
+        "bg-BG", 
+        "zh-CN", 
+        "en-NZ", 
+        "lt-LT", 
+        "tzh-TW", 
+        "hr-HR", 
+        "pt-PT", 
+        "fr-BE", 
+        "de-CH", 
+        "cs-CZ", 
+        "en-IN", 
+        "nl-BE", 
+        "fr-CH", 
+        "en-ID", 
+        "ar-XA", 
+        "pt-BR", 
+        "uk-UA", 
+        "es-CL"
+    ], 
+    "bing": [
+        "sq", 
+        "de", 
+        "ar", 
+        "bg", 
+        "ca", 
+        "cs", 
+        "zh-CHS", 
+        "zh-CHT", 
+        "ko", 
+        "hr", 
+        "da", 
+        "sk", 
+        "sl", 
+        "es", 
+        "et", 
+        "fi", 
+        "fr", 
+        "el", 
+        "he", 
+        "nl", 
+        "hu", 
+        "id", 
+        "en", 
+        "is", 
+        "it", 
+        "ja", 
+        "lv", 
+        "lt", 
+        "ms", 
+        "nb", 
+        "fa", 
+        "pl", 
+        "pt-BR", 
+        "pt-PT", 
+        "ro", 
+        "ru", 
+        "sr", 
+        "sv", 
+        "th", 
+        "tr", 
+        "uk", 
+        "vi"
+    ], 
+    "wikipedia": {
+        "sco": {
+            "articles": 41754, 
+            "name": "Scots", 
+            "english_name": "Scots"
+        }, 
+        "scn": {
+            "articles": 25373, 
+            "name": "Sicilianu", 
+            "english_name": "Sicilian"
+        }, 
+        "gu": {
+            "articles": 26690, 
+            "name": "ગુજરાતી", 
+            "english_name": "Gujarati"
+        }, 
+        "gd": {
+            "articles": 14246, 
+            "name": "Gàidhlig", 
+            "english_name": "Scottish Gaelic"
+        }, 
+        "ga": {
+            "articles": 38828, 
+            "name": "Gaeilge", 
+            "english_name": "Irish"
+        }, 
+        "gl": {
+            "articles": 134667, 
+            "name": "Galego", 
+            "english_name": "Galician"
+        }, 
+        "als": {
+            "articles": 22337, 
+            "name": "Alemannisch", 
+            "english_name": "Alemannic"
+        }, 
+        "lb": {
+            "articles": 47306, 
+            "name": "Lëtzebuergesch", 
+            "english_name": "Luxembourgish"
+        }, 
+        "la": {
+            "articles": 125689, 
+            "name": "Latina", 
+            "english_name": "Latin"
+        }, 
+        "tt": {
+            "articles": 69902, 
+            "name": "Tatarça / Татарча", 
+            "english_name": "Tatar"
+        }, 
+        "tr": {
+            "articles": 287403, 
+            "name": "Türkçe", 
+            "english_name": "Turkish"
+        }, 
+        "li": {
+            "articles": 11552, 
+            "name": "Limburgs", 
+            "english_name": "Limburgish"
+        }, 
+        "lv": {
+            "articles": 74333, 
+            "name": "Latviešu", 
+            "english_name": "Latvian"
+        }, 
+        "tl": {
+            "articles": 65681, 
+            "name": "Tagalog", 
+            "english_name": "Tagalog"
+        }, 
+        "vec": {
+            "articles": 10855, 
+            "name": "Vèneto", 
+            "english_name": "Venetian"
+        }, 
+        "th": {
+            "articles": 113214, 
+            "name": "ไทย", 
+            "english_name": "Thai"
+        }, 
+        "tg": {
+            "articles": 67389, 
+            "name": "Тоҷикӣ", 
+            "english_name": "Tajik"
+        }, 
+        "te": {
+            "articles": 66207, 
+            "name": "తెలుగు", 
+            "english_name": "Telugu"
+        }, 
+        "ta": {
+            "articles": 89565, 
+            "name": "தமிழ்", 
+            "english_name": "Tamil"
+        }, 
+        "yi": {
+            "articles": 13590, 
+            "name": "ייִדיש", 
+            "english_name": "Yiddish"
+        }, 
+        "ceb": {
+            "articles": 3525383, 
+            "name": "Sinugboanong Binisaya", 
+            "english_name": "Cebuano"
+        }, 
+        "yo": {
+            "articles": 31493, 
+            "name": "Yorùbá", 
+            "english_name": "Yoruba"
+        }, 
+        "de": {
+            "articles": 2008971, 
+            "name": "Deutsch", 
+            "english_name": "German"
+        }, 
+        "da": {
+            "articles": 221798, 
+            "name": "Dansk", 
+            "english_name": "Danish"
+        }, 
+        "qu": {
+            "articles": 19808, 
+            "name": "Runa Simi", 
+            "english_name": "Quechua"
+        }, 
+        "bar": {
+            "articles": 21966, 
+            "name": "Boarisch", 
+            "english_name": "Bavarian"
+        }, 
+        "kn": {
+            "articles": 21617, 
+            "name": "ಕನ್ನಡ", 
+            "english_name": "Kannada"
+        }, 
+        "bpy": {
+            "articles": 25067, 
+            "name": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", 
+            "english_name": "Bishnupriya Manipuri"
+        }, 
+        "el": {
+            "articles": 124348, 
+            "name": "Ελληνικά", 
+            "english_name": "Greek"
+        }, 
+        "eo": {
+            "articles": 235567, 
+            "name": "Esperanto", 
+            "english_name": "Esperanto"
+        }, 
+        "en": {
+            "articles": 5307436, 
+            "name": "English", 
+            "english_name": "English"
+        }, 
+        "zh": {
+            "articles": 915298, 
+            "name": "中文", 
+            "english_name": "Chinese"
+        }, 
+        "pms": {
+            "articles": 63988, 
+            "name": "Piemontèis", 
+            "english_name": "Piedmontese"
+        }, 
+        "arz": {
+            "articles": 16098, 
+            "name": "مصرى (Maṣri)", 
+            "english_name": "Egyptian Arabic"
+        }, 
+        "eu": {
+            "articles": 261846, 
+            "name": "Euskara", 
+            "english_name": "Basque"
+        }, 
+        "et": {
+            "articles": 151580, 
+            "name": "Eesti", 
+            "english_name": "Estonian"
+        }, 
+        "es": {
+            "articles": 1301725, 
+            "name": "Español", 
+            "english_name": "Spanish"
+        }, 
+        "ba": {
+            "articles": 36610, 
+            "name": "Башҡорт", 
+            "english_name": "Bashkir"
+        }, 
+        "ru": {
+            "articles": 1359173, 
+            "name": "Русский", 
+            "english_name": "Russian"
+        }, 
+        "new": {
+            "articles": 72175, 
+            "name": "नेपाल भाषा", 
+            "english_name": "Newar"
+        }, 
+        "ro": {
+            "articles": 373067, 
+            "name": "Română", 
+            "english_name": "Romanian"
+        }, 
+        "jv": {
+            "articles": 49675, 
+            "name": "Basa Jawa", 
+            "english_name": "Javanese"
+        }, 
+        "hsb": {
+            "articles": 10908, 
+            "name": "Hornjoserbsce", 
+            "english_name": "Upper Sorbian"
+        }, 
+        "be": {
+            "articles": 123470, 
+            "name": "Беларуская", 
+            "english_name": "Belarusian"
+        }, 
+        "bg": {
+            "articles": 223701, 
+            "name": "Български", 
+            "english_name": "Bulgarian"
+        }, 
+        "uk": {
+            "articles": 666877, 
+            "name": "Українська", 
+            "english_name": "Ukrainian"
+        }, 
+        "wa": {
+            "articles": 14312, 
+            "name": "Walon", 
+            "english_name": "Walloon"
+        }, 
+        "ast": {
+            "articles": 47712, 
+            "name": "Asturianu", 
+            "english_name": "Asturian"
+        }, 
+        "bn": {
+            "articles": 46038, 
+            "name": "বাংলা", 
+            "english_name": "Bengali"
+        }, 
+        "map-bms": {
+            "articles": 13275, 
+            "name": "Basa Banyumasan", 
+            "english_name": "Banyumasan"
+        }, 
+        "br": {
+            "articles": 60624, 
+            "name": "Brezhoneg", 
+            "english_name": "Breton"
+        }, 
+        "bs": {
+            "articles": 72057, 
+            "name": "Bosanski", 
+            "english_name": "Bosnian"
+        }, 
+        "ja": {
+            "articles": 1041538, 
+            "name": "日本語", 
+            "english_name": "Japanese"
+        }, 
+        "oc": {
+            "articles": 84521, 
+            "name": "Occitan", 
+            "english_name": "Occitan"
+        }, 
+        "be-tarask": {
+            "articles": 59872, 
+            "name": "Беларуская (тарашкевіца)", 
+            "english_name": "Belarusian (Taraškievica)"
+        }, 
+        "nds": {
+            "articles": 25732, 
+            "name": "Plattdüütsch", 
+            "english_name": "Low Saxon"
+        }, 
+        "os": {
+            "articles": 10293, 
+            "name": "Иронау", 
+            "english_name": "Ossetian"
+        }, 
+        "or": {
+            "articles": 11703, 
+            "name": "ଓଡ଼ିଆ", 
+            "english_name": "Oriya"
+        }, 
+        "simple": {
+            "articles": 121809, 
+            "name": "Simple English", 
+            "english_name": "Simple English"
+        }, 
+        "ca": {
+            "articles": 528658, 
+            "name": "Català", 
+            "english_name": "Catalan"
+        }, 
+        "lmo": {
+            "articles": 34556, 
+            "name": "Lumbaart", 
+            "english_name": "Lombard"
+        }, 
+        "ce": {
+            "articles": 158845, 
+            "name": "Нохчийн", 
+            "english_name": "Chechen"
+        }, 
+        "cy": {
+            "articles": 89271, 
+            "name": "Cymraeg", 
+            "english_name": "Welsh"
+        }, 
+        "cs": {
+            "articles": 369023, 
+            "name": "Čeština", 
+            "english_name": "Czech"
+        }, 
+        "cv": {
+            "articles": 36500, 
+            "name": "Чăваш", 
+            "english_name": "Chuvash"
+        }, 
+        "pt": {
+            "articles": 949039, 
+            "name": "Português", 
+            "english_name": "Portuguese"
+        }, 
+        "lt": {
+            "articles": 180372, 
+            "name": "Lietuvių", 
+            "english_name": "Lithuanian"
+        }, 
+        "zh-min-nan": {
+            "articles": 201851, 
+            "name": "Bân-lâm-gú", 
+            "english_name": "Min Nan"
+        }, 
+        "pa": {
+            "articles": 24065, 
+            "name": "ਪੰਜਾਬੀ", 
+            "english_name": "Punjabi"
+        }, 
+        "war": {
+            "articles": 1261969, 
+            "name": "Winaray", 
+            "english_name": "Waray-Waray"
+        }, 
+        "pl": {
+            "articles": 1197444, 
+            "name": "Polski", 
+            "english_name": "Polish"
+        }, 
+        "hy": {
+            "articles": 212704, 
+            "name": "Հայերեն", 
+            "english_name": "Armenian"
+        }, 
+        "an": {
+            "articles": 31832, 
+            "name": "Aragonés", 
+            "english_name": "Aragonese"
+        }, 
+        "hr": {
+            "articles": 171042, 
+            "name": "Hrvatski", 
+            "english_name": "Croatian"
+        }, 
+        "ht": {
+            "articles": 51108, 
+            "name": "Krèyol ayisyen", 
+            "english_name": "Haitian"
+        }, 
+        "hu": {
+            "articles": 399859, 
+            "name": "Magyar", 
+            "english_name": "Hungarian"
+        }, 
+        "bat-smg": {
+            "articles": 15940, 
+            "name": "Žemaitėška", 
+            "english_name": "Samogitian"
+        }, 
+        "hi": {
+            "articles": 114388, 
+            "name": "हिन्दी", 
+            "english_name": "Hindi"
+        }, 
+        "pnb": {
+            "articles": 42659, 
+            "name": "شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", 
+            "english_name": "Western Punjabi"
+        }, 
+        "bug": {
+            "articles": 14116, 
+            "name": "Basa Ugi", 
+            "english_name": "Buginese"
+        }, 
+        "he": {
+            "articles": 199202, 
+            "name": "עברית", 
+            "english_name": "Hebrew"
+        }, 
+        "mg": {
+            "articles": 82826, 
+            "name": "Malagasy", 
+            "english_name": "Malagasy"
+        }, 
+        "uz": {
+            "articles": 128742, 
+            "name": "O‘zbek", 
+            "english_name": "Uzbek"
+        }, 
+        "ml": {
+            "articles": 46792, 
+            "name": "മലയാളം", 
+            "english_name": "Malayalam"
+        }, 
+        "azb": {
+            "articles": 11813, 
+            "name": "تۆرکجه", 
+            "english_name": "South Azerbaijani"
+        }, 
+        "mn": {
+            "articles": 16281, 
+            "name": "Монгол", 
+            "english_name": "Mongolian"
+        }, 
+        "mk": {
+            "articles": 87527, 
+            "name": "Македонски", 
+            "english_name": "Macedonian"
+        }, 
+        "ur": {
+            "articles": 110767, 
+            "name": "اردو", 
+            "english_name": "Urdu"
+        }, 
+        "ms": {
+            "articles": 286177, 
+            "name": "Bahasa Melayu", 
+            "english_name": "Malay"
+        }, 
+        "mr": {
+            "articles": 45049, 
+            "name": "मराठी", 
+            "english_name": "Marathi"
+        }, 
+        "my": {
+            "articles": 33571, 
+            "name": "မြန်မာဘာသာ", 
+            "english_name": "Burmese"
+        }, 
+        "sah": {
+            "articles": 10965, 
+            "name": "Саха тыла (Saxa Tyla)", 
+            "english_name": "Sakha"
+        }, 
+        "af": {
+            "articles": 42949, 
+            "name": "Afrikaans", 
+            "english_name": "Afrikaans"
+        }, 
+        "vi": {
+            "articles": 1151564, 
+            "name": "Tiếng Việt", 
+            "english_name": "Vietnamese"
+        }, 
+        "is": {
+            "articles": 41500, 
+            "name": "Íslenska", 
+            "english_name": "Icelandic"
+        }, 
+        "am": {
+            "articles": 13291, 
+            "name": "አማርኛ", 
+            "english_name": "Amharic"
+        }, 
+        "it": {
+            "articles": 1317506, 
+            "name": "Italiano", 
+            "english_name": "Italian"
+        }, 
+        "vo": {
+            "articles": 120413, 
+            "name": "Volapük", 
+            "english_name": "Volapük"
+        }, 
+        "ar": {
+            "articles": 453499, 
+            "name": "العربية", 
+            "english_name": "Arabic"
+        }, 
+        "io": {
+            "articles": 26845, 
+            "name": "Ido", 
+            "english_name": "Ido"
+        }, 
+        "ia": {
+            "articles": 19784, 
+            "name": "Interlingua", 
+            "english_name": "Interlingua"
+        }, 
+        "az": {
+            "articles": 111474, 
+            "name": "Azərbaycanca", 
+            "english_name": "Azerbaijani"
+        }, 
+        "id": {
+            "articles": 390200, 
+            "name": "Bahasa Indonesia", 
+            "english_name": "Indonesian"
+        }, 
+        "nl": {
+            "articles": 1885741, 
+            "name": "Nederlands", 
+            "english_name": "Dutch"
+        }, 
+        "nn": {
+            "articles": 131696, 
+            "name": "Nynorsk", 
+            "english_name": "Norwegian (Nynorsk)"
+        }, 
+        "no": {
+            "articles": 458147, 
+            "name": "Norsk (Bokmål)", 
+            "english_name": "Norwegian (Bokmål)"
+        }, 
+        "nah": {
+            "articles": 10428, 
+            "name": "Nāhuatl", 
+            "english_name": "Nahuatl"
+        }, 
+        "ne": {
+            "articles": 29164, 
+            "name": "नेपाली", 
+            "english_name": "Nepali"
+        }, 
+        "nap": {
+            "articles": 14400, 
+            "name": "Nnapulitano", 
+            "english_name": "Neapolitan"
+        }, 
+        "fr": {
+            "articles": 1822985, 
+            "name": "Français", 
+            "english_name": "French"
+        }, 
+        "mrj": {
+            "articles": 10164, 
+            "name": "Кырык Мары (Kyryk Mary)", 
+            "english_name": "Hill Mari"
+        }, 
+        "zh-yue": {
+            "articles": 49352, 
+            "name": "粵語", 
+            "english_name": "Cantonese"
+        }, 
+        "fy": {
+            "articles": 36464, 
+            "name": "Frysk", 
+            "english_name": "West Frisian"
+        }, 
+        "fa": {
+            "articles": 516569, 
+            "name": "فارسی", 
+            "english_name": "Persian"
+        }, 
+        "fi": {
+            "articles": 405166, 
+            "name": "Suomi", 
+            "english_name": "Finnish"
+        }, 
+        "mzn": {
+            "articles": 12362, 
+            "name": "مَزِروني", 
+            "english_name": "Mazandarani"
+        }, 
+        "sa": {
+            "articles": 10198, 
+            "name": "संस्कृतम्", 
+            "english_name": "Sanskrit"
+        }, 
+        "fo": {
+            "articles": 12370, 
+            "name": "Føroyskt", 
+            "english_name": "Faroese"
+        }, 
+        "ka": {
+            "articles": 111155, 
+            "name": "ქართული", 
+            "english_name": "Georgian"
+        }, 
+        "ckb": {
+            "articles": 18217, 
+            "name": "Soranî / کوردی", 
+            "english_name": "Sorani"
+        }, 
+        "kk": {
+            "articles": 217477, 
+            "name": "Қазақша", 
+            "english_name": "Kazakh"
+        }, 
+        "sr": {
+            "articles": 342497, 
+            "name": "Српски / Srpski", 
+            "english_name": "Serbian"
+        }, 
+        "sq": {
+            "articles": 62437, 
+            "name": "Shqip", 
+            "english_name": "Albanian"
+        }, 
+        "min": {
+            "articles": 221961, 
+            "name": "Minangkabau", 
+            "english_name": "Minangkabau"
+        }, 
+        "ko": {
+            "articles": 367127, 
+            "name": "한국어", 
+            "english_name": "Korean"
+        }, 
+        "sv": {
+            "articles": 3783326, 
+            "name": "Svenska", 
+            "english_name": "Swedish"
+        }, 
+        "su": {
+            "articles": 19163, 
+            "name": "Basa Sunda", 
+            "english_name": "Sundanese"
+        }, 
+        "sk": {
+            "articles": 215360, 
+            "name": "Slovenčina", 
+            "english_name": "Slovak"
+        }, 
+        "si": {
+            "articles": 12832, 
+            "name": "සිංහල", 
+            "english_name": "Sinhalese"
+        }, 
+        "sh": {
+            "articles": 436526, 
+            "name": "Srpskohrvatski / Српскохрватски", 
+            "english_name": "Serbo-Croatian"
+        }, 
+        "ku": {
+            "articles": 22367, 
+            "name": "Kurdî / كوردی", 
+            "english_name": "Kurdish"
+        }, 
+        "sl": {
+            "articles": 153978, 
+            "name": "Slovenščina", 
+            "english_name": "Slovenian"
+        }, 
+        "ky": {
+            "articles": 59677, 
+            "name": "Кыргызча", 
+            "english_name": "Kirghiz"
+        }, 
+        "sw": {
+            "articles": 34773, 
+            "name": "Kiswahili", 
+            "english_name": "Swahili"
+        }
+    }, 
+    "bing news": [
+        "sq", 
+        "de", 
+        "ar", 
+        "bg", 
+        "ca", 
+        "cs", 
+        "zh-CHS", 
+        "zh-CHT", 
+        "ko", 
+        "hr", 
+        "da", 
+        "sk", 
+        "sl", 
+        "es", 
+        "et", 
+        "fi", 
+        "fr", 
+        "el", 
+        "he", 
+        "nl", 
+        "hu", 
+        "id", 
+        "en", 
+        "is", 
+        "it", 
+        "ja", 
+        "lv", 
+        "lt", 
+        "ms", 
+        "nb", 
+        "fa", 
+        "pl", 
+        "pt-BR", 
+        "pt-PT", 
+        "ro", 
+        "ru", 
+        "sr", 
+        "sv", 
+        "th", 
+        "tr", 
+        "uk", 
+        "vi"
+    ], 
+    "yahoo news": [
+        "ar", 
+        "bg", 
+        "zh_chs", 
+        "zh_cht", 
+        "hr", 
+        "cs", 
+        "da", 
+        "nl", 
+        "en", 
+        "et", 
+        "fi", 
+        "fr", 
+        "de", 
+        "el", 
+        "he", 
+        "hu", 
+        "it", 
+        "ja", 
+        "ko", 
+        "lv", 
+        "lt", 
+        "no", 
+        "pl", 
+        "pt", 
+        "ro", 
+        "ru", 
+        "sk", 
+        "sl", 
+        "es", 
+        "sv", 
+        "th", 
+        "tr"
+    ], 
+    "swisscows": [
+        "browser", 
+        "ar-SA", 
+        "es-AR", 
+        "en-AU", 
+        "de-AT", 
+        "fr-BE", 
+        "nl-BE", 
+        "pt-BR", 
+        "en-CA", 
+        "fr-CA", 
+        "es-CL", 
+        "zh-CN", 
+        "da-DK", 
+        "fi-FI", 
+        "fr-FR", 
+        "de-DE", 
+        "zh-HK", 
+        "en-IN", 
+        "en-IE", 
+        "it-IT", 
+        "ja-JP", 
+        "ko-KR", 
+        "en-MY", 
+        "es-MX", 
+        "nl-NL", 
+        "en-NZ", 
+        "nb-NO", 
+        "en-PH", 
+        "pl-PL", 
+        "pt-PT", 
+        "ru-RU", 
+        "en-ZA", 
+        "es-ES", 
+        "sv-SE", 
+        "de-CH", 
+        "fr-CH", 
+        "zh-TW", 
+        "tr-TR", 
+        "en-GB", 
+        "en-US", 
+        "es-US"
+    ], 
+    "wikidata": {
+        "sco": {
+            "articles": 41754, 
+            "name": "Scots", 
+            "english_name": "Scots"
+        }, 
+        "scn": {
+            "articles": 25373, 
+            "name": "Sicilianu", 
+            "english_name": "Sicilian"
+        }, 
+        "gu": {
+            "articles": 26690, 
+            "name": "ગુજરાતી", 
+            "english_name": "Gujarati"
+        }, 
+        "gd": {
+            "articles": 14246, 
+            "name": "Gàidhlig", 
+            "english_name": "Scottish Gaelic"
+        }, 
+        "ga": {
+            "articles": 38828, 
+            "name": "Gaeilge", 
+            "english_name": "Irish"
+        }, 
+        "gl": {
+            "articles": 134667, 
+            "name": "Galego", 
+            "english_name": "Galician"
+        }, 
+        "als": {
+            "articles": 22337, 
+            "name": "Alemannisch", 
+            "english_name": "Alemannic"
+        }, 
+        "lb": {
+            "articles": 47306, 
+            "name": "Lëtzebuergesch", 
+            "english_name": "Luxembourgish"
+        }, 
+        "la": {
+            "articles": 125689, 
+            "name": "Latina", 
+            "english_name": "Latin"
+        }, 
+        "tt": {
+            "articles": 69902, 
+            "name": "Tatarça / Татарча", 
+            "english_name": "Tatar"
+        }, 
+        "tr": {
+            "articles": 287403, 
+            "name": "Türkçe", 
+            "english_name": "Turkish"
+        }, 
+        "li": {
+            "articles": 11552, 
+            "name": "Limburgs", 
+            "english_name": "Limburgish"
+        }, 
+        "lv": {
+            "articles": 74333, 
+            "name": "Latviešu", 
+            "english_name": "Latvian"
+        }, 
+        "tl": {
+            "articles": 65681, 
+            "name": "Tagalog", 
+            "english_name": "Tagalog"
+        }, 
+        "vec": {
+            "articles": 10855, 
+            "name": "Vèneto", 
+            "english_name": "Venetian"
+        }, 
+        "th": {
+            "articles": 113214, 
+            "name": "ไทย", 
+            "english_name": "Thai"
+        }, 
+        "tg": {
+            "articles": 67389, 
+            "name": "Тоҷикӣ", 
+            "english_name": "Tajik"
+        }, 
+        "te": {
+            "articles": 66207, 
+            "name": "తెలుగు", 
+            "english_name": "Telugu"
+        }, 
+        "ta": {
+            "articles": 89565, 
+            "name": "தமிழ்", 
+            "english_name": "Tamil"
+        }, 
+        "yi": {
+            "articles": 13590, 
+            "name": "ייִדיש", 
+            "english_name": "Yiddish"
+        }, 
+        "ceb": {
+            "articles": 3525383, 
+            "name": "Sinugboanong Binisaya", 
+            "english_name": "Cebuano"
+        }, 
+        "yo": {
+            "articles": 31493, 
+            "name": "Yorùbá", 
+            "english_name": "Yoruba"
+        }, 
+        "de": {
+            "articles": 2008971, 
+            "name": "Deutsch", 
+            "english_name": "German"
+        }, 
+        "da": {
+            "articles": 221798, 
+            "name": "Dansk", 
+            "english_name": "Danish"
+        }, 
+        "qu": {
+            "articles": 19808, 
+            "name": "Runa Simi", 
+            "english_name": "Quechua"
+        }, 
+        "bar": {
+            "articles": 21966, 
+            "name": "Boarisch", 
+            "english_name": "Bavarian"
+        }, 
+        "kn": {
+            "articles": 21617, 
+            "name": "ಕನ್ನಡ", 
+            "english_name": "Kannada"
+        }, 
+        "bpy": {
+            "articles": 25067, 
+            "name": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", 
+            "english_name": "Bishnupriya Manipuri"
+        }, 
+        "el": {
+            "articles": 124348, 
+            "name": "Ελληνικά", 
+            "english_name": "Greek"
+        }, 
+        "eo": {
+            "articles": 235567, 
+            "name": "Esperanto", 
+            "english_name": "Esperanto"
+        }, 
+        "en": {
+            "articles": 5307436, 
+            "name": "English", 
+            "english_name": "English"
+        }, 
+        "zh": {
+            "articles": 915298, 
+            "name": "中文", 
+            "english_name": "Chinese"
+        }, 
+        "pms": {
+            "articles": 63988, 
+            "name": "Piemontèis", 
+            "english_name": "Piedmontese"
+        }, 
+        "arz": {
+            "articles": 16098, 
+            "name": "مصرى (Maṣri)", 
+            "english_name": "Egyptian Arabic"
+        }, 
+        "eu": {
+            "articles": 261846, 
+            "name": "Euskara", 
+            "english_name": "Basque"
+        }, 
+        "et": {
+            "articles": 151580, 
+            "name": "Eesti", 
+            "english_name": "Estonian"
+        }, 
+        "es": {
+            "articles": 1301725, 
+            "name": "Español", 
+            "english_name": "Spanish"
+        }, 
+        "ba": {
+            "articles": 36610, 
+            "name": "Башҡорт", 
+            "english_name": "Bashkir"
+        }, 
+        "ru": {
+            "articles": 1359173, 
+            "name": "Русский", 
+            "english_name": "Russian"
+        }, 
+        "new": {
+            "articles": 72175, 
+            "name": "नेपाल भाषा", 
+            "english_name": "Newar"
+        }, 
+        "ro": {
+            "articles": 373067, 
+            "name": "Română", 
+            "english_name": "Romanian"
+        }, 
+        "jv": {
+            "articles": 49675, 
+            "name": "Basa Jawa", 
+            "english_name": "Javanese"
+        }, 
+        "hsb": {
+            "articles": 10908, 
+            "name": "Hornjoserbsce", 
+            "english_name": "Upper Sorbian"
+        }, 
+        "be": {
+            "articles": 123470, 
+            "name": "Беларуская", 
+            "english_name": "Belarusian"
+        }, 
+        "bg": {
+            "articles": 223701, 
+            "name": "Български", 
+            "english_name": "Bulgarian"
+        }, 
+        "uk": {
+            "articles": 666877, 
+            "name": "Українська", 
+            "english_name": "Ukrainian"
+        }, 
+        "wa": {
+            "articles": 14312, 
+            "name": "Walon", 
+            "english_name": "Walloon"
+        }, 
+        "ast": {
+            "articles": 47712, 
+            "name": "Asturianu", 
+            "english_name": "Asturian"
+        }, 
+        "bn": {
+            "articles": 46038, 
+            "name": "বাংলা", 
+            "english_name": "Bengali"
+        }, 
+        "map-bms": {
+            "articles": 13275, 
+            "name": "Basa Banyumasan", 
+            "english_name": "Banyumasan"
+        }, 
+        "br": {
+            "articles": 60624, 
+            "name": "Brezhoneg", 
+            "english_name": "Breton"
+        }, 
+        "bs": {
+            "articles": 72057, 
+            "name": "Bosanski", 
+            "english_name": "Bosnian"
+        }, 
+        "ja": {
+            "articles": 1041538, 
+            "name": "日本語", 
+            "english_name": "Japanese"
+        }, 
+        "oc": {
+            "articles": 84521, 
+            "name": "Occitan", 
+            "english_name": "Occitan"
+        }, 
+        "be-tarask": {
+            "articles": 59872, 
+            "name": "Беларуская (тарашкевіца)", 
+            "english_name": "Belarusian (Taraškievica)"
+        }, 
+        "nds": {
+            "articles": 25732, 
+            "name": "Plattdüütsch", 
+            "english_name": "Low Saxon"
+        }, 
+        "os": {
+            "articles": 10293, 
+            "name": "Иронау", 
+            "english_name": "Ossetian"
+        }, 
+        "or": {
+            "articles": 11703, 
+            "name": "ଓଡ଼ିଆ", 
+            "english_name": "Oriya"
+        }, 
+        "simple": {
+            "articles": 121809, 
+            "name": "Simple English", 
+            "english_name": "Simple English"
+        }, 
+        "ca": {
+            "articles": 528658, 
+            "name": "Català", 
+            "english_name": "Catalan"
+        }, 
+        "lmo": {
+            "articles": 34556, 
+            "name": "Lumbaart", 
+            "english_name": "Lombard"
+        }, 
+        "ce": {
+            "articles": 158845, 
+            "name": "Нохчийн", 
+            "english_name": "Chechen"
+        }, 
+        "cy": {
+            "articles": 89271, 
+            "name": "Cymraeg", 
+            "english_name": "Welsh"
+        }, 
+        "cs": {
+            "articles": 369023, 
+            "name": "Čeština", 
+            "english_name": "Czech"
+        }, 
+        "cv": {
+            "articles": 36500, 
+            "name": "Чăваш", 
+            "english_name": "Chuvash"
+        }, 
+        "pt": {
+            "articles": 949039, 
+            "name": "Português", 
+            "english_name": "Portuguese"
+        }, 
+        "lt": {
+            "articles": 180372, 
+            "name": "Lietuvių", 
+            "english_name": "Lithuanian"
+        }, 
+        "zh-min-nan": {
+            "articles": 201851, 
+            "name": "Bân-lâm-gú", 
+            "english_name": "Min Nan"
+        }, 
+        "pa": {
+            "articles": 24065, 
+            "name": "ਪੰਜਾਬੀ", 
+            "english_name": "Punjabi"
+        }, 
+        "war": {
+            "articles": 1261969, 
+            "name": "Winaray", 
+            "english_name": "Waray-Waray"
+        }, 
+        "pl": {
+            "articles": 1197444, 
+            "name": "Polski", 
+            "english_name": "Polish"
+        }, 
+        "hy": {
+            "articles": 212704, 
+            "name": "Հայերեն", 
+            "english_name": "Armenian"
+        }, 
+        "an": {
+            "articles": 31832, 
+            "name": "Aragonés", 
+            "english_name": "Aragonese"
+        }, 
+        "hr": {
+            "articles": 171042, 
+            "name": "Hrvatski", 
+            "english_name": "Croatian"
+        }, 
+        "ht": {
+            "articles": 51108, 
+            "name": "Krèyol ayisyen", 
+            "english_name": "Haitian"
+        }, 
+        "hu": {
+            "articles": 399859, 
+            "name": "Magyar", 
+            "english_name": "Hungarian"
+        }, 
+        "bat-smg": {
+            "articles": 15940, 
+            "name": "Žemaitėška", 
+            "english_name": "Samogitian"
+        }, 
+        "hi": {
+            "articles": 114388, 
+            "name": "हिन्दी", 
+            "english_name": "Hindi"
+        }, 
+        "pnb": {
+            "articles": 42659, 
+            "name": "شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", 
+            "english_name": "Western Punjabi"
+        }, 
+        "bug": {
+            "articles": 14116, 
+            "name": "Basa Ugi", 
+            "english_name": "Buginese"
+        }, 
+        "he": {
+            "articles": 199202, 
+            "name": "עברית", 
+            "english_name": "Hebrew"
+        }, 
+        "mg": {
+            "articles": 82826, 
+            "name": "Malagasy", 
+            "english_name": "Malagasy"
+        }, 
+        "uz": {
+            "articles": 128742, 
+            "name": "O‘zbek", 
+            "english_name": "Uzbek"
+        }, 
+        "ml": {
+            "articles": 46792, 
+            "name": "മലയാളം", 
+            "english_name": "Malayalam"
+        }, 
+        "azb": {
+            "articles": 11813, 
+            "name": "تۆرکجه", 
+            "english_name": "South Azerbaijani"
+        }, 
+        "mn": {
+            "articles": 16281, 
+            "name": "Монгол", 
+            "english_name": "Mongolian"
+        }, 
+        "mk": {
+            "articles": 87527, 
+            "name": "Македонски", 
+            "english_name": "Macedonian"
+        }, 
+        "ur": {
+            "articles": 110767, 
+            "name": "اردو", 
+            "english_name": "Urdu"
+        }, 
+        "ms": {
+            "articles": 286177, 
+            "name": "Bahasa Melayu", 
+            "english_name": "Malay"
+        }, 
+        "mr": {
+            "articles": 45049, 
+            "name": "मराठी", 
+            "english_name": "Marathi"
+        }, 
+        "my": {
+            "articles": 33571, 
+            "name": "မြန်မာဘာသာ", 
+            "english_name": "Burmese"
+        }, 
+        "sah": {
+            "articles": 10965, 
+            "name": "Саха тыла (Saxa Tyla)", 
+            "english_name": "Sakha"
+        }, 
+        "af": {
+            "articles": 42949, 
+            "name": "Afrikaans", 
+            "english_name": "Afrikaans"
+        }, 
+        "vi": {
+            "articles": 1151564, 
+            "name": "Tiếng Việt", 
+            "english_name": "Vietnamese"
+        }, 
+        "is": {
+            "articles": 41500, 
+            "name": "Íslenska", 
+            "english_name": "Icelandic"
+        }, 
+        "am": {
+            "articles": 13291, 
+            "name": "አማርኛ", 
+            "english_name": "Amharic"
+        }, 
+        "it": {
+            "articles": 1317506, 
+            "name": "Italiano", 
+            "english_name": "Italian"
+        }, 
+        "vo": {
+            "articles": 120413, 
+            "name": "Volapük", 
+            "english_name": "Volapük"
+        }, 
+        "ar": {
+            "articles": 453499, 
+            "name": "العربية", 
+            "english_name": "Arabic"
+        }, 
+        "io": {
+            "articles": 26845, 
+            "name": "Ido", 
+            "english_name": "Ido"
+        }, 
+        "ia": {
+            "articles": 19784, 
+            "name": "Interlingua", 
+            "english_name": "Interlingua"
+        }, 
+        "az": {
+            "articles": 111474, 
+            "name": "Azərbaycanca", 
+            "english_name": "Azerbaijani"
+        }, 
+        "id": {
+            "articles": 390200, 
+            "name": "Bahasa Indonesia", 
+            "english_name": "Indonesian"
+        }, 
+        "nl": {
+            "articles": 1885741, 
+            "name": "Nederlands", 
+            "english_name": "Dutch"
+        }, 
+        "nn": {
+            "articles": 131696, 
+            "name": "Nynorsk", 
+            "english_name": "Norwegian (Nynorsk)"
+        }, 
+        "no": {
+            "articles": 458147, 
+            "name": "Norsk (Bokmål)", 
+            "english_name": "Norwegian (Bokmål)"
+        }, 
+        "nah": {
+            "articles": 10428, 
+            "name": "Nāhuatl", 
+            "english_name": "Nahuatl"
+        }, 
+        "ne": {
+            "articles": 29164, 
+            "name": "नेपाली", 
+            "english_name": "Nepali"
+        }, 
+        "nap": {
+            "articles": 14400, 
+            "name": "Nnapulitano", 
+            "english_name": "Neapolitan"
+        }, 
+        "fr": {
+            "articles": 1822985, 
+            "name": "Français", 
+            "english_name": "French"
+        }, 
+        "mrj": {
+            "articles": 10164, 
+            "name": "Кырык Мары (Kyryk Mary)", 
+            "english_name": "Hill Mari"
+        }, 
+        "zh-yue": {
+            "articles": 49352, 
+            "name": "粵語", 
+            "english_name": "Cantonese"
+        }, 
+        "fy": {
+            "articles": 36464, 
+            "name": "Frysk", 
+            "english_name": "West Frisian"
+        }, 
+        "fa": {
+            "articles": 516569, 
+            "name": "فارسی", 
+            "english_name": "Persian"
+        }, 
+        "fi": {
+            "articles": 405166, 
+            "name": "Suomi", 
+            "english_name": "Finnish"
+        }, 
+        "mzn": {
+            "articles": 12362, 
+            "name": "مَزِروني", 
+            "english_name": "Mazandarani"
+        }, 
+        "sa": {
+            "articles": 10198, 
+            "name": "संस्कृतम्", 
+            "english_name": "Sanskrit"
+        }, 
+        "fo": {
+            "articles": 12370, 
+            "name": "Føroyskt", 
+            "english_name": "Faroese"
+        }, 
+        "ka": {
+            "articles": 111155, 
+            "name": "ქართული", 
+            "english_name": "Georgian"
+        }, 
+        "ckb": {
+            "articles": 18217, 
+            "name": "Soranî / کوردی", 
+            "english_name": "Sorani"
+        }, 
+        "kk": {
+            "articles": 217477, 
+            "name": "Қазақша", 
+            "english_name": "Kazakh"
+        }, 
+        "sr": {
+            "articles": 342497, 
+            "name": "Српски / Srpski", 
+            "english_name": "Serbian"
+        }, 
+        "sq": {
+            "articles": 62437, 
+            "name": "Shqip", 
+            "english_name": "Albanian"
+        }, 
+        "min": {
+            "articles": 221961, 
+            "name": "Minangkabau", 
+            "english_name": "Minangkabau"
+        }, 
+        "ko": {
+            "articles": 367127, 
+            "name": "한국어", 
+            "english_name": "Korean"
+        }, 
+        "sv": {
+            "articles": 3783326, 
+            "name": "Svenska", 
+            "english_name": "Swedish"
+        }, 
+        "su": {
+            "articles": 19163, 
+            "name": "Basa Sunda", 
+            "english_name": "Sundanese"
+        }, 
+        "sk": {
+            "articles": 215360, 
+            "name": "Slovenčina", 
+            "english_name": "Slovak"
+        }, 
+        "si": {
+            "articles": 12832, 
+            "name": "සිංහල", 
+            "english_name": "Sinhalese"
+        }, 
+        "sh": {
+            "articles": 436526, 
+            "name": "Srpskohrvatski / Српскохрватски", 
+            "english_name": "Serbo-Croatian"
+        }, 
+        "ku": {
+            "articles": 22367, 
+            "name": "Kurdî / كوردی", 
+            "english_name": "Kurdish"
+        }, 
+        "sl": {
+            "articles": 153978, 
+            "name": "Slovenščina", 
+            "english_name": "Slovenian"
+        }, 
+        "ky": {
+            "articles": 59677, 
+            "name": "Кыргызча", 
+            "english_name": "Kirghiz"
+        }, 
+        "sw": {
+            "articles": 34773, 
+            "name": "Kiswahili", 
+            "english_name": "Swahili"
+        }
+    }, 
+    "ddg definitions": [
+        "da-DK", 
+        "vi-VN", 
+        "en-SG", 
+        "sl-SL", 
+        "en-XA", 
+        "tzh-HK", 
+        "en-UK", 
+        "ro-RO", 
+        "en-MY", 
+        "el-GR", 
+        "it-CH", 
+        "hu-HU", 
+        "fr-FR", 
+        "en-PH", 
+        "tl-PH", 
+        "fr-CA", 
+        "fi-FI", 
+        "et-EE", 
+        "sv-SE", 
+        "es-XL", 
+        "th-TH", 
+        "sk-SK", 
+        "es-ES", 
+        "en-IE", 
+        "es-US", 
+        "es-PE", 
+        "nl-NL", 
+        "en-US", 
+        "de-DE", 
+        "de-AT", 
+        "wt-WT", 
+        "no-NO", 
+        "tr-TR", 
+        "ca-ES", 
+        "it-IT", 
+        "es-CO", 
+        "ru-RU", 
+        "ca-CT", 
+        "en-ZA", 
+        "en-CA", 
+        "jp-JP", 
+        "es-MX", 
+        "id-ID", 
+        "es-AR", 
+        "he-IL", 
+        "kr-KR", 
+        "en-AU", 
+        "ms-MY", 
+        "pl-PL", 
+        "lv-LV", 
+        "bg-BG", 
+        "zh-CN", 
+        "en-NZ", 
+        "lt-LT", 
+        "tzh-TW", 
+        "hr-HR", 
+        "pt-PT", 
+        "fr-BE", 
+        "de-CH", 
+        "cs-CZ", 
+        "en-IN", 
+        "nl-BE", 
+        "fr-CH", 
+        "en-ID", 
+        "ar-XA", 
+        "pt-BR", 
+        "uk-UA", 
+        "es-CL"
+    ], 
+    "bing images": [
+        "sq", 
+        "de", 
+        "ar", 
+        "bg", 
+        "ca", 
+        "cs", 
+        "zh-CHS", 
+        "zh-CHT", 
+        "ko", 
+        "hr", 
+        "da", 
+        "sk", 
+        "sl", 
+        "es", 
+        "et", 
+        "fi", 
+        "fr", 
+        "el", 
+        "he", 
+        "nl", 
+        "hu", 
+        "id", 
+        "en", 
+        "is", 
+        "it", 
+        "ja", 
+        "lv", 
+        "lt", 
+        "ms", 
+        "nb", 
+        "fa", 
+        "pl", 
+        "pt-BR", 
+        "pt-PT", 
+        "ro", 
+        "ru", 
+        "sr", 
+        "sv", 
+        "th", 
+        "tr", 
+        "uk", 
+        "vi"
+    ], 
+    "yahoo": [
+        "ar", 
+        "bg", 
+        "zh_chs", 
+        "zh_cht", 
+        "hr", 
+        "cs", 
+        "da", 
+        "nl", 
+        "en", 
+        "et", 
+        "fi", 
+        "fr", 
+        "de", 
+        "el", 
+        "he", 
+        "hu", 
+        "it", 
+        "ja", 
+        "ko", 
+        "lv", 
+        "lt", 
+        "no", 
+        "pl", 
+        "pt", 
+        "ro", 
+        "ru", 
+        "sk", 
+        "sl", 
+        "es", 
+        "sv", 
+        "th", 
+        "tr"
+    ], 
+    "gigablast": [
+        "en", 
+        "fr", 
+        "es", 
+        "ru", 
+        "tr", 
+        "ja", 
+        "h_", 
+        "tw", 
+        "cn", 
+        "ko", 
+        "de", 
+        "nl", 
+        "it", 
+        "fi", 
+        "sv", 
+        "no", 
+        "pt", 
+        "vi", 
+        "ar", 
+        "he", 
+        "id", 
+        "el", 
+        "th", 
+        "hi", 
+        "bn", 
+        "pl", 
+        "tl", 
+        "la", 
+        "eo", 
+        "ca", 
+        "bg", 
+        "tx", 
+        "sr", 
+        "hu", 
+        "da", 
+        "lt", 
+        "cs", 
+        "gl", 
+        "ka", 
+        "gd", 
+        "go", 
+        "ro", 
+        "ga", 
+        "lv", 
+        "hy", 
+        "is", 
+        "ag", 
+        "gv", 
+        "io", 
+        "fa", 
+        "te", 
+        "vv", 
+        "mg", 
+        "ku", 
+        "lb", 
+        "et"
+    ]
+}

+ 6 - 0
searx/engines/__init__.py

@@ -20,6 +20,7 @@ from os.path import realpath, dirname
 import sys
 from flask_babel import gettext
 from operator import itemgetter
+from json import loads
 from searx import settings
 from searx import logger
 from searx.utils import load_module
@@ -78,6 +79,9 @@ def load_engine(engine_data):
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
 
+    if engine_data['name'] in languages:
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
+
     # checking required variables
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
@@ -207,6 +211,8 @@ if 'engines' not in settings or not settings['engines']:
     logger.error('No engines found. Edit your settings.yml')
     exit(2)
 
+with open(engine_dir + '/../data/engines_languages.json') as languages_file:
+    languages = loads(languages_file.read())  # per-engine supported languages
 for engine_data in settings['engines']:
     engine = load_engine(engine_data)
     if engine is not None:

+ 15 - 0
searx/engines/bing.py

@@ -15,12 +15,14 @@
 
 from urllib import urlencode
 from lxml import html
+from requests import get
 from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['general']
 paging = True
 language_support = True
+supported_languages_url = 'https://www.bing.com/account/general'
 
 # search-url
 base_url = 'https://www.bing.com/'
@@ -81,3 +83,16 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def fetch_supported_languages():
+    """Scrape the page at ``supported_languages_url`` for language codes.
+
+    Every <input> inside the "limit-languages" div contributes its ``id``
+    attribute, with underscores replaced by hyphens.
+    """
+    page = get(supported_languages_url)
+    tree = html.fromstring(page.text)
+    inputs = tree.xpath('//div[@id="limit-languages"]//input')
+    return [node.xpath('./@id')[0].replace('_', '-') for node in inputs]

+ 1 - 1
searx/engines/bing_images.py

@@ -19,7 +19,7 @@ from urllib import urlencode
 from lxml import html
 from json import loads
 import re
-from searx.engines.bing import supported_languages
+from searx.engines.bing import fetch_supported_languages
 
 # engine dependent config
 categories = ['images']

+ 1 - 1
searx/engines/bing_news.py

@@ -17,7 +17,7 @@ from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
-from searx.engines.bing import supported_languages
+from searx.engines.bing import fetch_supported_languages
 
 # engine dependent config
 categories = ['news']

+ 23 - 18
searx/engines/dailymotion.py

@@ -15,29 +15,12 @@
 from urllib import urlencode
 from json import loads
 from datetime import datetime
+from requests import get
 
 # engine dependent config
 categories = ['videos']
 paging = True
 language_support = True
-supported_languages = ["af", "ak", "am", "ar", "an", "as", "av", "ae", "ay", "az",
-                       "ba", "bm", "be", "bn", "bi", "bo", "bs", "br", "bg", "ca",
-                       "cs", "ch", "ce", "cu", "cv", "kw", "co", "cr", "cy", "da",
-                       "de", "dv", "dz", "el", "en", "eo", "et", "eu", "ee", "fo",
-                       "fa", "fj", "fi", "fr", "fy", "ff", "gd", "ga", "gl", "gv",
-                       "gn", "gu", "ht", "ha", "sh", "he", "hz", "hi", "ho", "hr",
-                       "hu", "hy", "ig", "io", "ii", "iu", "ie", "ia", "id", "ik",
-                       "is", "it", "jv", "ja", "kl", "kn", "ks", "ka", "kr", "kk",
-                       "km", "ki", "rw", "ky", "kv", "kg", "ko", "kj", "ku", "lo",
-                       "la", "lv", "li", "ln", "lt", "lb", "lu", "lg", "mh", "ml",
-                       "mr", "mk", "mg", "mt", "mn", "mi", "ms", "my", "na", "nv",
-                       "nr", "nd", "ng", "ne", "nl", "nn", "nb", "no", "ny", "oc",
-                       "oj", "or", "om", "os", "pa", "pi", "pl", "pt", "ps", "qu",
-                       "rm", "ro", "rn", "ru", "sg", "sa", "si", "sk", "sl", "se",
-                       "sm", "sn", "sd", "so", "st", "es", "sq", "sc", "sr", "ss",
-                       "su", "sw", "sv", "ty", "ta", "tt", "te", "tg", "tl", "th",
-                       "ti", "to", "tn", "ts", "tk", "tr", "tw", "ug", "uk", "ur",
-                       "uz", "ve", "vi", "vo", "wa", "wo", "xh", "yi", "yo", "za", "zh", "zu"]
 
 # search-url
 # see http://www.dailymotion.com/doc/api/obj-video.html
@@ -45,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr
 embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
 
+supported_languages_url = 'https://api.dailymotion.com/languages'
+
 
 # do search-request
 def request(query, params):
@@ -92,3 +77,23 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def fetch_supported_languages():
+    """Query the Dailymotion languages API and index its entries by code.
+
+    Each item of the response's 'list' yields a dict that holds 'name'
+    (taken from 'native_name') and 'english_name' (taken from 'name'),
+    each stored only when the API value is truthy.
+    """
+    payload = loads(get(supported_languages_url).text)
+    result = {}
+    for entry in payload['list']:
+        details = result[entry['code']] = {}
+        if entry['native_name']:
+            details['name'] = entry['native_name']
+        if entry['name']:
+            details['english_name'] = entry['name']
+
+    return result

+ 18 - 9
searx/engines/duckduckgo.py

@@ -15,19 +15,15 @@
 
 from urllib import urlencode
 from lxml.html import fromstring
+from requests import get
+from json import loads
 from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['general']
 paging = True
 language_support = True
-supported_languages = ["es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA", "ca-CT",
-                       "es-CL", "zh-CN", "es-CO", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE",
-                       "el-GR", "tzh-HK", "hu-HU", "en-IN", "id-ID", "en-ID", "en-IE", "he-IL", "it-IT", "jp-JP",
-                       "kr-KR", "es-XL", "lv-LV", "lt-LT", "ms-MY", "en-MY", "es-MX", "nl-NL", "en-NZ", "no-NO",
-                       "es-PE", "en-PH", "tl-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU", "ar-XA", "en-XA", "en-SG",
-                       "sk-SK", "sl-SL", "en-ZA", "es-ES", "ca-ES", "sv-SE", "de-CH", "fr-CH", "it-CH", "tzh-TW",
-                       "th-TH", "tr-TR", "uk-UA", "en-UK", "en-US", "es-US", "vi-VN"]
+supported_languages_url = 'https://duckduckgo.com/d2030.js'
 time_range_support = True
 
 # search-url
@@ -65,8 +61,6 @@ def request(query, params):
         locale = 'xa' + params['language'].split('-')[0]
     elif params['language'][-2:] == 'GB':
         locale = 'uk' + params['language'].split('-')[0]
-    elif params['language'] == 'es-419':
-        locale = 'xl-es'
     else:
         locale = params['language'].split('-')
         if len(locale) == 2:
@@ -120,3 +114,18 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def fetch_supported_languages():
+    """Return DuckDuckGo's region keys as 'language-COUNTRY' codes.
+
+    The fetched JavaScript embeds a ``regions:{...}`` object; it is cut out
+    up to its first '}' and parsed as JSON (so it must not be nested).
+    """
+    script = get(supported_languages_url).text
+    # + 8 skips 'regions:' so the slice begins at the opening brace
+    start = script.find('regions:{') + 8
+    regions = loads(script[start:script.find('}', start) + 1])
+    # a key such as 'us-en' becomes 'en-US'
+    return [key[3:] + '-' + key[:2].upper() for key in regions]

+ 1 - 1
searx/engines/duckduckgo_definitions.py

@@ -4,7 +4,7 @@ from re import compile, sub
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
-from searx.engines.duckduckgo import supported_languages
+from searx.engines.duckduckgo import fetch_supported_languages
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

+ 17 - 5
searx/engines/gigablast.py

@@ -14,6 +14,8 @@ from json import loads
 from random import randint
 from time import time
 from urllib import urlencode
+from requests import get
+from lxml.html import fromstring
 
 # engine dependent config
 categories = ['general']
@@ -40,11 +42,7 @@ url_xpath = './/url'
 title_xpath = './/title'
 content_xpath = './/sum'
 
-supported_languages = ["en", "fr", "es", "ru", "tr", "ja", "zh-CN", "zh-TW", "ko", "de",
-                       "nl", "it", "fi", "sv", "no", "pt", "vi", "ar", "he", "id", "el",
-                       "th", "hi", "bn", "pl", "tl", "la", "eo", "ca", "bg", "tx", "sr",
-                       "hu", "da", "lt", "cs", "gl", "ka", "gd", "go", "ro", "ga", "lv",
-                       "hy", "is", "ag", "gv", "io", "fa", "te", "vv", "mg", "ku", "lb", "et"]
+supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
 
 
 # do search-request
@@ -90,3 +88,17 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def fetch_supported_languages():
+    """Return the distinct two-character href suffixes of the "menu2" links."""
+    dom = fromstring(get(supported_languages_url).text)
+    codes = []
+    for anchor in dom.xpath('//span[@id="menu2"]/a'):
+        # NOTE(review): [-2:] cuts longer codes short ('h_' in engines_languages.json) - confirm
+        suffix = anchor.xpath('./@href')[0][-2:]
+        # 'xx' is never reported; repeats are dropped, first-seen order is kept
+        if suffix != 'xx' and suffix not in codes:
+            codes.append(suffix)
+    return codes

+ 16 - 14
searx/engines/google.py

@@ -12,6 +12,7 @@ import re
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
 from lxml import html, etree
+from requests import get
 from searx.engines.xpath import extract_text, extract_url
 from searx.search import logger
 
@@ -23,20 +24,6 @@ categories = ['general']
 paging = True
 language_support = True
 use_locale_domain = True
-supported_languages = ["ach", "af", "ak", "az", "ms", "ban", "xx-bork", "bs", "br", "ca",
-                       "ceb", "ckb", "cs", "sn", "co", "cy", "da", "de", "yo", "et",
-                       "xx-elmer", "en", "es", "es-419", "eo", "eu", "ee", "tl", "fo", "fr",
-                       "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "hr", "haw",
-                       "bem", "ig", "rn", "id", "ia", "zu", "is", "it", "jw", "rw", "sw",
-                       "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz",
-                       "lua", "lg", "hu", "mg", "mt", "mi", "nl", "pcm", "no", "nso",
-                       "ny", "nn", "uz", "oc", "om", "xx-pirate", "pl", "pt-BR", "pt-PT",
-                       "ro", "rm", "qu", "nyn", "crs", "sq", "sd", "sk", "sl", "so", "st",
-                       "sr-ME", "sr-Latn", "su", "fi", "sv", "tg", "tt", "vi", "tn", "tum",
-                       "tr", "tk", "tw", "fy", "wo", "xh", "el", "be", "bg", "ky", "kk", "mk",
-                       "mn", "ru", "sr", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ar", "ps",
-                       "fa", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te",
-                       "kn", "ml", "si", "th", "lo", "my", "km", "chr", "ko", "zh-CN", "zh-TW", "ja"]
 time_range_support = True
 
 # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
@@ -117,6 +104,7 @@ map_hostname_start = 'maps.google.'
 maps_path = '/maps'
 redirect_path = '/url'
 images_path = '/images'
+supported_languages_url = 'https://www.google.com/preferences?#languages'
 
 # specific xpath variables
 results_xpath = '//div[@class="g"]'
@@ -373,3 +361,17 @@ def attributes_to_html(attributes):
         retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
     retval = retval + '</table>'
     return retval
+
+
+# get supported languages from their site
+def fetch_supported_languages():
+    """Map Google's <select name="hl"> options to {code: {"name": ...}}.
+
+    code: option value up to its first '-'; name: text[:-1], title-cased.
+    """
+    dom = html.fromstring(get(supported_languages_url).text)
+    languages = {}
+    for entry in dom.xpath('//select[@name="hl"]/option'):
+        code = entry.xpath('./@value')[0].split('-')[0]
+        languages[code] = {"name": entry.text[:-1].title()}
+    return languages

+ 1 - 1
searx/engines/google_news.py

@@ -13,7 +13,7 @@
 from lxml import html
 from urllib import urlencode
 from json import loads
-from searx.engines.google import supported_languages
+from searx.engines.google import fetch_supported_languages
 
 # search-url
 categories = ['news']

+ 0 - 1
searx/engines/mediawiki.py

@@ -15,7 +15,6 @@
 from json import loads
 from string import Formatter
 from urllib import urlencode, quote
-from searx.engines.wikipedia import supported_languages
 
 # engine dependent config
 categories = ['general']

+ 1 - 14
searx/engines/qwant.py

@@ -20,11 +20,6 @@ from searx.utils import html_to_text
 categories = None
 paging = True
 language_support = True
-supported_languages = ["fr-FR", "de-DE", "en-GB", "it-IT", "es-ES", "pt-PT", "de-CH", "fr-CH", "it-CH", "de-AT",
-                       "fr-BE", "nl-BE", "nl-NL", "da-DK", "fi-FI", "sv-SE", "en-IE", "no-NO", "pl-PL", "ru-RU",
-                       "el-GR", "bg-BG", "cs-CZ", "et-EE", "hu-HU", "ro-RO", "en-US", "en-CA", "fr-CA", "pt-BR",
-                       "es-AR", "es-CL", "es-MX", "ja-JP", "en-SG", "en-IN", "en-MY", "ms-MY", "ko-KR", "tl-PH",
-                       "th-TH", "he-IL", "tr-TR", "en-AU", "en-NZ"]
 
 category_to_keyword = {'general': 'web',
                        'images': 'images',
@@ -51,15 +46,7 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
-        locale = params['language'].split('-')
-        if len(locale) == 2 and params['language'] in supported_languages:
-            params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
-        else:
-            # try to get a country code for language
-            for lang in supported_languages:
-                if locale[0] == lang.split('-')[0]:
-                    params['url'] += '&locale=' + lang.replace('-', '_').lower()
-                    break
+        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
 
     return params
 

+ 0 - 5
searx/engines/startpage.py

@@ -24,11 +24,6 @@ categories = ['general']
 
 # paging = False
 language_support = True
-supported_languages = ["af", "de", "ar", "hy", "be", "bg", "ca", "cs", "zh-CN", "zh-TW",
-                       "ko", "hr", "da", "sk", "sl", "es", "eo", "et", "fi", "fr",
-                       "el", "iw", "hi", "nl", "hu", "id", "en", "is", "it", "ja",
-                       "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sr", "sw",
-                       "sv", "tl", "th", "tr", "uk", "vi"]
 
 # search-url
 base_url = 'https://startpage.com/'

+ 3 - 2
searx/engines/subtitleseeker.py

@@ -22,7 +22,7 @@ language = ""
 
 # search-url
 url = 'http://www.subtitleseeker.com/'
-search_url = url + 'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}?p={pageno}'
 
 # specific xpath variables
 results_xpath = '//div[@class="boxRows"]'
@@ -51,7 +51,8 @@ def response(resp):
     elif resp.search_params['language'] != 'all':
         search_lang = [lc[3]
                        for lc in language_codes
-                       if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
+                       if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
+        search_lang = search_lang[0].split(' (')[0]
 
     # parse results
     for result in dom.xpath(results_xpath):

+ 15 - 6
searx/engines/swisscows.py

@@ -13,17 +13,13 @@
 from json import loads
 from urllib import urlencode, unquote
 import re
+from requests import get
+from lxml.html import fromstring
 
 # engine dependent config
 categories = ['general', 'images']
 paging = True
 language_support = True
-supported_languages = ["ar-SA", "es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA",
-                       "es-CL", "zh-CN", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE", "el-GR",
-                       "zh-HK", "hu-HU", "en-IN", "en-IE", "he-IL", "it-IT", "ja-JP", "ko-KR", "lv-LV", "lt-LT",
-                       "en-MY", "es-MX", "nl-NL", "en-NZ", "nb-NO", "en-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU",
-                       "en-SG", "sk-SK", "sl-SI", "en-ZA", "es-ES", "sv-SE", "de-CH", "fr-CH", "zh-TW", "th-TH",
-                       "tr-TR", "uk-UA", "en-GB", "en-US", "es-US"]
 
 # search-url
 base_url = 'https://swisscows.ch/'
@@ -114,3 +110,16 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def fetch_supported_languages():
+    """Collect the codes listed in the Swisscows "regions-popup" div.
+
+    Fetches ``base_url`` and returns the ``data-val`` attribute of each
+    matching link, in the order the xpath query yields them.
+    """
+    page = get(base_url)
+    tree = fromstring(page.text)
+    anchors = tree.xpath('//div[@id="regions-popup"]//ul/li/a')
+    return [anchor.xpath('./@data-val')[0] for anchor in anchors]

+ 3 - 3
searx/engines/wikidata.py

@@ -15,7 +15,7 @@ from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.utils import format_date_by_locale
-from searx.engines.wikipedia import supported_languages
+from searx.engines.wikipedia import fetch_supported_languages
 
 from json import loads
 from lxml.html import fromstring
@@ -57,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 
 
 def request(query, params):
-    language = params['language'].split('_')[0]
+    language = params['language'].split('-')[0]
     if language == 'all':
         language = 'en'
 
@@ -72,7 +72,7 @@ def response(resp):
     html = fromstring(resp.content)
     wikidata_ids = html.xpath(wikidata_ids_xpath)
 
-    language = resp.search_params['language'].split('_')[0]
+    language = resp.search_params['language'].split('-')[0]
     if language == 'all':
         language = 'en'
 

+ 24 - 29
searx/engines/wikipedia.py

@@ -12,36 +12,9 @@
 
 from json import loads
 from urllib import urlencode, quote
+from requests import get
+from lxml.html import fromstring
 
-supported_languages = ["en", "sv", "ceb", "de", "nl", "fr", "ru", "it", "es", "war",
-                       "pl", "vi", "ja", "pt", "zh", "uk", "ca", "fa", "no", "sh",
-                       "ar", "fi", "hu", "id", "ro", "cs", "ko", "sr", "ms", "tr",
-                       "eu", "eo", "min", "bg", "da", "kk", "sk", "hy", "he", "zh-min-nan",
-                       "lt", "hr", "sl", "et", "ce", "gl", "nn", "uz", "la", "vo",
-                       "el", "simple", "be", "az", "th", "ur", "ka", "hi", "oc", "ta",
-                       "mk", "mg", "new", "lv", "cy", "bs", "tt", "tl", "te", "pms",
-                       "be-tarask", "br", "sq", "ky", "ht", "jv", "tg", "ast", "zh-yue", "lb",
-                       "mr", "ml", "bn", "pnb", "is", "af", "sco", "ga", "ba", "fy",
-                       "cv", "lmo", "sw", "my", "an", "yo", "ne", "io", "gu", "nds",
-                       "scn", "bpy", "pa", "ku", "als", "kn", "bar", "ia", "qu", "su",
-                       "ckb", "bat-smg", "mn", "arz", "nap", "wa", "bug", "gd", "yi", "map-bms",
-                       "am", "mzn", "fo", "si", "nah", "li", "sah", "vec", "hsb", "or",
-                       "os", "mrj", "sa", "hif", "mhr", "roa-tara", "azb", "pam", "ilo",
-                       "sd", "ps", "se", "mi", "bh", "eml", "bcl", "xmf", "diq", "hak",
-                       "gan", "glk", "vls", "nds-nl", "rue", "bo", "fiu-vro", "co", "sc",
-                       "tk", "csb", "lrc", "vep", "wuu", "km", "szl", "gv", "crh", "kv",
-                       "zh-classical", "frr", "zea", "as", "so", "kw", "nso", "ay", "stq",
-                       "udm", "cdo", "nrm", "ie", "koi", "rm", "pcd", "myv", "mt", "fur",
-                       "ace", "lad", "gn", "lij", "dsb", "dv", "cbk-zam", "ext", "gom",
-                       "kab", "ksh", "ang", "mai", "mwl", "lez", "gag", "ln", "ug", "pi",
-                       "pag", "frp", "sn", "nv", "av", "pfl", "haw", "xal", "krc", "kaa",
-                       "rw", "bxr", "pdc", "to", "kl", "nov", "arc", "kbd", "lo", "bjn",
-                       "pap", "ha", "tet", "ki", "tyv", "tpi", "na", "lbe", "ig", "jbo",
-                       "roa-rup", "ty", "jam", "za", "kg", "mdf", "lg", "wo", "srn", "ab",
-                       "ltg", "zu", "sm", "chr", "om", "tn", "chy", "rmy", "cu", "tw", "tum",
-                       "xh", "bi", "rn", "pih", "got", "ss", "pnt", "bm", "ch", "mo", "ts",
-                       "ady", "iu", "st", "ee", "ny", "fj", "ks", "ak", "ik", "sg", "ve",
-                       "dz", "ff", "ti", "cr", "ng", "cho", "kj", "mh", "ho", "ii", "aa", "mus", "hz", "kr"]
 
 # search-url
 base_url = 'https://{language}.wikipedia.org/'
@@ -54,6 +27,7 @@ search_postfix = 'w/api.php?'\
     '&explaintext'\
     '&pithumbsize=300'\
     '&redirects'
+supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 
 
 # set language in base_url
@@ -142,3 +116,24 @@ def response(resp):
                     'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
 
     return results
+
+
+# get supported languages from their site
+def fetch_supported_languages():
+    supported_languages = {}
+    response = get(supported_languages_url)
+    dom = fromstring(response.text)
+    tables = dom.xpath('//table[contains(@class,"sortable")]')
+    for table in tables:
+        # exclude header row
+        trs = table.xpath('.//tr')[1:]
+        for tr in trs:
+            td = tr.xpath('./td')
+            code = td[3].xpath('./a')[0].text
+            name = td[2].xpath('./a')[0].text
+            english_name = td[1].xpath('./a')[0].text
+            articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
+            if articles >= 10000:
+                supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
+
+    return supported_languages

+ 16 - 4
searx/engines/yahoo.py

@@ -14,16 +14,13 @@
 from urllib import urlencode
 from urlparse import unquote
 from lxml import html
+from requests import get
 from searx.engines.xpath import extract_text, extract_url
 
 # engine dependent config
 categories = ['general']
 paging = True
 language_support = True
-supported_languages = ["ar", "bg", "ca", "szh", "tzh", "hr", "cs", "da", "nl", "en",
-                       "et", "fi", "fr", "de", "el", "he", "hu", "is", "id", "it", "ja",
-                       "ko", "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sk", "sr",
-                       "sl", "es", "sv", "th", "tr"]
 time_range_support = True
 
 # search-url
@@ -31,6 +28,8 @@ base_url = 'https://search.yahoo.com/'
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
 search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
 
+supported_languages_url = 'https://search.yahoo.com/web/advanced'
+
 # specific xpath variables
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
 url_xpath = './/h3/a/@href'
@@ -142,3 +141,16 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def fetch_supported_languages():
+    supported_languages = []
+    response = get(supported_languages_url)
+    dom = html.fromstring(response.text)
+    options = dom.xpath('//div[@id="yschlang"]/span/label/input')
+    for option in options:
+        code = option.xpath('./@value')[0][5:]
+        supported_languages.append(code)
+
+    return supported_languages

+ 1 - 1
searx/engines/yahoo_news.py

@@ -12,7 +12,7 @@
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url, supported_languages
+from searx.engines.yahoo import parse_url, fetch_supported_languages
 from datetime import datetime, timedelta
 import re
 from dateutil import parser

+ 47 - 76
searx/languages.py

@@ -4,39 +4,29 @@
 
 language_codes = (
     (u"ach", u"Acoli", u"", u""),
-    (u"af", u"Afrikaans", u"", u"Afrikaans"),
+    (u"af", u"Afrikaans", u"", u""),
     (u"ak", u"Akan", u"", u""),
-    (u"als", u"Alemannisch", u"", u"Alemannic"),
-    (u"am", u"አማርኛ", u"", u"Amharic"),
-    (u"an", u"Aragonés", u"", u"Aragonese"),
+    (u"am", u"አማርኛ", u"", u""),
     (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
-    (u"arz", u"مصرى (Maṣri)", u"", u"Egyptian Arabic"),
-    (u"ast", u"Asturianu", u"", u"Asturian"),
     (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
-    (u"azb", u"تۆرکجه", u"", u"South Azerbaijani"),
-    (u"ba", u"Башҡорт", u"", u"Bashkir"),
     (u"ban", u"Balinese", u"", u""),
-    (u"bar", u"Boarisch", u"", u"Bavarian"),
     (u"be", u"Беларуская", u"", u"Belarusian"),
     (u"bem", u"Ichibemba", u"", u""),
     (u"bg-BG", u"Български", u"България", u"Bulgarian"),
-    (u"bn", u"বাংলা", u"", u"Bengali"),
-    (u"bpy", u"ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", u"", u"Bishnupriya Manipuri"),
-    (u"br", u"Brezhoneg", u"", u"Breton"),
-    (u"bs", u"Bosanski", u"", u"Bosnian"),
-    (u"bug", u"Basa Ugi", u"", u"Buginese"),
+    (u"bn", u"বাংলা", u"", u""),
+    (u"br", u"Brezhoneg", u"", u""),
+    (u"bs", u"Bosanski", u"", u""),
     (u"ca", u"Català", u"", u"Catalan"),
     (u"ca-CT", u"Català", u"", u"Catalan"),
     (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
     (u"ce", u"Нохчийн", u"", u"Chechen"),
     (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
     (u"chr", u"ᏣᎳᎩ", u"", u""),
-    (u"ckb", u"Soranî / کوردی", u"", u"Sorani"),
+    (u"ckb", u"Central Kurdish", u"", u""),
     (u"co", u"Corsican", u"", u""),
     (u"crs", u"Seychellois Creole", u"", u""),
     (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
-    (u"cv", u"Чăваш", u"", u"Chuvash"),
-    (u"cy", u"Cymraeg", u"", u"Welsh"),
+    (u"cy", u"Cymraeg", u"", u""),
     (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
     (u"de", u"Deutsch", u"", u"German"),
     (u"de-AT", u"Deutsch", u"Österreich", u"German"),
@@ -70,148 +60,129 @@ language_codes = (
     (u"eu", u"Euskara", u"", u"Basque"),
     (u"fa", u"فارسی", u"", u"Persian"),
     (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
-    (u"fo", u"Føroyskt", u"", u"Faroese"),
+    (u"fo", u"Føroyskt", u"", u""),
     (u"fr", u"Français", u"", u"French"),
     (u"fr-BE", u"Français", u"Belgique", u"French"),
     (u"fr-CA", u"Français", u"Canada", u"French"),
     (u"fr-CH", u"Français", u"Suisse", u"French"),
     (u"fr-FR", u"Français", u"France", u"French"),
-    (u"fy", u"Frysk", u"", u"West Frisian"),
-    (u"ga", u"Gaeilge", u"", u"Irish"),
+    (u"fy", u"West-Frysk", u"", u""),
+    (u"ga", u"Gaeilge", u"", u""),
     (u"gaa", u"Ga", u"", u""),
-    (u"gd", u"Gàidhlig", u"", u"Scottish Gaelic"),
+    (u"gd", u"Gàidhlig", u"", u""),
     (u"gl", u"Galego", u"", u"Galician"),
     (u"gn", u"Guarani", u"", u""),
-    (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
+    (u"gu", u"ગુજરાતી", u"", u""),
     (u"ha", u"Hausa", u"", u""),
     (u"haw", u"ʻŌlelo HawaiʻI", u"", u""),
     (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
     (u"hi", u"हिन्दी", u"", u"Hindi"),
     (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
-    (u"hsb", u"Hornjoserbsce", u"", u"Upper Sorbian"),
-    (u"ht", u"Krèyol ayisyen", u"", u"Haitian"),
+    (u"ht", u"Haitian Creole", u"", u""),
     (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
     (u"hy", u"Հայերեն", u"", u"Armenian"),
-    (u"ia", u"Interlingua", u"", u"Interlingua"),
+    (u"ia", u"Interlingua", u"", u""),
     (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
     (u"ig", u"Igbo", u"", u""),
-    (u"io", u"Ido", u"", u"Ido"),
-    (u"is", u"Íslenska", u"", u"Icelandic"),
+    (u"is", u"Íslenska", u"", u""),
     (u"it", u"Italiano", u"", u"Italian"),
     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
     (u"it-IT", u"Italiano", u"Italia", u"Italian"),
     (u"iw", u"עברית", u"", u""),
     (u"ja-JP", u"日本語", u"日本", u"Japanese"),
-    (u"jv", u"Basa Jawa", u"", u"Javanese"),
     (u"ka", u"ქართული", u"", u"Georgian"),
     (u"kg", u"Kongo", u"", u""),
     (u"kk", u"Қазақша", u"", u"Kazakh"),
     (u"km", u"ខ្មែរ", u"", u""),
-    (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
+    (u"kn", u"ಕನ್ನಡ", u"", u""),
     (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
-    (u"kri", u"Krio (Sierra Leone)", u"", u""),
-    (u"ku", u"Kurdî / كوردی", u"", u"Kurdish"),
-    (u"ky", u"Кыргызча", u"", u"Kirghiz"),
+    (u"kri", u"Krio", u"", u""),
+    (u"ky", u"Кыргызча", u"", u""),
     (u"la", u"Latina", u"", u"Latin"),
-    (u"lb", u"Lëtzebuergesch", u"", u"Luxembourgish"),
     (u"lg", u"Luganda", u"", u""),
-    (u"li", u"Limburgs", u"", u"Limburgish"),
-    (u"lmo", u"Lumbaart", u"", u"Lombard"),
     (u"ln", u"Lingála", u"", u""),
     (u"lo", u"ລາວ", u"", u""),
     (u"loz", u"Lozi", u"", u""),
     (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
     (u"lua", u"Luba-Lulua", u"", u""),
-    (u"lv-LV", u"Latviešu", u"Latvijas Republika", u"Latvian"),
+    (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
     (u"mfe", u"Kreol Morisien", u"", u""),
-    (u"mg", u"Malagasy", u"", u"Malagasy"),
+    (u"mg", u"Malagasy", u"", u""),
     (u"mi", u"Maori", u"", u""),
     (u"min", u"Minangkabau", u"", u"Minangkabau"),
-    (u"mk", u"Македонски", u"", u"Macedonian"),
-    (u"ml", u"മലയാളം", u"", u"Malayalam"),
-    (u"mn", u"Монгол", u"", u"Mongolian"),
-    (u"mr", u"मराठी", u"", u"Marathi"),
-    (u"mrj", u"Кырык Мары (Kyryk Mary)", u"", u"Hill Mari"),
+    (u"mk", u"Македонски", u"", u""),
+    (u"ml", u"മലയാളം", u"", u""),
+    (u"mn", u"Монгол", u"", u""),
+    (u"mr", u"मराठी", u"", u""),
     (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
     (u"mt", u"Malti", u"", u""),
-    (u"my", u"မြန်မာဘာသာ", u"", u"Burmese"),
-    (u"mzn", u"مَزِروني", u"", u"Mazandarani"),
-    (u"nah", u"Nāhuatl", u"", u"Nahuatl"),
-    (u"nap", u"Nnapulitano", u"", u"Neapolitan"),
-    (u"nds-nl", u"Plattdüütsch", u"Nedderlannen", u"Low Saxon"),
-    (u"ne", u"नेपाली", u"", u"Nepali"),
-    (u"new", u"नेपाल भाषा", u"", u"Newar"),
+    (u"my", u"ဗမာ", u"", u""),
+    (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
+    (u"ne", u"नेपाली", u"", u""),
     (u"nl", u"Nederlands", u"", u"Dutch"),
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
-    (u"nn", u"Nynorsk", u"", u"Norwegian (Nynorsk)"),
-    (u"no-NO", u"Norsk (Bokmål)", u"Norge", u"Norwegian (Bokmål)"),
+    (u"nn", u"Nynorsk", u"", u"Norwegian"),
+    (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
     (u"nso", u"Northern Sotho", u"", u""),
     (u"ny", u"Nyanja", u"", u""),
     (u"nyn", u"Runyankore", u"", u""),
-    (u"oc", u"Occitan", u"", u"Occitan"),
+    (u"oc", u"Occitan", u"", u""),
     (u"om", u"Oromoo", u"", u""),
-    (u"or", u"ଓଡ଼ିଆ", u"", u"Oriya"),
-    (u"os", u"Иронау", u"", u"Ossetian"),
-    (u"pa", u"ਪੰਜਾਬੀ", u"", u"Punjabi"),
+    (u"or", u"ଓଡ଼ିଆ", u"", u""),
+    (u"pa", u"ਪੰਜਾਬੀ", u"", u""),
     (u"pcm", u"Nigerian Pidgin", u"", u""),
     (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
-    (u"pms", u"Piemontèis", u"", u"Piedmontese"),
-    (u"pnb", u"شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", u"", u"Western Punjabi"),
     (u"ps", u"پښتو", u"", u""),
     (u"pt", u"Português", u"", u"Portuguese"),
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
-    (u"qu", u"Runa Simi", u"", u"Quechua"),
+    (u"qu", u"Runasimi", u"", u""),
     (u"rm", u"Rumantsch", u"", u""),
     (u"rn", u"Ikirundi", u"", u""),
     (u"ro-RO", u"Română", u"România", u"Romanian"),
     (u"ru-RU", u"Русский", u"Россия", u"Russian"),
     (u"rw", u"Kinyarwanda", u"", u""),
-    (u"sa", u"संस्कृतम्", u"", u"Sanskrit"),
-    (u"sah", u"Саха тыла (Saxa Tyla)", u"", u"Sakha"),
-    (u"scn", u"Sicilianu", u"", u"Sicilian"),
-    (u"sco", u"Scots", u"", u"Scots"),
     (u"sd", u"Sindhi", u"", u""),
     (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
-    (u"si", u"සිංහල", u"", u"Sinhalese"),
+    (u"si", u"සිංහල", u"", u""),
     (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
-    (u"sl-SI", u"Slovenščina", u"Slovenija", u"Slovenian"),
+    (u"sl", u"Slovenščina", u"", u"Slovenian"),
     (u"sn", u"Chishona", u"", u""),
     (u"so", u"Soomaali", u"", u""),
-    (u"sq", u"Shqip", u"", u"Albanian"),
-    (u"sr-ME", u"Српски / Srpski", u"Црна Гора", u"Serbian"),
+    (u"sq", u"Shqip", u"", u""),
+    (u"sr", u"Српски / Srpski", u"", u"Serbian"),
     (u"st", u"Southern Sotho", u"", u""),
-    (u"su", u"Basa Sunda", u"", u"Sundanese"),
+    (u"su", u"Sundanese", u"", u""),
     (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
-    (u"sw", u"Kiswahili", u"", u"Swahili"),
-    (u"ta", u"தமிழ்", u"", u"Tamil"),
-    (u"te", u"తెలుగు", u"", u"Telugu"),
-    (u"tg", u"Тоҷикӣ", u"", u"Tajik"),
+    (u"sw", u"Kiswahili", u"", u""),
+    (u"ta", u"தமிழ்", u"", u""),
+    (u"te", u"తెలుగు", u"", u""),
+    (u"tg", u"Tajik", u"", u""),
     (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
     (u"ti", u"ትግርኛ", u"", u""),
     (u"tk", u"Turkmen", u"", u""),
-    (u"tl-PH", u"Tagalog", u"Pilipinas", u"Tagalog"),
+    (u"tl-PH", u"Filipino", u"Pilipinas", u""),
     (u"tlh", u"Klingon", u"", u""),
     (u"tn", u"Tswana", u"", u""),
     (u"to", u"Lea Fakatonga", u"", u""),
     (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
-    (u"tt", u"Tatarça / Татарча", u"", u"Tatar"),
+    (u"tt", u"Tatar", u"", u""),
     (u"tum", u"Tumbuka", u"", u""),
     (u"tw", u"Twi", u"", u""),
     (u"ug", u"ئۇيغۇرچە", u"", u""),
     (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
     (u"ur", u"اردو", u"", u"Urdu"),
     (u"uz", u"O‘zbek", u"", u"Uzbek"),
-    (u"vec", u"Vèneto", u"", u"Venetian"),
+    (u"ve", u"Venda", u"", u"Venda"),
     (u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"),
     (u"vo", u"Volapük", u"", u"Volapük"),
     (u"wa", u"Walon", u"", u"Walloon"),
     (u"war", u"Winaray", u"", u"Waray-Waray"),
     (u"wo", u"Wolof", u"", u""),
     (u"xh", u"Xhosa", u"", u""),
-    (u"yi", u"ייִדיש", u"", u"Yiddish"),
-    (u"yo", u"Yorùbá", u"", u"Yoruba"),
+    (u"yi", u"ייִדיש", u"", u""),
+    (u"yo", u"Èdè Yorùbá", u"", u""),
     (u"zh", u"中文", u"", u"Chinese"),
     (u"zh-CN", u"中文", u"中国", u"Chinese"),
     (u"zh-HK", u"中文", u"香港", u"Chinese"),

+ 1 - 1
searx/webapp.py

@@ -514,7 +514,7 @@ def index():
         answers=result_container.answers,
         infoboxes=result_container.infoboxes,
         paging=result_container.paging,
-        current_language=search.lang,
+        current_language=search_query.lang,
         base_url=get_base_url(),
         theme=get_current_theme_name(),
         favicons=global_favicons[themes.index(get_current_theme_name())]

+ 1 - 1
tests/unit/engines/test_subtitleseeker.py

@@ -17,7 +17,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
 
     def test_response(self):
         dicto = defaultdict(dict)
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         response = mock.Mock(search_params=dicto)
 
         self.assertRaises(AttributeError, subtitleseeker.response, None)

+ 2 - 0
tests/unit/engines/test_wikipedia.py

@@ -8,6 +8,8 @@ from searx.testing import SearxTestCase
 class TestWikipediaEngine(SearxTestCase):
 
     def test_request(self):
+        wikipedia.supported_languages = ['fr', 'en']
+
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['language'] = 'fr-FR'

+ 164 - 0
utils/fetch_languages.py

@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+
+# This script generates languages.py by joining each engine's supported languages.
+#
+# The country names are obtained from http://api.geonames.org which requires registering as a user.
+#
+# Output files (engines_languages.json and languages.py)
+# are written in current directory to avoid overwriting in case something goes wrong.
+
+from requests import get
+from urllib import urlencode
+from lxml.html import fromstring
+from json import loads, dumps
+import io
+from sys import path
+path.append('../searx')  # noqa
+from searx.engines import engines
+
+# Geonames API for country names.
+geonames_user = ''  # ADD USER NAME HERE
+country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}'
+
+# Output files.
+engines_languages_file = 'engines_languages.json'
+languages_file = 'languages.py'
+
+engines_languages = {}
+languages = {}
+
+
+# To filter out invalid codes and dialects.
+def valid_code(lang_code):
+    # filter invalid codes
+    # sl-SL is technically not invalid, but still a mistake
+    if lang_code[:2] == 'xx'\
+       or lang_code == 'sl-SL'\
+       or lang_code == 'wt-WT'\
+       or lang_code == 'jw'\
+       or lang_code[-2:] == 'UK'\
+       or lang_code[-2:] == 'XA'\
+       or lang_code[-2:] == 'XL':
+        return False
+
+    # filter dialects
+    lang_code = lang_code.split('-')
+    if len(lang_code) > 2 or len(lang_code[0]) > 3:
+        return False
+    if len(lang_code) == 2 and len(lang_code[1]) > 2:
+        return False
+
+    return True
+
+
+# Get country name in specified language.
+def get_country_name(locale):
+    if geonames_user is '':
+        return ''
+
+    locale = locale.split('-')
+    if len(locale) != 2:
+        return ''
+
+    url = country_names_url.format(parameters=urlencode({'lang': locale[0],
+                                                         'country': locale[1],
+                                                         'username': geonames_user}))
+    response = get(url)
+    json = loads(response.text)
+    content = json.get('geonames', None)
+    if content is None or len(content) != 1:
+        print "No country name found for " + locale[0] + "-" + locale[1]
+        return ''
+
+    return content[0].get('countryName', '')
+
+
+# Fetches the supported languages of each engine and writes them to a JSON file.
+def fetch_supported_languages():
+    for engine_name in engines:
+        if hasattr(engines[engine_name], 'fetch_supported_languages'):
+            try:
+                engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
+            except Exception as e:
+                print e
+
+    # write json file
+    f = io.open(engines_languages_file, "w", encoding="utf-8")
+    f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8")))
+    f.close()
+
+
+# Join all language lists.
+# Iterate all languages supported by each engine.
+def join_language_lists():
+    # include wikipedia first for more accurate language names
+    # exclude languages with too few articles
+    languages.update({code: lang for code, lang
+                      in engines_languages['wikipedia'].iteritems()
+                      if valid_code(code) and lang['articles'] >= 100000})
+
+    for engine_name in engines_languages:
+        for locale in engines_languages[engine_name]:
+            if not valid_code(locale):
+                continue
+
+            # if language is not on list or if it has no name yet
+            if locale not in languages or not languages[locale].get('name'):
+                if isinstance(engines_languages[engine_name], dict) \
+                  and engines_languages[engine_name][locale].get('articles', float('inf')) >= 100000:
+                    languages[locale] = engines_languages[engine_name][locale]
+                else:
+                    languages[locale] = {}
+
+    # get locales that have no name yet
+    for locale in languages.keys():
+        if not languages[locale].get('name'):
+            # try to get language and country names
+            name = languages.get(locale.split('-')[0], {}).get('name', None)
+            if name:
+                languages[locale]['name'] = name
+                languages[locale]['country'] = get_country_name(locale) or ''
+                languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
+            else:
+                # filter out locales with no name
+                del languages[locale]
+
+
+# Remove the country-less entry of a language that appears with only one country.
+def filter_single_country_languages():
+    prev_lang = None
+    for code in sorted(languages):
+        lang = code.split('-')[0]
+        if lang == prev_lang:
+            countries += 1
+        else:
+            if prev_lang is not None and countries == 1:
+                del languages[prev_lang]
+            countries = 0
+            prev_lang = lang
+
+
+# Write languages.py.
+def write_languages_file():
+    new_file = open(languages_file, 'w')
+    file_content = '# -*- coding: utf-8 -*-\n'
+    file_content += '# list of language codes\n'
+    file_content += '# this file is generated automatically by utils/update_search_languages.py\n'
+    file_content += '\nlanguage_codes = ('
+    for code in sorted(languages):
+        file_content += '\n    (u"' + code + '"'\
+                        + ', u"' + languages[code]['name'].split(' (')[0] + '"'\
+                        + ', u"' + languages[code].get('country', '') + '"'\
+                        + ', u"' + languages[code].get('english_name', '').split(' (')[0] + '"),'
+    # remove last comma
+    file_content = file_content[:-1]
+    file_content += '\n)\n'
+    new_file.write(file_content.encode('utf8'))
+    new_file.close()
+
+
+if __name__ == "__main__":
+    fetch_supported_languages()
+    join_language_lists()
+    filter_single_country_languages()
+    write_languages_file()

+ 0 - 169
utils/update_languages.py

@@ -1,169 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This script generates languages.py from
-# intersecting each engine's supported languages.
-#
-# The language's native names are obtained from
-# Wikipedia and Google's supported languages.
-#
-# The country names are obtained from http://api.geonames.org
-# which requires registering as a user.
-#
-# Output file (languages.py) is written in current directory
-# to avoid overwriting in case something goes wrong.
-
-from requests import get
-from urllib import urlencode
-from lxml.html import fromstring
-from json import loads
-from sys import path
-path.append('../searx')
-from searx.engines import engines
-
-# list of names
-wiki_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
-google_languages_url = 'https://www.google.com/preferences?#languages'
-country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}'
-
-geonames_user = ''  # add user name here
-
-google_json_name = 'google.preferences.langMap'
-
-languages = {}
-
-
-# To filter out invalid codes and dialects.
-def valid_code(lang_code):
-    # filter invalid codes
-    # sl-SL is technically not invalid, but still a mistake
-    if lang_code[:2] == 'xx'\
-       or lang_code == 'sl-SL'\
-       or lang_code == 'jw'\
-       or lang_code[-2:] == 'UK'\
-       or lang_code[-2:] == 'XA'\
-       or lang_code[-2:] == 'XL':
-        return False
-
-    # filter dialects
-    lang_code = lang_code.split('-')
-    if len(lang_code) > 2 or len(lang_code[0]) > 3:
-        return False
-    if len(lang_code) == 2 and len(lang_code[1]) > 2:
-        return False
-        
-    return True
-
-
-# Get country name in specified language.
-def get_country_name(locale):
-    if geonames_user is '':
-        return ''
-
-    locale = locale.split('-')
-    if len(locale) != 2:
-        return ''
-
-    url = country_names_url.format(parameters=urlencode({'lang': locale[0],
-                                                         'country': locale[1],
-                                                         'username': geonames_user}))
-    response = get(url)
-    json = loads(response.text)
-    content = json.get('geonames', None)
-    if content is None or len(content) != 1:
-        print "No country name found for " + locale[0] + "-" + locale[1]
-        print json
-        return ''
-
-    return content[0].get('countryName', '')
-
-
-# Get language names from Wikipedia.
-def get_wikipedia_languages():
-    response = get(wiki_languages_url)
-    dom = fromstring(response.text)
-    tables = dom.xpath('//table[contains(@class,"sortable")]')
-    for table in tables:
-        # exclude header row
-        trs = table.xpath('.//tr')[1:]
-        for tr in trs:
-            td = tr.xpath('./td')
-            code = td[3].xpath('./a')[0].text
-            name = td[2].xpath('./a')[0].text
-            english_name = td[1].xpath('./a')[0].text
-            articles = int(td[4].xpath('./a/b')[0].text.replace(',',''))
-            
-            # exclude language variants and languages with few articles
-            if code not in languages and articles >= 10000 and valid_code(code):
-                languages[code] = (name, '', english_name)
-
-
-# Get language names from Google.
-def get_google_languages():
-    response = get(google_languages_url)
-    dom = fromstring(response.text)
-    options = dom.xpath('//select[@name="hl"]/option')
-    for option in options:
-        code = option.xpath('./@value')[0].split('-')[0]
-        name = option.text[:-1].title()
-
-        if code not in languages and valid_code(code):
-            languages[code] = (name, '', '')
-
-
-# Join all language lists.
-# iterate all languages supported by each engine
-def join_language_lists():
-    for engine_name in engines:
-        for locale in engines[engine_name].supported_languages:
-            locale = locale.replace('_', '-')
-            if locale not in languages and valid_code(locale):
-                # try to get language name
-                language = languages.get(locale.split('-')[0], None)
-                if language == None:
-                    print engine_name + ": " + locale
-                    continue
-
-                country = get_country_name(locale)
-                languages[locale] = (language[0], country, language[2])
-
-
-# Remove countryless language if language is featured in only one country.
-def filter_single_country_languages():
-    prev_lang = None
-    for code in sorted(languages):
-        lang = code.split('-')[0]
-        if lang == prev_lang:
-            countries += 1
-        else:
-            if prev_lang is not None and countries == 1:
-                del languages[prev_lang]
-            countries = 0
-            prev_lang = lang
-
-
-# Write languages.py.
-def write_languages_file():
-    new_file = open('languages.py', 'w')
-    file_content = '# -*- coding: utf-8 -*-\n'
-    file_content += '# list of language codes\n'
-    file_content += '# this file is generated automatically by utils/update_search_languages.py\n'
-    file_content += '\nlanguage_codes = ('
-    for code in sorted(languages):
-        (name, country, english) = languages[code]
-        file_content += '\n    (u"' + code + '"'\
-                        + ', u"' + name + '"'\
-                        + ', u"' + country + '"'\
-                        + ', u"' + english + '"),'
-    # remove last comma
-    file_content = file_content[:-1]
-    file_content += '\n)\n'
-    new_file.write(file_content.encode('utf8'))
-    new_file.close()
-
-
-if __name__ == "__main__":
-    get_wikipedia_languages()
-    get_google_languages()
-    join_language_lists()
-    filter_single_country_languages()
-    write_languages_file()