Browse Source

Add tests for _fetch_supported_languages in engines
and refactor the method so it can be tested without making requests

marc 8 years ago
parent
commit
af35eee10b

File diff suppressed because it is too large
+ 0 - 3256
searx/data/engines_languages.json


BIN
searx/engines/.yandex.py.swp


+ 10 - 3
searx/engines/__init__.py

@@ -21,6 +21,7 @@ import sys
 from flask_babel import gettext
 from operator import itemgetter
 from json import loads
+from requests import get
 from searx import settings
 from searx import logger
 from searx.utils import load_module
@@ -79,9 +80,6 @@ def load_engine(engine_data):
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
 
-    if engine_data['name'] in languages:
-        setattr(engine, 'supported_languages', languages[engine_data['name']])
-
     # checking required variables
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
@@ -91,6 +89,15 @@ def load_engine(engine_data):
                          .format(engine.name, engine_attr))
             sys.exit(1)
 
+    # assign supported languages from json file
+    if engine_data['name'] in languages:
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+    # assign language fetching method if auxiliary method exists
+    if hasattr(engine, '_fetch_supported_languages'):
+        setattr(engine, 'fetch_supported_languages',
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
     engine.stats = {
         'result_count': 0,
         'search_count': 0,

+ 2 - 4
searx/engines/bing.py

@@ -15,7 +15,6 @@
 
 from urllib import urlencode
 from lxml import html
-from requests import get
 from searx.engines.xpath import extract_text
 
 # engine dependent config
@@ -86,10 +85,9 @@ def response(resp):
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
+    dom = html.fromstring(resp.text)
     options = dom.xpath('//div[@id="limit-languages"]//input')
     for option in options:
         code = option.xpath('./@id')[0].replace('_', '-')

+ 1 - 1
searx/engines/bing_images.py

@@ -19,7 +19,7 @@ from urllib import urlencode
 from lxml import html
 from json import loads
 import re
-from searx.engines.bing import fetch_supported_languages
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['images']

+ 1 - 1
searx/engines/bing_news.py

@@ -17,7 +17,7 @@ from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
-from searx.engines.bing import fetch_supported_languages
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['news']

+ 2 - 3
searx/engines/dailymotion.py

@@ -80,11 +80,10 @@ def response(resp):
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
 
-    response = get(supported_languages_url)
-    response_json = loads(response.text)
+    response_json = loads(resp.text)
 
     for language in response_json['list']:
         supported_languages[language['code']] = {}

+ 2 - 3
searx/engines/duckduckgo.py

@@ -119,11 +119,10 @@ def response(resp):
 
 
 # get supported languages from their site
-def fetch_supported_languages():
-    response = get(supported_languages_url)
+def _fetch_supported_languages(resp):
 
     # response is a js file with regions as an embedded object
-    response_page = response.text
+    response_page = resp.text
     response_page = response_page[response_page.find('regions:{') + 8:]
     response_page = response_page[:response_page.find('}') + 1]
 

+ 1 - 1
searx/engines/duckduckgo_definitions.py

@@ -4,7 +4,7 @@ from re import compile, sub
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
-from searx.engines.duckduckgo import fetch_supported_languages
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

+ 2 - 4
searx/engines/gigablast.py

@@ -14,7 +14,6 @@ from json import loads
 from random import randint
 from time import time
 from urllib import urlencode
-from requests import get
 from lxml.html import fromstring
 
 # engine dependent config
@@ -91,10 +90,9 @@ def response(resp):
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     links = dom.xpath('//span[@id="menu2"]/a')
     for link in links:
         code = link.xpath('./@href')[0][-2:]

+ 5 - 7
searx/engines/google.py

@@ -12,7 +12,6 @@ import re
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
 from lxml import html, etree
-from requests import get
 from searx.engines.xpath import extract_text, extract_url
 from searx.search import logger
 
@@ -364,14 +363,13 @@ def attributes_to_html(attributes):
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
-    options = dom.xpath('//select[@name="hl"]/option')
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//table//td/font/label/span')
     for option in options:
-        code = option.xpath('./@value')[0].split('-')[0]
-        name = option.text[:-1].title()
+        code = option.xpath('./@id')[0][1:]
+        name = option.text.title()
         supported_languages[code] = {"name": name}
 
     return supported_languages

+ 1 - 1
searx/engines/google_news.py

@@ -13,7 +13,7 @@
 from lxml import html
 from urllib import urlencode
 from json import loads
-from searx.engines.google import fetch_supported_languages
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
 
 # search-url
 categories = ['news']

+ 4 - 4
searx/engines/swisscows.py

@@ -13,7 +13,6 @@
 from json import loads
 from urllib import urlencode, unquote
 import re
-from requests import get
 from lxml.html import fromstring
 
 # engine dependent config
@@ -25,6 +24,8 @@ language_support = True
 base_url = 'https://swisscows.ch/'
 search_string = '?{query}&page={page}'
 
+supported_languages_url = base_url
+
 # regex
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
 regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -113,10 +114,9 @@ def response(resp):
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(base_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
     for option in options:
         code = option.xpath('./@data-val')[0]

+ 1 - 1
searx/engines/wikidata.py

@@ -15,7 +15,7 @@ from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.utils import format_date_by_locale
-from searx.engines.wikipedia import fetch_supported_languages
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
 
 from json import loads
 from lxml.html import fromstring

+ 2 - 4
searx/engines/wikipedia.py

@@ -12,7 +12,6 @@
 
 from json import loads
 from urllib import urlencode, quote
-from requests import get
 from lxml.html import fromstring
 
 
@@ -119,10 +118,9 @@ def response(resp):
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = {}
-    response = get(supported_languages_url)
-    dom = fromstring(response.text)
+    dom = fromstring(resp.text)
     tables = dom.xpath('//table[contains(@class,"sortable")]')
     for table in tables:
         # exclude header row

+ 3 - 5
searx/engines/yahoo.py

@@ -14,7 +14,6 @@
 from urllib import urlencode
 from urlparse import unquote
 from lxml import html
-from requests import get
 from searx.engines.xpath import extract_text, extract_url
 
 # engine dependent config
@@ -144,13 +143,12 @@ def response(resp):
 
 
 # get supported languages from their site
-def fetch_supported_languages():
+def _fetch_supported_languages(resp):
     supported_languages = []
-    response = get(supported_languages_url)
-    dom = html.fromstring(response.text)
+    dom = html.fromstring(resp.text)
     options = dom.xpath('//div[@id="yschlang"]/span/label/input')
     for option in options:
-        code = option.xpath('./@value')[0][5:]
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
         supported_languages.append(code)
 
     return supported_languages

+ 1 - 1
searx/engines/yahoo_news.py

@@ -12,7 +12,7 @@
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url, fetch_supported_languages
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
 from datetime import datetime, timedelta
 import re
 from dateutil import parser

+ 25 - 85
searx/languages.py

@@ -3,36 +3,27 @@
 # this file is generated automatically by utils/update_search_languages.py
 
 language_codes = (
-    (u"ach", u"Acoli", u"", u""),
     (u"af", u"Afrikaans", u"", u""),
-    (u"ak", u"Akan", u"", u""),
-    (u"am", u"አማርኛ", u"", u""),
+    (u"am", u"አማርኛ", u"", u"Amharic"),
     (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
     (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
-    (u"ban", u"Balinese", u"", u""),
     (u"be", u"Беларуская", u"", u"Belarusian"),
-    (u"bem", u"Ichibemba", u"", u""),
     (u"bg-BG", u"Български", u"България", u"Bulgarian"),
-    (u"bn", u"বাংলা", u"", u""),
-    (u"br", u"Brezhoneg", u"", u""),
-    (u"bs", u"Bosanski", u"", u""),
+    (u"bn", u"বাংলা", u"", u"Bengali"),
+    (u"br", u"Brezhoneg", u"", u"Breton"),
+    (u"bs", u"Bosnian", u"", u"Bosnian"),
     (u"ca", u"Català", u"", u"Catalan"),
     (u"ca-CT", u"Català", u"", u"Catalan"),
     (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
     (u"ce", u"Нохчийн", u"", u"Chechen"),
     (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
-    (u"chr", u"ᏣᎳᎩ", u"", u""),
-    (u"ckb", u"Central Kurdish", u"", u""),
-    (u"co", u"Corsican", u"", u""),
-    (u"crs", u"Seychellois Creole", u"", u""),
     (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
-    (u"cy", u"Cymraeg", u"", u""),
+    (u"cy", u"Cymraeg", u"", u"Welsh"),
     (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
     (u"de", u"Deutsch", u"", u"German"),
     (u"de-AT", u"Deutsch", u"Österreich", u"German"),
     (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
     (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
-    (u"ee", u"Eʋegbe", u"", u""),
     (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
     (u"en", u"English", u"", u"English"),
     (u"en-AU", u"English", u"Australia", u"English"),
@@ -60,30 +51,20 @@ language_codes = (
     (u"eu", u"Euskara", u"", u"Basque"),
     (u"fa", u"فارسی", u"", u"Persian"),
     (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
-    (u"fo", u"Føroyskt", u"", u""),
     (u"fr", u"Français", u"", u"French"),
     (u"fr-BE", u"Français", u"Belgique", u"French"),
     (u"fr-CA", u"Français", u"Canada", u"French"),
     (u"fr-CH", u"Français", u"Suisse", u"French"),
     (u"fr-FR", u"Français", u"France", u"French"),
-    (u"fy", u"West-Frysk", u"", u""),
-    (u"ga", u"Gaeilge", u"", u""),
-    (u"gaa", u"Ga", u"", u""),
-    (u"gd", u"Gàidhlig", u"", u""),
+    (u"ga", u"Gaeilge", u"", u"Irish"),
     (u"gl", u"Galego", u"", u"Galician"),
-    (u"gn", u"Guarani", u"", u""),
-    (u"gu", u"ગુજરાતી", u"", u""),
-    (u"ha", u"Hausa", u"", u""),
-    (u"haw", u"ʻŌlelo HawaiʻI", u"", u""),
+    (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
     (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
     (u"hi", u"हिन्दी", u"", u"Hindi"),
     (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
-    (u"ht", u"Haitian Creole", u"", u""),
     (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
     (u"hy", u"Հայերեն", u"", u"Armenian"),
-    (u"ia", u"Interlingua", u"", u""),
     (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
-    (u"ig", u"Igbo", u"", u""),
     (u"is", u"Íslenska", u"", u""),
     (u"it", u"Italiano", u"", u"Italian"),
     (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
@@ -91,86 +72,48 @@ language_codes = (
     (u"iw", u"עברית", u"", u""),
     (u"ja-JP", u"日本語", u"日本", u"Japanese"),
     (u"ka", u"ქართული", u"", u"Georgian"),
-    (u"kg", u"Kongo", u"", u""),
     (u"kk", u"Қазақша", u"", u"Kazakh"),
-    (u"km", u"ខ្មែរ", u"", u""),
-    (u"kn", u"ಕನ್ನಡ", u"", u""),
+    (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
     (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
-    (u"kri", u"Krio", u"", u""),
-    (u"ky", u"Кыргызча", u"", u""),
     (u"la", u"Latina", u"", u"Latin"),
-    (u"lg", u"Luganda", u"", u""),
-    (u"ln", u"Lingála", u"", u""),
-    (u"lo", u"ລາວ", u"", u""),
-    (u"loz", u"Lozi", u"", u""),
     (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
-    (u"lua", u"Luba-Lulua", u"", u""),
     (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
-    (u"mfe", u"Kreol Morisien", u"", u""),
-    (u"mg", u"Malagasy", u"", u""),
-    (u"mi", u"Maori", u"", u""),
+    (u"mi", u"Reo Māori", u"", u"Maori"),
     (u"min", u"Minangkabau", u"", u"Minangkabau"),
-    (u"mk", u"Македонски", u"", u""),
-    (u"ml", u"മലയാളം", u"", u""),
-    (u"mn", u"Монгол", u"", u""),
-    (u"mr", u"मराठी", u"", u""),
+    (u"mk", u"Македонски", u"", u"Macedonian"),
+    (u"mn", u"Монгол", u"", u"Mongolian"),
+    (u"mr", u"मराठी", u"", u"Marathi"),
     (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
-    (u"mt", u"Malti", u"", u""),
-    (u"my", u"ဗမာ", u"", u""),
+    (u"mt", u"Malti", u"", u"Maltese"),
     (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
-    (u"ne", u"नेपाली", u"", u""),
     (u"nl", u"Nederlands", u"", u"Dutch"),
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
     (u"nn", u"Nynorsk", u"", u"Norwegian"),
     (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
-    (u"nso", u"Northern Sotho", u"", u""),
-    (u"ny", u"Nyanja", u"", u""),
-    (u"nyn", u"Runyankore", u"", u""),
-    (u"oc", u"Occitan", u"", u""),
-    (u"om", u"Oromoo", u"", u""),
-    (u"or", u"ଓଡ଼ିଆ", u"", u""),
-    (u"pa", u"ਪੰਜਾਬੀ", u"", u""),
-    (u"pcm", u"Nigerian Pidgin", u"", u""),
+    (u"oc", u"Occitan", u"", u"Occitan"),
+    (u"or", u"Oriya", u"", u"Oriya"),
+    (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
     (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
-    (u"ps", u"پښتو", u"", u""),
+    (u"ps", u"Pushto", u"", u"Pushto"),
     (u"pt", u"Português", u"", u"Portuguese"),
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
-    (u"qu", u"Runasimi", u"", u""),
-    (u"rm", u"Rumantsch", u"", u""),
-    (u"rn", u"Ikirundi", u"", u""),
     (u"ro-RO", u"Română", u"România", u"Romanian"),
     (u"ru-RU", u"Русский", u"Россия", u"Russian"),
-    (u"rw", u"Kinyarwanda", u"", u""),
-    (u"sd", u"Sindhi", u"", u""),
+    (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
     (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
-    (u"si", u"සිංහල", u"", u""),
     (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
     (u"sl", u"Slovenščina", u"", u"Slovenian"),
-    (u"sn", u"Chishona", u"", u""),
-    (u"so", u"Soomaali", u"", u""),
-    (u"sq", u"Shqip", u"", u""),
     (u"sr", u"Српски / Srpski", u"", u"Serbian"),
-    (u"st", u"Southern Sotho", u"", u""),
-    (u"su", u"Sundanese", u"", u""),
     (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
     (u"sw", u"Kiswahili", u"", u""),
-    (u"ta", u"தமிழ்", u"", u""),
-    (u"te", u"తెలుగు", u"", u""),
-    (u"tg", u"Tajik", u"", u""),
+    (u"ta", u"தமிழ்", u"", u"Tamil"),
     (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
-    (u"ti", u"ትግርኛ", u"", u""),
-    (u"tk", u"Turkmen", u"", u""),
+    (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
     (u"tl-PH", u"Filipino", u"Pilipinas", u""),
-    (u"tlh", u"Klingon", u"", u""),
-    (u"tn", u"Tswana", u"", u""),
-    (u"to", u"Lea Fakatonga", u"", u""),
     (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
-    (u"tt", u"Tatar", u"", u""),
-    (u"tum", u"Tumbuka", u"", u""),
-    (u"tw", u"Twi", u"", u""),
-    (u"ug", u"ئۇيغۇرچە", u"", u""),
+    (u"tt", u"Татарча", u"", u"Tatar"),
     (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
     (u"ur", u"اردو", u"", u"Urdu"),
     (u"uz", u"O‘zbek", u"", u"Uzbek"),
@@ -179,13 +122,10 @@ language_codes = (
     (u"vo", u"Volapük", u"", u"Volapük"),
     (u"wa", u"Walon", u"", u"Walloon"),
     (u"war", u"Winaray", u"", u"Waray-Waray"),
-    (u"wo", u"Wolof", u"", u""),
-    (u"xh", u"Xhosa", u"", u""),
-    (u"yi", u"ייִדיש", u"", u""),
-    (u"yo", u"Èdè Yorùbá", u"", u""),
+    (u"xh", u"Xhosa", u"", u"Xhosa"),
     (u"zh", u"中文", u"", u"Chinese"),
-    (u"zh-CN", u"中文", u"中国", u"Chinese"),
+    (u"zh-CN", u"中文", u"中国", u""),
     (u"zh-HK", u"中文", u"香港", u"Chinese"),
-    (u"zh-TW", u"中文", u"台湾", u"Chinese"),
-    (u"zu", u"Isizulu", u"", u"")
+    (u"zh-TW", u"中文", u"台湾", u""),
+    (u"zu", u"Isi-Zulu", u"", u"Zulu")
 )

+ 32 - 0
tests/unit/engines/test_bing.py

@@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'This should be the title')
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
         self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        results = bing._fetch_supported_languages(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        html = """
+        <html>
+            <body>
+                <form>
+                    <div id="limit-languages">
+                        <div>
+                            <div><input id="es" value="es"></input></div>
+                        </div>
+                        <div>
+                            <div><input id="pt_BR" value="pt_BR"></input></div>
+                            <div><input id="pt_PT" value="pt_PT"></input></div>
+                        </div>
+                    </div>
+                </form>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = bing._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('es', languages)
+        self.assertIn('pt-BR', languages)
+        self.assertIn('pt-PT', languages)

+ 37 - 0
tests/unit/engines/test_dailymotion.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from collections import defaultdict
 import mock
 from searx.engines import dailymotion
@@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
         results = dailymotion.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
+
+    def test_fetch_supported_languages(self):
+        json = r"""
+        {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
+                  "localized_name":"Afrikaans","display_name":"Afrikaans"},
+                 {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
+                  "localized_name":"Arabic","display_name":"Arabic"},
+                 {"code":"la","name":"Latin","native_name":null,
+                  "localized_name":"Latin","display_name":"Latin"}
+        ]}
+        """
+        response = mock.Mock(text=json)
+        languages = dailymotion._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('af', languages)
+        self.assertIn('ar', languages)
+        self.assertIn('la', languages)
+
+        self.assertEqual(type(languages['af']), dict)
+        self.assertEqual(type(languages['ar']), dict)
+        self.assertEqual(type(languages['la']), dict)
+
+        self.assertIn('name', languages['af'])
+        self.assertIn('name', languages['ar'])
+        self.assertNotIn('name', languages['la'])
+
+        self.assertIn('english_name', languages['af'])
+        self.assertIn('english_name', languages['ar'])
+        self.assertIn('english_name', languages['la'])
+
+        self.assertEqual(languages['af']['name'], 'Afrikaans')
+        self.assertEqual(languages['af']['english_name'], 'Afrikaans')
+        self.assertEqual(languages['ar']['name'], u'العربية')
+        self.assertEqual(languages['ar']['english_name'], 'Arabic')
+        self.assertEqual(languages['la']['english_name'], 'Latin')

+ 14 - 0
tests/unit/engines/test_duckduckgo.py

@@ -84,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'This is the title')
         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
         self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_fetch_supported_languages(self):
+        js = """some code...regions:{
+        "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
+        }some more code..."""
+        response = mock.Mock(text=js)
+        languages = duckduckgo._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 5)
+        self.assertIn('wt-WT', languages)
+        self.assertIn('es-AR', languages)
+        self.assertIn('en-AU', languages)
+        self.assertIn('de-AT', languages)
+        self.assertIn('fr-BE', languages)

+ 25 - 0
tests/unit/engines/test_gigablast.py

@@ -89,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'South by Southwest 2016')
         self.assertEqual(results[0]['url'], 'www.sxsw.com')
         self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        results = gigablast._fetch_supported_languages(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        html = """
+        <html>
+            <body>
+                <span id="menu2">
+                    <a href="/search?&rxikd=1&qlang=xx"></a>
+                    <a href="/search?&rxikd=1&qlang=en"></a>
+                    <a href="/search?&rxikd=1&qlang=fr"></a>
+                </span>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = gigablast._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 2)
+        self.assertIn('en', languages)
+        self.assertIn('fr', languages)

+ 57 - 0
tests/unit/engines/test_google.py

@@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], '')
         self.assertEqual(results[0]['content'], '')
         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        languages = google._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 0)
+
+        html = u"""
+        <html>
+            <body>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+                                <font>
+                                    <label>
+                                        <span id="ten">English</span>
+                                    </label>
+                                </font>
+                            </td>
+                            <td>
+                                <font>
+                                    <label>
+                                        <span id="tzh-CN">中文 (简体)</span>
+                                    </label>
+                                    <label>
+                                        <span id="tzh-TW">中文 (繁體)</span>
+                                    </label>
+                                </font>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = google._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+
+        self.assertIn('en', languages)
+        self.assertIn('zh-CN', languages)
+        self.assertIn('zh-TW', languages)
+
+        self.assertEquals(type(languages['en']), dict)
+        self.assertEquals(type(languages['zh-CN']), dict)
+        self.assertEquals(type(languages['zh-TW']), dict)
+
+        self.assertIn('name', languages['en'])
+        self.assertIn('name', languages['zh-CN'])
+        self.assertIn('name', languages['zh-TW'])
+
+        self.assertEquals(languages['en']['name'], 'English')
+        self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
+        self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')

+ 27 - 0
tests/unit/engines/test_swisscows.py

@@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
         self.assertEqual(results[2]['template'], 'images.html')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        languages = swisscows._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 0)
+
+        html = """
+        <html>
+            <div id="regions-popup">
+                <div>
+                    <ul>
+                        <li><a data-val="browser"></a></li>
+                        <li><a data-val="de-CH"></a></li>
+                        <li><a data-val="fr-CH"></a></li>
+                    </ul>
+                </div>
+            </div>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = swisscows._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('de-CH', languages)
+        self.assertIn('fr-CH', languages)

+ 93 - 0
tests/unit/engines/test_wikipedia.py

@@ -164,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
         self.assertEqual(len(results), 2)
         self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
         self.assertIn(u'披头士乐队...', results[1]['content'])
+
+    def test_fetch_supported_languages(self):
+        html = u"""<html></html>"""
+        response = mock.Mock(text=html)
+        languages = wikipedia._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 0)
+
+        html = u"""
+        <html>
+            <body>
+                <div>
+                    <div>
+                        <h3>Table header</h3>
+                        <table class="sortable jquery-tablesorter">
+                            <thead>
+                                <tr>
+                                    <th>N</th>
+                                    <th>Language</th>
+                                    <th>Language (local)</th>
+                                    <th>Wiki</th>
+                                    <th>Articles</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>2</td>
+                                    <td><a>Swedish</a></td>
+                                    <td><a>Svenska</a></td>
+                                    <td><a>sv</a></td>
+                                    <td><a><b>3000000</b></a></td>
+                                </tr>
+                                <tr>
+                                    <td>3</td>
+                                    <td><a>Cebuano</a></td>
+                                    <td><a>Sinugboanong Binisaya</a></td>
+                                    <td><a>ceb</a></td>
+                                    <td><a><b>3000000</b></a></td>
+                                </tr>
+                            </tbody>
+                        </table>
+                        <h3>Table header</h3>
+                        <table class="sortable jquery-tablesorter">
+                            <thead>
+                                <tr>
+                                    <th>N</th>
+                                    <th>Language</th>
+                                    <th>Language (local)</th>
+                                    <th>Wiki</th>
+                                    <th>Articles</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>2</td>
+                                    <td><a>Norwegian (Bokmål)</a></td>
+                                    <td><a>Norsk (Bokmål)</a></td>
+                                    <td><a>no</a></td>
+                                    <td><a><b>100000</b></a></td>
+                                </tr>
+                            </tbody>
+                        </table>
+                    </div>
+                </div>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = wikipedia._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+
+        self.assertIn('sv', languages)
+        self.assertIn('ceb', languages)
+        self.assertIn('no', languages)
+
+        self.assertEqual(type(languages['sv']), dict)
+        self.assertEqual(type(languages['ceb']), dict)
+        self.assertEqual(type(languages['no']), dict)
+
+        self.assertIn('name', languages['sv'])
+        self.assertIn('english_name', languages['sv'])
+        self.assertIn('articles', languages['sv'])
+
+        self.assertEqual(languages['sv']['name'], 'Svenska')
+        self.assertEqual(languages['sv']['english_name'], 'Swedish')
+        self.assertEqual(languages['sv']['articles'], 3000000)
+        self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
+        self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
+        self.assertEqual(languages['ceb']['articles'], 3000000)
+        self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
+        self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
+        self.assertEqual(languages['no']['articles'], 100000)

+ 30 - 0
tests/unit/engines/test_yahoo.py

@@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
         results = yahoo.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        results = yahoo._fetch_supported_languages(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        html = """
+        <html>
+            <div>
+                <div id="yschlang">
+                    <span>
+                        <label><input value="lang_ar"></input></label>
+                    </span>
+                    <span>
+                        <label><input value="lang_zh_chs"></input></label>
+                        <label><input value="lang_zh_cht"></input></label>
+                    </span>
+                </div>
+            </div>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = yahoo._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('ar', languages)
+        self.assertIn('zh-chs', languages)
+        self.assertIn('zh-cht', languages)

+ 8 - 4
utils/fetch_languages.py

@@ -84,7 +84,7 @@ def fetch_supported_languages():
 
     # write json file
     f = io.open(engines_languages_file, "w", encoding="utf-8")
-    f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8")))
+    f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
     f.close()
 
 
@@ -110,18 +110,22 @@ def join_language_lists():
                 else:
                     languages[locale] = {}
 
-    # get locales that have no name yet
+    # get locales that have no name or country yet
     for locale in languages.keys():
         if not languages[locale].get('name'):
-            # try to get language and country names
+            # try to get language names
             name = languages.get(locale.split('-')[0], {}).get('name', None)
             if name:
                 languages[locale]['name'] = name
-                languages[locale]['country'] = get_country_name(locale) or ''
                 languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
             else:
                 # filter out locales with no name
                 del languages[locale]
+                continue
+
+        # try to get country name
+        if locale.find('-') > 0 and not languages[locale].get('country'):
+            languages[locale]['country'] = get_country_name(locale) or ''
 
 
 # Remove countryless language if language is featured in only one country.

Some files were not shown because too many files changed in this diff