
Merge pull request #748 from a01200356/languages

[mod] Allow users to search in most engine supported languages
Adam Tauber, 8 years ago
parent commit 9743bde25e
54 changed files with 982 additions and 153 deletions
  1. AUTHORS.rst (+1 -2)
  2. searx/autocomplete.py (+3 -3)
  3. searx/data/engines_languages.json (+0 -0)
  4. searx/engines/__init__.py (+14 -0)
  5. searx/engines/archlinux.py (+3 -2)
  6. searx/engines/bing.py (+14 -1)
  7. searx/engines/bing_images.py (+2 -1)
  8. searx/engines/bing_news.py (+2 -1)
  9. searx/engines/dailymotion.py (+22 -0)
  10. searx/engines/duckduckgo.py (+33 -5)
  11. searx/engines/duckduckgo_definitions.py (+2 -1)
  12. searx/engines/gigablast.py (+19 -1)
  13. searx/engines/google.py (+19 -1)
  14. searx/engines/google_news.py (+3 -1)
  15. searx/engines/mediawiki.py (+1 -1)
  16. searx/engines/photon.py (+2 -2)
  17. searx/engines/qwant.py (+1 -1)
  18. searx/engines/startpage.py (+1 -1)
  19. searx/engines/subtitleseeker.py (+10 -4)
  20. searx/engines/swisscows.py (+19 -2)
  21. searx/engines/twitter.py (+1 -1)
  22. searx/engines/wikidata.py (+4 -2)
  23. searx/engines/wikipedia.py (+27 -2)
  24. searx/engines/yacy.py (+1 -1)
  25. searx/engines/yahoo.py (+21 -1)
  26. searx/engines/yahoo_news.py (+1 -1)
  27. searx/engines/yandex.py (+4 -2)
  28. searx/engines/youtube_api.py (+1 -1)
  29. searx/languages.py (+129 -76)
  30. searx/preferences.py (+21 -2)
  31. searx/query.py (+7 -4)
  32. searx/search.py (+5 -1)
  33. searx/static/plugins/js/search_on_category_select.js (+5 -0)
  34. searx/templates/courgette/preferences.html (+3 -3)
  35. searx/templates/legacy/preferences.html (+3 -3)
  36. searx/templates/oscar/advanced.html (+1 -0)
  37. searx/templates/oscar/languages.html (+12 -0)
  38. searx/templates/oscar/preferences.html (+5 -6)
  39. searx/templates/pix-art/preferences.html (+3 -3)
  40. searx/webapp.py (+8 -4)
  41. tests/robot/test_basic.robot (+3 -3)
  42. tests/unit/engines/test_bing.py (+32 -0)
  43. tests/unit/engines/test_dailymotion.py (+37 -0)
  44. tests/unit/engines/test_duckduckgo.py (+26 -1)
  45. tests/unit/engines/test_duckduckgo_definitions.py (+4 -0)
  46. tests/unit/engines/test_gigablast.py (+31 -0)
  47. tests/unit/engines/test_google.py (+58 -1)
  48. tests/unit/engines/test_qwant.py (+1 -1)
  49. tests/unit/engines/test_subtitleseeker.py (+6 -1)
  50. tests/unit/engines/test_swisscows.py (+28 -1)
  51. tests/unit/engines/test_wikipedia.py (+100 -1)
  52. tests/unit/engines/test_yahoo.py (+30 -1)
  53. tests/unit/test_preferences.py (+22 -1)
  54. utils/fetch_languages.py (+171 -0)

+ 1 - 2
AUTHORS.rst

@@ -43,7 +43,7 @@ generally made searx better:
 - Kang-min Liu
 - Kirill Isakov
 - Guilhem Bonnefille
-- Marc Abonce Seguin
+- Marc Abonce Seguin @a01200356
 - @jibe-b
 - Christian Pietsch @pietsch
 - @Maxqia
@@ -55,7 +55,6 @@ generally made searx better:
 - Ammar Najjar @ammarnajjar
 - @stepshal
 - François Revol @mmuman
-- marc @a01200356
 - Harry Wood @harry-wood
 - Thomas Renard @threnard
 - Pydo `<https://github.com/pydo>`_

+ 3 - 3
searx/autocomplete.py

@@ -81,17 +81,17 @@ def searx_bang(full_query):
             engine_query = full_query.getSearchQuery()[1:]
 
             for lc in language_codes:
-                lang_id, lang_name, country = map(str.lower, lc)
+                lang_id, lang_name, country, english_name = map(str.lower, lc)
 
                 # check if query starts with language-id
                 if lang_id.startswith(engine_query):
                     if len(engine_query) <= 2:
-                        results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
+                        results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
                     else:
                         results.append(':{lang_id}'.format(lang_id=lang_id))
 
                 # check if query starts with language name
-                if lang_name.startswith(engine_query):
+                if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
                     results.append(':{lang_name}'.format(lang_name=lang_name))
 
                 # check if query starts with country

+ 0 - 0
searx/data/engines_languages.json

File diff suppressed because it is too large

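Although the diff for searx/data/engines_languages.json is suppressed, load_engine() below reads it as a mapping from engine name to that engine's fetched languages, which can be either a flat list of codes (bing, swisscows, yahoo) or a dict with per-code metadata (dailymotion, google, wikipedia). A hedged sketch of the assumed shape, with illustrative values only:

# Assumed shape of searx/data/engines_languages.json (values are illustrative):
# each key is an engine name, each value is whatever that engine's
# _fetch_supported_languages() returns -- a list of codes or a dict of metadata.
engines_languages = {
    "bing": ["de-DE", "en-US", "pt-BR"],
    "wikipedia": {
        "en": {"name": "English", "english_name": "English", "articles": 5000000},
        "de": {"name": "Deutsch", "english_name": "German", "articles": 1900000},
    },
}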

+ 14 - 0
searx/engines/__init__.py

@@ -20,6 +20,8 @@ from os.path import realpath, dirname
 import sys
 from flask_babel import gettext
 from operator import itemgetter
+from json import loads
+from requests import get
 from searx import settings
 from searx import logger
 from searx.utils import load_module
@@ -33,10 +35,13 @@ engines = {}
 
 categories = {'general': []}
 
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
+
 engine_shortcuts = {}
 engine_default_args = {'paging': False,
                        'categories': ['general'],
                        'language_support': True,
+                       'supported_languages': [],
                        'safesearch': False,
                        'timeout': settings['outgoing']['request_timeout'],
                        'shortcut': '-',
@@ -85,6 +90,15 @@ def load_engine(engine_data):
                          .format(engine.name, engine_attr))
             sys.exit(1)
 
+    # assign supported languages from json file
+    if engine_data['name'] in languages:
+        setattr(engine, 'supported_languages', languages[engine_data['name']])
+
+    # assign language fetching method if auxiliary method exists
+    if hasattr(engine, '_fetch_supported_languages'):
+        setattr(engine, 'fetch_supported_languages',
+                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
+
     engine.stats = {
         'result_count': 0,
         'search_count': 0,

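With the loader change above, an engine opts into language fetching by defining supported_languages_url and a module-level _fetch_supported_languages(resp); load_engine() then exposes engine.fetch_supported_languages(), which requests the URL and hands the response to the parser. A minimal sketch for a hypothetical engine (module name and XPath are illustrative, not part of this PR):

# hypothetical engine module: searx/engines/example.py
from lxml import html

# page that lists the languages the engine accepts
supported_languages_url = 'https://example-search.test/settings'


# called indirectly via engine.fetch_supported_languages() (see load_engine above)
def _fetch_supported_languages(resp):
    supported_languages = []
    dom = html.fromstring(resp.text)
    for option in dom.xpath('//select[@name="language"]/option/@value'):
        # normalize to the dash-separated codes used throughout this PR
        supported_languages.append(option.replace('_', '-'))
    return supported_languages
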
+ 3 - 2
searx/engines/archlinux.py

@@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'
 
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
 def locale_to_lang_code(locale):
-    if locale.find('_') >= 0:
-        locale = locale.split('_')[0]
+    if locale.find('-') >= 0:
+        locale = locale.split('-')[0]
     return locale
 
 
@@ -95,6 +95,7 @@ main_langs = {
     'uk': 'Українська',
     'zh': '简体中文'
 }
+supported_languages = dict(lang_urls, **main_langs)
 
 
 # do search-request

+ 14 - 1
searx/engines/bing.py

@@ -21,6 +21,7 @@ from searx.engines.xpath import extract_text
 categories = ['general']
 paging = True
 language_support = True
+supported_languages_url = 'https://www.bing.com/account/general'
 
 # search-url
 base_url = 'https://www.bing.com/'
@@ -32,7 +33,7 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
 
     if params['language'] != 'all':
-        query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
+        query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
                                          query.decode('utf-8')).encode('utf-8')
 
     search_path = search_string.format(
@@ -81,3 +82,15 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//div[@id="limit-languages"]//input')
+    for option in options:
+        code = option.xpath('./@id')[0].replace('_', '-')
+        supported_languages.append(code)
+
+    return supported_languages

+ 2 - 1
searx/engines/bing_images.py

@@ -19,6 +19,7 @@ from urllib import urlencode
 from lxml import html
 from json import loads
 import re
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['images']
@@ -53,7 +54,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en-US'
     else:
-        language = params['language'].replace('_', '-')
+        language = params['language']
 
     search_path = search_string.format(
         query=urlencode({'q': query}),

+ 2 - 1
searx/engines/bing_news.py

@@ -17,6 +17,7 @@ from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 
 # engine dependent config
 categories = ['news']
@@ -74,7 +75,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en-US'
     else:
-        language = params['language'].replace('_', '-')
+        language = params['language']
 
     params['url'] = _get_url(query, language, offset, params['time_range'])
 

+ 22 - 0
searx/engines/dailymotion.py

@@ -15,6 +15,7 @@
 from urllib import urlencode
 from json import loads
 from datetime import datetime
+from requests import get
 
 # engine dependent config
 categories = ['videos']
@@ -27,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr
 embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
     'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
 
+supported_languages_url = 'https://api.dailymotion.com/languages'
+
 
 # do search-request
 def request(query, params):
@@ -74,3 +77,22 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+
+    response_json = loads(resp.text)
+
+    for language in response_json['list']:
+        supported_languages[language['code']] = {}
+
+        name = language['native_name']
+        if name:
+            supported_languages[language['code']]['name'] = name
+        english_name = language['name']
+        if english_name:
+            supported_languages[language['code']]['english_name'] = english_name
+
+    return supported_languages

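Note that dailymotion returns a dict keyed by code (with native and English names), while bing above returns a plain list of codes; any consumer of engine.supported_languages, presumably including the new fetch utility, has to accept both shapes. A small hedged helper illustrating the normalization (the function name is hypothetical):

# Hypothetical helper: reduce either supported_languages shape to a list of codes.
def language_codes_of(supported_languages):
    if isinstance(supported_languages, dict):
        return list(supported_languages.keys())
    return list(supported_languages)
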
+ 33 - 5
searx/engines/duckduckgo.py

@@ -15,13 +15,15 @@
 
 from urllib import urlencode
 from lxml.html import fromstring
+from requests import get
+from json import loads
 from searx.engines.xpath import extract_text
-from searx.languages import language_codes
 
 # engine dependent config
 categories = ['general']
 paging = True
 language_support = True
+supported_languages_url = 'https://duckduckgo.com/d2030.js'
 time_range_support = True
 
 # search-url
@@ -46,19 +48,31 @@ def request(query, params):
 
     offset = (params['pageno'] - 1) * 30
 
+    # custom fixes for languages
     if params['language'] == 'all':
         locale = None
+    elif params['language'][:2] == 'ja':
+        locale = 'jp-jp'
+    elif params['language'][:2] == 'sl':
+        locale = 'sl-sl'
+    elif params['language'] == 'zh-TW':
+        locale = 'tw-tzh'
+    elif params['language'] == 'zh-HK':
+        locale = 'hk-tzh'
+    elif params['language'][-2:] == 'SA':
+        locale = 'xa-' + params['language'].split('-')[0]
+    elif params['language'][-2:] == 'GB':
+        locale = 'uk-' + params['language'].split('-')[0]
     else:
-        locale = params['language'].split('_')
+        locale = params['language'].split('-')
         if len(locale) == 2:
             # country code goes first
             locale = locale[1].lower() + '-' + locale[0].lower()
         else:
             # tries to get a country code from language
             locale = locale[0].lower()
-            lang_codes = [x[0] for x in language_codes]
-            for lc in lang_codes:
-                lc = lc.split('_')
+            for lc in supported_languages:
+                lc = lc.split('-')
                 if locale == lc[0]:
                     locale = lc[1].lower() + '-' + lc[0].lower()
                     break
@@ -102,3 +116,17 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+
+    # response is a js file with regions as an embedded object
+    response_page = resp.text
+    response_page = response_page[response_page.find('regions:{') + 8:]
+    response_page = response_page[:response_page.find('}') + 1]
+
+    regions_json = loads(response_page)
+    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+
+    return supported_languages

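The DuckDuckGo request code above converts searx's language-COUNTRY codes into DuckDuckGo's reversed country-language regions, with a handful of hard-coded exceptions. A simplified standalone sketch of that conversion (it omits the sl, zh-HK, SA and GB special cases shown in the diff):

# Simplified sketch of the locale mapping in duckduckgo.request().
def ddg_region(language, supported_languages):
    if language == 'all':
        return None
    if language[:2] == 'ja':
        return 'jp-jp'
    if language == 'zh-TW':
        return 'tw-tzh'
    parts = language.split('-')
    if len(parts) == 2:
        # country code goes first in DuckDuckGo regions
        return parts[1].lower() + '-' + parts[0].lower()
    # bare language: borrow the country from the first matching supported code
    for lc in supported_languages:
        lang, _, country = lc.partition('-')
        if parts[0] == lang and country:
            return country.lower() + '-' + lang.lower()
    return parts[0].lower()


# e.g. ddg_region('de-CH', ['de-CH', 'en-US']) == 'ch-de'
#      ddg_region('de',    ['de-CH', 'en-US']) == 'ch-de'
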
+ 2 - 1
searx/engines/duckduckgo_definitions.py

@@ -4,6 +4,7 @@ from re import compile, sub
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
 
 def request(query, params):
     params['url'] = url.format(query=urlencode({'q': query}))
-    params['headers']['Accept-Language'] = params['language']
+    params['headers']['Accept-Language'] = params['language'].split('-')[0]
     return params
 
 

+ 19 - 1
searx/engines/gigablast.py

@@ -14,6 +14,7 @@ from json import loads
 from random import randint
 from time import time
 from urllib import urlencode
+from lxml.html import fromstring
 
 # engine dependent config
 categories = ['general']
@@ -40,6 +41,8 @@ url_xpath = './/url'
 title_xpath = './/title'
 content_xpath = './/sum'
 
+supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
+
 
 # do search-request
 def request(query, params):
@@ -48,7 +51,9 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'xx'
     else:
-        language = params['language'][0:2]
+        language = params['language'].replace('-', '_').lower()
+        if language.split('-')[0] != 'zh':
+            language = language.split('-')[0]
 
     if params['safesearch'] >= 1:
         safesearch = 1
@@ -82,3 +87,16 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = fromstring(resp.text)
+    links = dom.xpath('//span[@id="menu2"]/a')
+    for link in links:
+        code = link.xpath('./@href')[0][-2:]
+        if code != 'xx' and code not in supported_languages:
+            supported_languages.append(code)
+
+    return supported_languages

+ 19 - 1
searx/engines/google.py

@@ -103,6 +103,7 @@ map_hostname_start = 'maps.google.'
 maps_path = '/maps'
 redirect_path = '/url'
 images_path = '/images'
+supported_languages_url = 'https://www.google.com/preferences?#languages'
 
 # specific xpath variables
 results_xpath = '//div[@class="g"]'
@@ -167,8 +168,12 @@ def request(query, params):
         language = 'en'
         country = 'US'
         url_lang = ''
+    elif params['language'][:2] == 'jv':
+        language = 'jw'
+        country = 'ID'
+        url_lang = 'lang_jw'
     else:
-        language_array = params['language'].lower().split('_')
+        language_array = params['language'].lower().split('-')
         if len(language_array) == 2:
             country = language_array[1]
         else:
@@ -355,3 +360,16 @@ def attributes_to_html(attributes):
         retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
     retval = retval + '</table>'
     return retval
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//table//td/font/label/span')
+    for option in options:
+        code = option.xpath('./@id')[0][1:]
+        name = option.text.title()
+        supported_languages[code] = {"name": name}
+
+    return supported_languages

+ 3 - 1
searx/engines/google_news.py

@@ -12,6 +12,8 @@
 
 from lxml import html
 from urllib import urlencode
+from json import loads
+from searx.engines.google import _fetch_supported_languages, supported_languages_url
 
 # search-url
 categories = ['news']
@@ -50,7 +52,7 @@ def request(query, params):
                                       search_options=urlencode(search_options))
 
     if params['language'] != 'all':
-        language_array = params['language'].lower().split('_')
+        language_array = params['language'].lower().split('-')
         params['url'] += '&lr=lang_' + language_array[0]
 
     return params

+ 1 - 1
searx/engines/mediawiki.py

@@ -46,7 +46,7 @@ def request(query, params):
     if params['language'] == 'all':
         language = 'en'
     else:
-        language = params['language'].split('_')[0]
+        language = params['language'].split('-')[0]
 
     # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
     if any(x[1] == 'language' for x in format_strings):

+ 2 - 2
searx/engines/photon.py

@@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
 result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
 
 # list of supported languages
-allowed_languages = ['de', 'en', 'fr', 'it']
+supported_languages = ['de', 'en', 'fr', 'it']
 
 
 # do search-request
@@ -37,7 +37,7 @@ def request(query, params):
 
     if params['language'] != 'all':
         language = params['language'].split('_')[0]
-        if language in allowed_languages:
+        if language in supported_languages:
             params['url'] = params['url'] + "&lang=" + language
 
     # using searx User-Agent

+ 1 - 1
searx/engines/qwant.py

@@ -46,7 +46,7 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&locale=' + params['language'].lower()
+        params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
 
     return params
 

+ 1 - 1
searx/engines/startpage.py

@@ -47,7 +47,7 @@ def request(query, params):
 
     # set language if specified
     if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
+        params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
 
     return params
 

+ 10 - 4
searx/engines/subtitleseeker.py

@@ -22,7 +22,7 @@ language = ""
 
 # search-url
 url = 'http://www.subtitleseeker.com/'
-search_url = url + 'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}?p={pageno}'
 
 # specific xpath variables
 results_xpath = '//div[@class="boxRows"]'
@@ -43,10 +43,16 @@ def response(resp):
 
     search_lang = ""
 
-    if resp.search_params['language'] != 'all':
-        search_lang = [lc[1]
+    # dirty fix for languages named differently on their site
+    if resp.search_params['language'][:2] == 'fa':
+        search_lang = 'Farsi'
+    elif resp.search_params['language'] == 'pt-BR':
+        search_lang = 'Brazilian'
+    elif resp.search_params['language'] != 'all':
+        search_lang = [lc[3]
                        for lc in language_codes
-                       if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
+                       if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
+        search_lang = search_lang[0].split(' (')[0]
 
     # parse results
     for result in dom.xpath(results_xpath):

+ 19 - 2
searx/engines/swisscows.py

@@ -13,6 +13,7 @@
 from json import loads
 from urllib import urlencode, unquote
 import re
+from lxml.html import fromstring
 
 # engine dependent config
 categories = ['general', 'images']
@@ -23,6 +24,8 @@ language_support = True
 base_url = 'https://swisscows.ch/'
 search_string = '?{query}&page={page}'
 
+supported_languages_url = base_url
+
 # regex
 regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
 regex_json_remove_start = re.compile(r'^initialData:\s*')
@@ -35,9 +38,11 @@ def request(query, params):
     if params['language'] == 'all':
         ui_language = 'browser'
         region = 'browser'
+    elif params['language'].split('-')[0] == 'no':
+        region = 'nb-NO'
     else:
-        region = params['language'].replace('_', '-')
-        ui_language = params['language'].split('_')[0]
+        region = params['language']
+        ui_language = params['language'].split('-')[0]
 
     search_path = search_string.format(
         query=urlencode({'query': query,
@@ -106,3 +111,15 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = fromstring(resp.text)
+    options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
+    for option in options:
+        code = option.xpath('./@data-val')[0]
+        supported_languages.append(code)
+
+    return supported_languages

+ 1 - 1
searx/engines/twitter.py

@@ -40,7 +40,7 @@ def request(query, params):
 
     # set language if specified
     if params['language'] != 'all':
-        params['cookies']['lang'] = params['language'].split('_')[0]
+        params['cookies']['lang'] = params['language'].split('-')[0]
     else:
         params['cookies']['lang'] = 'en'
 

+ 4 - 2
searx/engines/wikidata.py

@@ -14,6 +14,8 @@
 from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
+from searx.utils import format_date_by_locale
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
 
 from json import loads
 from lxml.html import fromstring
@@ -55,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 
 
 def request(query, params):
-    language = params['language'].split('_')[0]
+    language = params['language'].split('-')[0]
     if language == 'all':
         language = 'en'
 
@@ -70,7 +72,7 @@ def response(resp):
     html = fromstring(resp.content)
     wikidata_ids = html.xpath(wikidata_ids_xpath)
 
-    language = resp.search_params['language'].split('_')[0]
+    language = resp.search_params['language'].split('-')[0]
     if language == 'all':
         language = 'en'
 

+ 27 - 2
searx/engines/wikipedia.py

@@ -12,6 +12,8 @@
 
 from json import loads
 from urllib import urlencode, quote
+from lxml.html import fromstring
+
 
 # search-url
 base_url = 'https://{language}.wikipedia.org/'
@@ -24,14 +26,16 @@ search_postfix = 'w/api.php?'\
     '&explaintext'\
     '&pithumbsize=300'\
     '&redirects'
+supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 
 
 # set language in base_url
 def url_lang(lang):
-    if lang == 'all':
+    lang = lang.split('-')[0]
+    if lang == 'all' or lang not in supported_languages:
         language = 'en'
     else:
-        language = lang.split('_')[0]
+        language = lang
 
     return base_url.format(language=language)
 
@@ -111,3 +115,24 @@ def response(resp):
                     'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
 
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = {}
+    dom = fromstring(resp.text)
+    tables = dom.xpath('//table[contains(@class,"sortable")]')
+    for table in tables:
+        # exclude header row
+        trs = table.xpath('.//tr')[1:]
+        for tr in trs:
+            td = tr.xpath('./td')
+            code = td[3].xpath('./a')[0].text
+            name = td[2].xpath('./a')[0].text
+            english_name = td[1].xpath('./a')[0].text
+            articles = int(td[4].xpath('./a/b')[0].text.replace(',', ''))
+            # exclude languages with too few articles
+            if articles >= 100000:
+                supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles}
+
+    return supported_languages

+ 1 - 1
searx/engines/yacy.py

@@ -53,7 +53,7 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&lr=lang_' + params['language'].split('_')[0]
+        params['url'] += '&lr=lang_' + params['language'].split('-')[0]
 
     return params
 

+ 21 - 1
searx/engines/yahoo.py

@@ -27,6 +27,8 @@ base_url = 'https://search.yahoo.com/'
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
 search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
 
+supported_languages_url = 'https://search.yahoo.com/web/advanced'
+
 # specific xpath variables
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
 url_xpath = './/h3/a/@href'
@@ -72,7 +74,13 @@ def _get_url(query, offset, language, time_range):
 def _get_language(params):
     if params['language'] == 'all':
         return 'en'
-    return params['language'].split('_')[0]
+    elif params['language'][:2] == 'zh':
+        if params['language'] == 'zh' or params['language'] == 'zh-CH':
+            return 'szh'
+        else:
+            return 'tzh'
+    else:
+        return params['language'].split('-')[0]
 
 
 # do search-request
@@ -132,3 +140,15 @@ def response(resp):
 
     # return results
     return results
+
+
+# get supported languages from their site
+def _fetch_supported_languages(resp):
+    supported_languages = []
+    dom = html.fromstring(resp.text)
+    options = dom.xpath('//div[@id="yschlang"]/span/label/input')
+    for option in options:
+        code = option.xpath('./@value')[0][5:].replace('_', '-')
+        supported_languages.append(code)
+
+    return supported_languages

+ 1 - 1
searx/engines/yahoo_news.py

@@ -12,7 +12,7 @@
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url
+from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
 from datetime import datetime, timedelta
 import re
 from dateutil import parser

+ 4 - 2
searx/engines/yandex.py

@@ -22,7 +22,9 @@ language_support = True  # TODO
 
 default_tld = 'com'
 language_map = {'ru': 'ru',
-                'ua': 'uk',
+                'ua': 'ua',
+                'be': 'by',
+                'kk': 'kz',
                 'tr': 'com.tr'}
 
 # search-url
@@ -36,7 +38,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
 
 
 def request(query, params):
-    lang = params['language'].split('_')[0]
+    lang = params['language'].split('-')[0]
     host = base_url.format(tld=language_map.get(lang) or default_tld)
     params['url'] = host + search_url.format(page=params['pageno'] - 1,
                                              query=urlencode({'text': query}))

+ 1 - 1
searx/engines/youtube_api.py

@@ -36,7 +36,7 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
-        params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
+        params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
 
     return params
 

+ 129 - 76
searx/languages.py

@@ -1,78 +1,131 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-
-(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
-'''
-
+# -*- coding: utf-8 -*-
 # list of language codes
+# this file is generated automatically by utils/fetch_languages.py
+
 language_codes = (
-    ("ar_XA", "Arabic", "Arabia"),
-    ("bg_BG", "Bulgarian", "Bulgaria"),
-    ("cs_CZ", "Czech", "Czech Republic"),
-    ("da_DK", "Danish", "Denmark"),
-    ("de_AT", "German", "Austria"),
-    ("de_CH", "German", "Switzerland"),
-    ("de_DE", "German", "Germany"),
-    ("el_GR", "Greek", "Greece"),
-    ("en_AU", "English", "Australia"),
-    ("en_CA", "English", "Canada"),
-    ("en_GB", "English", "United Kingdom"),
-    ("en_ID", "English", "Indonesia"),
-    ("en_IE", "English", "Ireland"),
-    ("en_IN", "English", "India"),
-    ("en_MY", "English", "Malaysia"),
-    ("en_NZ", "English", "New Zealand"),
-    ("en_PH", "English", "Philippines"),
-    ("en_SG", "English", "Singapore"),
-    ("en_US", "English", "United States"),
-    ("en_XA", "English", "Arabia"),
-    ("en_ZA", "English", "South Africa"),
-    ("es_AR", "Spanish", "Argentina"),
-    ("es_CL", "Spanish", "Chile"),
-    ("es_ES", "Spanish", "Spain"),
-    ("es_MX", "Spanish", "Mexico"),
-    ("es_US", "Spanish", "United States"),
-    ("es_XL", "Spanish", "Latin America"),
-    ("et_EE", "Estonian", "Estonia"),
-    ("fi_FI", "Finnish", "Finland"),
-    ("fr_BE", "French", "Belgium"),
-    ("fr_CA", "French", "Canada"),
-    ("fr_CH", "French", "Switzerland"),
-    ("fr_FR", "French", "France"),
-    ("he_IL", "Hebrew", "Israel"),
-    ("hr_HR", "Croatian", "Croatia"),
-    ("hu_HU", "Hungarian", "Hungary"),
-    ("it_IT", "Italian", "Italy"),
-    ("ja_JP", "Japanese", "Japan"),
-    ("ko_KR", "Korean", "Korea"),
-    ("lt_LT", "Lithuanian", "Lithuania"),
-    ("lv_LV", "Latvian", "Latvia"),
-    ("nb_NO", "Norwegian", "Norway"),
-    ("nl_BE", "Dutch", "Belgium"),
-    ("nl_NL", "Dutch", "Netherlands"),
-    ("oc_OC", "Occitan", "Occitan"),
-    ("pl_PL", "Polish", "Poland"),
-    ("pt_BR", "Portuguese", "Brazil"),
-    ("pt_PT", "Portuguese", "Portugal"),
-    ("ro_RO", "Romanian", "Romania"),
-    ("ru_RU", "Russian", "Russia"),
-    ("sk_SK", "Slovak", "Slovak Republic"),
-    ("sl_SL", "Slovenian", "Slovenia"),
-    ("sv_SE", "Swedish", "Sweden"),
-    ("th_TH", "Thai", "Thailand"),
-    ("tr_TR", "Turkish", "Turkey"),
-    ("uk_UA", "Ukrainian", "Ukraine"),
-    ("zh_CN", "Chinese", "China"),
-    ("zh_HK", "Chinese", "Hong Kong SAR"),
-    ("zh_TW", "Chinese", "Taiwan"))
+    (u"af", u"Afrikaans", u"", u""),
+    (u"am", u"አማርኛ", u"", u"Amharic"),
+    (u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
+    (u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
+    (u"be", u"Беларуская", u"", u"Belarusian"),
+    (u"bg-BG", u"Български", u"България", u"Bulgarian"),
+    (u"bn", u"বাংলা", u"", u"Bengali"),
+    (u"br", u"Brezhoneg", u"", u"Breton"),
+    (u"bs", u"Bosnian", u"", u"Bosnian"),
+    (u"ca", u"Català", u"", u"Catalan"),
+    (u"ca-CT", u"Català", u"", u"Catalan"),
+    (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
+    (u"ce", u"Нохчийн", u"", u"Chechen"),
+    (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
+    (u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
+    (u"cy", u"Cymraeg", u"", u"Welsh"),
+    (u"da-DK", u"Dansk", u"Danmark", u"Danish"),
+    (u"de", u"Deutsch", u"", u"German"),
+    (u"de-AT", u"Deutsch", u"Österreich", u"German"),
+    (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
+    (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
+    (u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
+    (u"en", u"English", u"", u"English"),
+    (u"en-AU", u"English", u"Australia", u"English"),
+    (u"en-CA", u"English", u"Canada", u"English"),
+    (u"en-GB", u"English", u"United Kingdom", u"English"),
+    (u"en-ID", u"English", u"Indonesia", u"English"),
+    (u"en-IE", u"English", u"Ireland", u"English"),
+    (u"en-IN", u"English", u"India", u"English"),
+    (u"en-MY", u"English", u"Malaysia", u"English"),
+    (u"en-NZ", u"English", u"New Zealand", u"English"),
+    (u"en-PH", u"English", u"Philippines", u"English"),
+    (u"en-SG", u"English", u"Singapore", u"English"),
+    (u"en-US", u"English", u"United States", u"English"),
+    (u"en-ZA", u"English", u"South Africa", u"English"),
+    (u"eo", u"Esperanto", u"", u"Esperanto"),
+    (u"es", u"Español", u"", u"Spanish"),
+    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
+    (u"es-CL", u"Español", u"Chile", u"Spanish"),
+    (u"es-CO", u"Español", u"Colombia", u"Spanish"),
+    (u"es-ES", u"Español", u"España", u"Spanish"),
+    (u"es-MX", u"Español", u"México", u"Spanish"),
+    (u"es-PE", u"Español", u"Perú", u"Spanish"),
+    (u"es-US", u"Español", u"Estados Unidos", u"Spanish"),
+    (u"et-EE", u"Eesti", u"Eesti", u"Estonian"),
+    (u"eu", u"Euskara", u"", u"Basque"),
+    (u"fa", u"فارسی", u"", u"Persian"),
+    (u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
+    (u"fr", u"Français", u"", u"French"),
+    (u"fr-BE", u"Français", u"Belgique", u"French"),
+    (u"fr-CA", u"Français", u"Canada", u"French"),
+    (u"fr-CH", u"Français", u"Suisse", u"French"),
+    (u"fr-FR", u"Français", u"France", u"French"),
+    (u"ga", u"Gaeilge", u"", u"Irish"),
+    (u"gl", u"Galego", u"", u"Galician"),
+    (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
+    (u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
+    (u"hi", u"हिन्दी", u"", u"Hindi"),
+    (u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
+    (u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
+    (u"hy", u"Հայերեն", u"", u"Armenian"),
+    (u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
+    (u"is", u"Íslenska", u"", u""),
+    (u"it", u"Italiano", u"", u"Italian"),
+    (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
+    (u"it-IT", u"Italiano", u"Italia", u"Italian"),
+    (u"iw", u"עברית", u"", u""),
+    (u"ja-JP", u"日本語", u"日本", u"Japanese"),
+    (u"ka", u"ქართული", u"", u"Georgian"),
+    (u"kk", u"Қазақша", u"", u"Kazakh"),
+    (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
+    (u"ko-KR", u"한국어", u"대한민국", u"Korean"),
+    (u"la", u"Latina", u"", u"Latin"),
+    (u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
+    (u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
+    (u"mi", u"Reo Māori", u"", u"Maori"),
+    (u"min", u"Minangkabau", u"", u"Minangkabau"),
+    (u"mk", u"Македонски", u"", u"Macedonian"),
+    (u"mn", u"Монгол", u"", u"Mongolian"),
+    (u"mr", u"मराठी", u"", u"Marathi"),
+    (u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
+    (u"mt", u"Malti", u"", u"Maltese"),
+    (u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
+    (u"nl", u"Nederlands", u"", u"Dutch"),
+    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
+    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
+    (u"nn", u"Nynorsk", u"", u"Norwegian"),
+    (u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
+    (u"oc", u"Occitan", u"", u"Occitan"),
+    (u"or", u"Oriya", u"", u"Oriya"),
+    (u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
+    (u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
+    (u"ps", u"Pushto", u"", u"Pushto"),
+    (u"pt", u"Português", u"", u"Portuguese"),
+    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
+    (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
+    (u"ro-RO", u"Română", u"România", u"Romanian"),
+    (u"ru-RU", u"Русский", u"Россия", u"Russian"),
+    (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
+    (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
+    (u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
+    (u"sl", u"Slovenščina", u"", u"Slovenian"),
+    (u"sr", u"Српски / Srpski", u"", u"Serbian"),
+    (u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
+    (u"sw", u"Kiswahili", u"", u""),
+    (u"ta", u"தமிழ்", u"", u"Tamil"),
+    (u"th-TH", u"ไทย", u"ไทย", u"Thai"),
+    (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
+    (u"tl-PH", u"Filipino", u"Pilipinas", u""),
+    (u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
+    (u"tt", u"Татарча", u"", u"Tatar"),
+    (u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
+    (u"ur", u"اردو", u"", u"Urdu"),
+    (u"uz", u"O‘zbek", u"", u"Uzbek"),
+    (u"ve", u"Venda", u"", u"Venda"),
+    (u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"),
+    (u"vo", u"Volapük", u"", u"Volapük"),
+    (u"wa", u"Walon", u"", u"Walloon"),
+    (u"war", u"Winaray", u"", u"Waray-Waray"),
+    (u"xh", u"Xhosa", u"", u"Xhosa"),
+    (u"zh", u"中文", u"", u"Chinese"),
+    (u"zh-CN", u"中文", u"中国", u"Chinese"),
+    (u"zh-HK", u"中文", u"香港", u"Chinese"),
+    (u"zh-TW", u"中文", u"台湾", u"Chinese"),
+    (u"zu", u"Isi-Zulu", u"", u"Zulu")
+)

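Each entry in the regenerated language_codes is now a 4-tuple of (code, native name, country name, English name), where the country and English name may be empty. A quick sketch of consuming it, for example to build display labels:

# Sketch: build a code -> display name lookup from the new 4-tuples.
from searx.languages import language_codes

display_names = {}
for code, name, country, english_name in language_codes:
    label = name if not country else u'{} ({})'.format(name, country)
    display_names[code] = label
# display_names[u'de-CH'] == u'Deutsch (Schweiz)'
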
+ 21 - 2
searx/preferences.py

@@ -95,6 +95,25 @@ class MultipleChoiceSetting(EnumStringSetting):
         resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
 
 
+class SearchLanguageSetting(EnumStringSetting):
+    """Available choices may change, so user's value may not be in choices anymore"""
+
+    def parse(self, data):
+        if data not in self.choices and data != self.value:
+            # hack to give some backwards compatibility with old language cookies
+            data = str(data).replace('_', '-')
+            lang = data.split('-')[0]
+            if data in self.choices:
+                pass
+            elif lang in self.choices:
+                data = lang
+            elif data == 'ar-XA':
+                data = 'ar-SA'
+            else:
+                data = self.value
+        self.value = data
+
+
 class MapSetting(Setting):
     """Setting of a value that has to be translated in order to be storable"""
 
@@ -216,8 +235,8 @@ class Preferences(object):
         super(Preferences, self).__init__()
 
         self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories),
-                                   'language': EnumStringSetting(settings['search']['language'],
-                                                                 choices=LANGUAGE_CODES),
+                                   'language': SearchLanguageSetting(settings['search']['language'],
+                                                                     choices=LANGUAGE_CODES),
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
                                                                choices=settings['locales'].keys() + ['']),
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],

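The new SearchLanguageSetting.parse() gives old underscore-style cookies a graceful upgrade path instead of rejecting them. A small sketch of the resulting behaviour, using a hypothetical choices list (the real one is built from LANGUAGE_CODES):

# Hypothetical choices list; the real one comes from searx.preferences.LANGUAGE_CODES.
from searx.preferences import SearchLanguageSetting

choices = ['all', 'en', 'en-US', 'pt', 'pt-BR', 'ar-SA']
setting = SearchLanguageSetting('en-US', choices=choices)

setting.parse('pt_BR')   # legacy underscore cookie -> normalized to 'pt-BR'
setting.parse('de-CH')   # neither 'de-CH' nor 'de' is a choice -> keeps the previous value
setting.parse('ar_XA')   # old Bing-style region -> mapped to 'ar-SA'
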
+ 7 - 4
searx/query.py

@@ -71,21 +71,24 @@ class RawTextQuery(object):
                 # check if any language-code is equal with
                 # declared language-codes
                 for lc in language_codes:
-                    lang_id, lang_name, country = map(str.lower, lc)
+                    lang_id, lang_name, country, english_name = map(unicode.lower, lc)
 
                     # if correct language-code is found
                     # set it as new search-language
                     if lang == lang_id\
                        or lang_id.startswith(lang)\
                        or lang == lang_name\
+                       or lang == english_name\
                        or lang.replace('_', ' ') == country:
                         parse_next = True
-                        self.languages.append(lang)
-                        break
+                        self.languages.append(lang_id)
+                        # to ensure best match (first match is not necessarily the best one)
+                        if lang == lang_id:
+                            break
 
             # this force a engine or category
             if query_part[0] == '!' or query_part[0] == '?':
-                prefix = query_part[1:].replace('_', ' ')
+                prefix = query_part[1:].replace('-', ' ')
 
                 # check if prefix is equal with engine shortcut
                 if prefix in engine_shortcuts:

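With the query parser change above, a ':' prefix now matches codes, native names and English names, and an exact code match beats partial or name matches. A standalone sketch of that resolution (country matching and the parser plumbing are left out; Python 2, as in searx at the time):

# Standalone sketch of the matching rules applied in RawTextQuery.
from searx.languages import language_codes

def resolve_language_bang(lang):
    lang = lang.lower()
    selected = None
    for lc in language_codes:
        lang_id, lang_name, country, english_name = map(unicode.lower, lc)
        if lang == lang_id or lang_id.startswith(lang) \
           or lang == lang_name or lang == english_name:
            selected = lang_id
            if lang == lang_id:
                # exact code match is the best match; stop here
                break
    return selected

# e.g. resolve_language_bang('esperanto') -> u'eo'; resolve_language_bang('de-ch') -> u'de-ch'
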
+ 5 - 1
searx/search.py

@@ -211,10 +211,14 @@ def get_search_query_from_webapp(preferences, form):
     # set query
     query = raw_text_query.getSearchQuery()
 
-    # get last selected language in query, if possible
+    # set specific language if set on request, query or preferences
     # TODO support search with multible languages
     if len(raw_text_query.languages):
         query_lang = raw_text_query.languages[-1]
+    elif 'language' in form:
+        query_lang = form.get('language')
+    else:
+        query_lang = preferences.get_value('language')
 
     query_time_range = form.get('time_range')
 

+ 5 - 0
searx/static/plugins/js/search_on_category_select.js

@@ -15,5 +15,10 @@ $(document).ready(function() {
                 $('#search_form').submit();
             }
         });
+        $('#language').change(function(e) {
+            if($('#q').val()) {
+                $('#search_form').submit();
+            }
+        });
     }
 });

+ 3 - 3
searx/templates/courgette/preferences.html

@@ -13,9 +13,9 @@
         <legend>{{ _('Search language') }}</legend>
         <p>
             <select name='language'>
-                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
-                {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
-                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+                <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+                {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+                <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
                 {% endfor %}
             </select>
         </p>

+ 3 - 3
searx/templates/legacy/preferences.html

@@ -14,9 +14,9 @@
         <legend>{{ _('Search language') }}</legend>
         <p>
         <select name='language'>
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
-            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
-            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+            {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
             {% endfor %}
         </select>
         </p>

+ 1 - 0
searx/templates/oscar/advanced.html

@@ -6,4 +6,5 @@
 <div id="advanced-search-container">
     {% include 'oscar/categories.html' %}
     {% include 'oscar/time-range.html' %}
+    {% include 'oscar/languages.html' %}
 </div>

+ 12 - 0
searx/templates/oscar/languages.html

@@ -0,0 +1,12 @@
+{% if preferences %}
+<select class="form-control" name='language'>
+{% else %}
+<select class="time_range" id='language' name='language'>
+{% endif %}
+	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+		{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+		<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
+			{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
+		</option>
+		{% endfor %}
+</select>

+ 5 - 6
searx/templates/oscar/preferences.html

@@ -40,12 +40,7 @@
                     {% set language_label = _('Search language') %}
                     {% set language_info = _('What language do you prefer for search?') %}
                     {{ preferences_item_header(language_info, language_label, rtl) }}
-                        <select class="form-control" name='language'>
-                            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
-                            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
-                            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
-                            {% endfor %}
-                        </select>
+						{% include 'oscar/languages.html' %}
                     {{ preferences_item_footer(language_info, language_label, rtl) }}
 
                     {% set locale_label = _('Interface language') %}
@@ -153,6 +148,7 @@
 				    <th>{{ _("Allow") }}</th>
 				    <th>{{ _("Engine name") }}</th>
 				    <th>{{ _("Shortcut") }}</th>
+				    <th>{{ _("Language support") }}</th>
 				    <th>{{ _("SafeSearch") }}</th>
 				    <th>{{ _("Time range") }}</th>
 				    <th>{{ _("Avg. time") }}</th>
@@ -161,6 +157,7 @@
 				    <th>{{ _("Max time") }}</th>
 				    <th>{{ _("Avg. time") }}</th>
 				    <th>{{ _("SafeSearch") }}</th>
+				    <th>{{ _("Language support") }}</th>
 				    <th>{{ _("Shortcut") }}</th>
 				    <th>{{ _("Engine name") }}</th>
 				    <th>{{ _("Allow") }}</th>
@@ -175,6 +172,7 @@
                                     </td>
                                     <th>{{ search_engine.name }}</th>
 				    <td>{{ shortcuts[search_engine.name] }}</td>
+				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
 				    <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
@@ -183,6 +181,7 @@
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
 				    <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
 				    <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
+				    <td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
 				    <td>{{ shortcuts[search_engine.name] }}</td>
                                     <th>{{ search_engine.name }}</th>
                                     <td class="onoff-checkbox">

+ 3 - 3
searx/templates/pix-art/preferences.html

@@ -9,9 +9,9 @@
         <legend>{{ _('Search language') }}</legend>
         <p>
         <select name='language'>
-            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
-            {% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
-            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
+            <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+            {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
+            <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
             {% endfor %}
         </select>
         </p>

+ 8 - 4
searx/webapp.py

@@ -330,6 +330,10 @@ def render(template_name, override_theme=None, **kwargs):
 
     kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
 
+    kwargs['language_codes'] = language_codes
+    if 'current_language' not in kwargs:
+        kwargs['current_language'] = request.preferences.get_value('language')
+
     # override url_for function in templates
     kwargs['url_for'] = url_for_theme
 
@@ -510,6 +514,7 @@ def index():
         answers=result_container.answers,
         infoboxes=result_container.infoboxes,
         paging=result_container.paging,
+        current_language=search_query.lang,
         base_url=get_base_url(),
         theme=get_current_theme_name(),
         favicons=global_favicons[themes.index(get_current_theme_name())]
@@ -552,7 +557,7 @@ def autocompleter():
         if not language or language == 'all':
             language = 'en'
         else:
-            language = language.split('_')[0]
+            language = language.split('-')[0]
         # run autocompletion
         raw_results.extend(completer(raw_text_query.getSearchQuery(), language))
 
@@ -615,9 +620,7 @@ def preferences():
     return render('preferences.html',
                   locales=settings['locales'],
                   current_locale=get_locale(),
-                  current_language=lang,
                   image_proxy=image_proxy,
-                  language_codes=language_codes,
                   engines_by_category=categories,
                   stats=stats,
                   answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
@@ -627,7 +630,8 @@ def preferences():
                   themes=themes,
                   plugins=plugins,
                   allowed_plugins=allowed_plugins,
-                  theme=get_current_theme_name())
+                  theme=get_current_theme_name(),
+                  preferences=True)
 
 
 @app.route('/image_proxy', methods=['GET'])

+ 3 - 3
tests/robot/test_basic.robot

@@ -101,11 +101,11 @@ Change search language
     Page Should Contain  about
     Page Should Contain  preferences
     Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Automatic
-    Select From List  language  Turkish (Turkey) - tr_TR
+    List Selection Should Be  language  Default language
+    Select From List  language  Türkçe (Türkiye) - tr-TR
     Submit Preferences
     Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Turkish (Turkey) - tr_TR
+    List Selection Should Be  language  Türkçe (Türkiye) - tr-TR
 
 Change autocomplete
     Page Should Contain  about

+ 32 - 0
tests/unit/engines/test_bing.py

@@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'This should be the title')
         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
         self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        results = bing._fetch_supported_languages(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        html = """
+        <html>
+            <body>
+                <form>
+                    <div id="limit-languages">
+                        <div>
+                            <div><input id="es" value="es"></input></div>
+                        </div>
+                        <div>
+                            <div><input id="pt_BR" value="pt_BR"></input></div>
+                            <div><input id="pt_PT" value="pt_PT"></input></div>
+                        </div>
+                    </div>
+                </form>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = bing._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('es', languages)
+        self.assertIn('pt-BR', languages)
+        self.assertIn('pt-PT', languages)

+ 37 - 0
tests/unit/engines/test_dailymotion.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from collections import defaultdict
 import mock
 from searx.engines import dailymotion
@@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
         results = dailymotion.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
+
+    def test_fetch_supported_languages(self):
+        json = r"""
+        {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
+                  "localized_name":"Afrikaans","display_name":"Afrikaans"},
+                 {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
+                  "localized_name":"Arabic","display_name":"Arabic"},
+                 {"code":"la","name":"Latin","native_name":null,
+                  "localized_name":"Latin","display_name":"Latin"}
+        ]}
+        """
+        response = mock.Mock(text=json)
+        languages = dailymotion._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('af', languages)
+        self.assertIn('ar', languages)
+        self.assertIn('la', languages)
+
+        self.assertEqual(type(languages['af']), dict)
+        self.assertEqual(type(languages['ar']), dict)
+        self.assertEqual(type(languages['la']), dict)
+
+        self.assertIn('name', languages['af'])
+        self.assertIn('name', languages['ar'])
+        self.assertNotIn('name', languages['la'])
+
+        self.assertIn('english_name', languages['af'])
+        self.assertIn('english_name', languages['ar'])
+        self.assertIn('english_name', languages['la'])
+
+        self.assertEqual(languages['af']['name'], 'Afrikaans')
+        self.assertEqual(languages['af']['english_name'], 'Afrikaans')
+        self.assertEqual(languages['ar']['name'], u'العربية')
+        self.assertEqual(languages['ar']['english_name'], 'Arabic')
+        self.assertEqual(languages['la']['english_name'], 'Latin')

+ 26 - 1
tests/unit/engines/test_duckduckgo.py

@@ -11,7 +11,7 @@ class TestDuckduckgoEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
-        dicto['language'] = 'de_CH'
+        dicto['language'] = 'de-CH'
         dicto['time_range'] = ''
         params = duckduckgo.request(query, dicto)
         self.assertIn('url', params)
@@ -19,6 +19,17 @@ class TestDuckduckgoEngine(SearxTestCase):
         self.assertIn('duckduckgo.com', params['url'])
         self.assertIn('ch-de', params['url'])
 
+        # when ddg uses non standard code
+        dicto['language'] = 'en-GB'
+        params = duckduckgo.request(query, dicto)
+        self.assertIn('uk-en', params['url'])
+
+        # no country given
+        duckduckgo.supported_languages = ['de-CH', 'en-US']
+        dicto['language'] = 'de'
+        params = duckduckgo.request(query, dicto)
+        self.assertIn('ch-de', params['url'])
+
     def test_no_url_in_request_year_time_range(self):
         dicto = defaultdict(dict)
         query = 'test_query'
@@ -73,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'This is the title')
         self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
         self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_fetch_supported_languages(self):
+        js = """some code...regions:{
+        "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
+        }some more code..."""
+        response = mock.Mock(text=js)
+        languages = duckduckgo._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 5)
+        self.assertIn('wt-WT', languages)
+        self.assertIn('es-AR', languages)
+        self.assertIn('en-AU', languages)
+        self.assertIn('de-AT', languages)
+        self.assertIn('fr-BE', languages)

+ 4 - 0
tests/unit/engines/test_duckduckgo_definitions.py

@@ -21,10 +21,14 @@ class TestDDGDefinitionsEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
+        dicto['language'] = 'es'
         params = duckduckgo_definitions.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
         self.assertIn('duckduckgo.com', params['url'])
+        self.assertIn('headers', params)
+        self.assertIn('Accept-Language', params['headers'])
+        self.assertIn('es', params['headers']['Accept-Language'])
 
     def test_response(self):
         self.assertRaises(AttributeError, duckduckgo_definitions.response, None)

+ 31 - 0
tests/unit/engines/test_gigablast.py

@@ -15,6 +15,12 @@ class TestGigablastEngine(SearxTestCase):
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('gigablast.com' in params['url'])
+        self.assertTrue('xx' in params['url'])
+
+        dicto['language'] = 'en-US'
+        params = gigablast.request(query, dicto)
+        self.assertTrue('en' in params['url'])
+        self.assertFalse('en-US' in params['url'])
 
     def test_response(self):
         self.assertRaises(AttributeError, gigablast.response, None)
@@ -83,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'South by Southwest 2016')
         self.assertEqual(results[0]['url'], 'www.sxsw.com')
         self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        results = gigablast._fetch_supported_languages(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        html = """
+        <html>
+            <body>
+                <span id="menu2">
+                    <a href="/search?&rxikd=1&qlang=xx"></a>
+                    <a href="/search?&rxikd=1&qlang=en"></a>
+                    <a href="/search?&rxikd=1&qlang=fr"></a>
+                </span>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = gigablast._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 2)
+        self.assertIn('en', languages)
+        self.assertIn('fr', languages)
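The second fixture suggests the language list is scraped from the qlang query parameter of the anchors inside span#menu2, skipping the xx placeholder. A sketch of that kind of extraction, written against the fixture's markup (the XPath and function name are assumptions, not necessarily the engine's own code):

    # Sketch of scraping qlang values from a gigablast-style language menu.
    from lxml.html import fromstring
    try:
        from urllib.parse import urlparse, parse_qs   # Python 3
    except ImportError:
        from urlparse import urlparse, parse_qs       # Python 2

    def parse_language_menu(html_text):
        dom = fromstring(html_text)
        languages = []
        for href in dom.xpath('//span[@id="menu2"]/a/@href'):
            qlang = parse_qs(urlparse(href).query).get('qlang', [''])[0]
            if qlang and qlang != 'xx':                # 'xx' means "any language"
                languages.append(qlang)
        return languages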

+ 58 - 1
tests/unit/engines/test_google.py

@@ -18,7 +18,7 @@ class TestGoogleEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         dicto['time_range'] = ''
         params = google.request(query, dicto)
         self.assertIn('url', params)
@@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], '')
         self.assertEqual(results[0]['content'], '')
         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        languages = google._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 0)
+
+        html = u"""
+        <html>
+            <body>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+                                <font>
+                                    <label>
+                                        <span id="ten">English</span>
+                                    </label>
+                                </font>
+                            </td>
+                            <td>
+                                <font>
+                                    <label>
+                                        <span id="tzh-CN">中文 (简体)</span>
+                                    </label>
+                                    <label>
+                                        <span id="tzh-TW">中文 (繁體)</span>
+                                    </label>
+                                </font>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = google._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+
+        self.assertIn('en', languages)
+        self.assertIn('zh-CN', languages)
+        self.assertIn('zh-TW', languages)
+
+        self.assertEqual(type(languages['en']), dict)
+        self.assertEqual(type(languages['zh-CN']), dict)
+        self.assertEqual(type(languages['zh-TW']), dict)
+
+        self.assertIn('name', languages['en'])
+        self.assertIn('name', languages['zh-CN'])
+        self.assertIn('name', languages['zh-TW'])
+
+        self.assertEqual(languages['en']['name'], 'English')
+        self.assertEqual(languages['zh-CN']['name'], u'中文 (简体)')
+        self.assertEqual(languages['zh-TW']['name'], u'中文 (繁體)')

+ 1 - 1
tests/unit/engines/test_qwant.py

@@ -10,7 +10,7 @@ class TestQwantEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         qwant.categories = ['']
         params = qwant.request(query, dicto)
         self.assertIn('url', params)

+ 6 - 1
tests/unit/engines/test_subtitleseeker.py

@@ -10,6 +10,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
+        dicto['language'] = 'fr-FR'
         params = subtitleseeker.request(query, dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
@@ -17,7 +18,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
 
     def test_response(self):
         dicto = defaultdict(dict)
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         response = mock.Mock(search_params=dicto)
 
         self.assertRaises(AttributeError, subtitleseeker.response, None)
@@ -68,6 +69,10 @@ class TestSubtitleseekerEngine(SearxTestCase):
         self.assertIn('1039 Subs', results[0]['content'])
         self.assertIn('Alternative Title', results[0]['content'])
 
+        dicto['language'] = 'pt-BR'
+        results = subtitleseeker.response(response)
+        self.assertEqual(results[0]['url'], 'http://this.is.the.url/Brazilian/')
+
         html = """
         <div class="boxRows">
             <div class="boxRowsInner" style="width:600px;">

+ 28 - 1
tests/unit/engines/test_swisscows.py

@@ -10,7 +10,7 @@ class TestSwisscowsEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
-        dicto['language'] = 'de_DE'
+        dicto['language'] = 'de-DE'
         params = swisscows.request(query, dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
@@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
         self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
         self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
         self.assertEqual(results[2]['template'], 'images.html')
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        languages = swisscows._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 0)
+
+        html = """
+        <html>
+            <div id="regions-popup">
+                <div>
+                    <ul>
+                        <li><a data-val="browser"></a></li>
+                        <li><a data-val="de-CH"></a></li>
+                        <li><a data-val="fr-CH"></a></li>
+                    </ul>
+                </div>
+            </div>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = swisscows._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('de-CH', languages)
+        self.assertIn('fr-CH', languages)

+ 100 - 1
tests/unit/engines/test_wikipedia.py

@@ -8,9 +8,11 @@ from searx.testing import SearxTestCase
 class TestWikipediaEngine(SearxTestCase):
 
     def test_request(self):
+        wikipedia.supported_languages = ['fr', 'en']
+
         query = 'test_query'
         dicto = defaultdict(dict)
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         params = wikipedia.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
@@ -27,6 +29,10 @@ class TestWikipediaEngine(SearxTestCase):
         params = wikipedia.request(query, dicto)
         self.assertIn('en', params['url'])
 
+        dicto['language'] = 'xx'
+        params = wikipedia.request(query, dicto)
+        self.assertIn('en', params['url'])
+
     def test_response(self):
         dicto = defaultdict(dict)
         dicto['language'] = 'fr'
@@ -158,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
         self.assertEqual(len(results), 2)
         self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
         self.assertIn(u'披头士乐队...', results[1]['content'])
+
+    def test_fetch_supported_languages(self):
+        html = u"""<html></html>"""
+        response = mock.Mock(text=html)
+        languages = wikipedia._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 0)
+
+        html = u"""
+        <html>
+            <body>
+                <div>
+                    <div>
+                        <h3>Table header</h3>
+                        <table class="sortable jquery-tablesorter">
+                            <thead>
+                                <tr>
+                                    <th>N</th>
+                                    <th>Language</th>
+                                    <th>Language (local)</th>
+                                    <th>Wiki</th>
+                                    <th>Articles</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>2</td>
+                                    <td><a>Swedish</a></td>
+                                    <td><a>Svenska</a></td>
+                                    <td><a>sv</a></td>
+                                    <td><a><b>3000000</b></a></td>
+                                </tr>
+                                <tr>
+                                    <td>3</td>
+                                    <td><a>Cebuano</a></td>
+                                    <td><a>Sinugboanong Binisaya</a></td>
+                                    <td><a>ceb</a></td>
+                                    <td><a><b>3000000</b></a></td>
+                                </tr>
+                            </tbody>
+                        </table>
+                        <h3>Table header</h3>
+                        <table class="sortable jquery-tablesorter">
+                            <thead>
+                                <tr>
+                                    <th>N</th>
+                                    <th>Language</th>
+                                    <th>Language (local)</th>
+                                    <th>Wiki</th>
+                                    <th>Articles</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr>
+                                    <td>2</td>
+                                    <td><a>Norwegian (Bokmål)</a></td>
+                                    <td><a>Norsk (Bokmål)</a></td>
+                                    <td><a>no</a></td>
+                                    <td><a><b>100000</b></a></td>
+                                </tr>
+                            </tbody>
+                        </table>
+                    </div>
+                </div>
+            </body>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = wikipedia._fetch_supported_languages(response)
+        self.assertEqual(type(languages), dict)
+        self.assertEqual(len(languages), 3)
+
+        self.assertIn('sv', languages)
+        self.assertIn('ceb', languages)
+        self.assertIn('no', languages)
+
+        self.assertEqual(type(languages['sv']), dict)
+        self.assertEqual(type(languages['ceb']), dict)
+        self.assertEqual(type(languages['no']), dict)
+
+        self.assertIn('name', languages['sv'])
+        self.assertIn('english_name', languages['sv'])
+        self.assertIn('articles', languages['sv'])
+
+        self.assertEqual(languages['sv']['name'], 'Svenska')
+        self.assertEqual(languages['sv']['english_name'], 'Swedish')
+        self.assertEqual(languages['sv']['articles'], 3000000)
+        self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
+        self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
+        self.assertEqual(languages['ceb']['articles'], 3000000)
+        self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
+        self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
+        self.assertEqual(languages['no']['articles'], 100000)
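The fixture mirrors Wikipedia's List-of-Wikipedias tables, and the assertions expect a dict keyed by wiki code holding the local name, the English name and the article count. A rough sketch of a parser for that table layout (illustrative; the engine's real XPath may differ):

    # Sketch: build {wiki_code: {'name', 'english_name', 'articles'}} from the tables above.
    from lxml.html import fromstring

    def parse_wikipedia_tables(html_text):
        dom = fromstring(html_text)
        languages = {}
        for row in dom.xpath('//table[contains(@class, "sortable")]//tbody/tr'):
            cells = row.xpath('./td')
            if len(cells) < 5:
                continue
            code = cells[3].text_content().strip()
            languages[code] = {'english_name': cells[1].text_content().strip(),
                               'name': cells[2].text_content().strip(),
                               'articles': int(cells[4].text_content().strip())}
        return languages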

+ 30 - 0
tests/unit/engines/test_yahoo.py

@@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
         results = yahoo.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
+
+    def test_fetch_supported_languages(self):
+        html = """<html></html>"""
+        response = mock.Mock(text=html)
+        results = yahoo._fetch_supported_languages(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        html = """
+        <html>
+            <div>
+                <div id="yschlang">
+                    <span>
+                        <label><input value="lang_ar"></input></label>
+                    </span>
+                    <span>
+                        <label><input value="lang_zh_chs"></input></label>
+                        <label><input value="lang_zh_cht"></input></label>
+                    </span>
+                </div>
+            </div>
+        </html>
+        """
+        response = mock.Mock(text=html)
+        languages = yahoo._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 3)
+        self.assertIn('ar', languages)
+        self.assertIn('zh-chs', languages)
+        self.assertIn('zh-cht', languages)
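Here the expected codes are derived from the input values by stripping the lang_ prefix and turning underscores into dashes (lang_zh_cht becomes zh-cht). A small sketch of that normalization against the fixture's markup (names are illustrative, not the engine's code):

    # Sketch of the value normalization the yahoo assertions above imply.
    from lxml.html import fromstring

    def parse_yahoo_languages(html_text):
        dom = fromstring(html_text)
        languages = []
        for value in dom.xpath('//div[@id="yschlang"]//input/@value'):
            if value.startswith('lang_'):
                languages.append(value[len('lang_'):].replace('_', '-'))
        return languages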

+ 22 - 1
tests/unit/test_preferences.py

@@ -1,4 +1,4 @@
-from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException,
+from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting,
                                MultipleChoiceSetting, PluginsSetting, ValidationException)
 from searx.testing import SearxTestCase
 
@@ -88,6 +88,27 @@ class TestSettings(SearxTestCase):
         setting.parse('2')
         self.assertEquals(setting.get_value(), ['2'])
 
+    # search language settings
+    def test_lang_setting_valid_choice(self):
+        setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
+        setting.parse('de')
+        self.assertEquals(setting.get_value(), 'de')
+
+    def test_lang_setting_invalid_choice(self):
+        setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
+        setting.parse('xx')
+        self.assertEquals(setting.get_value(), 'all')
+
+    def test_lang_setting_old_cookie_choice(self):
+        setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
+        setting.parse('es_XA')
+        self.assertEquals(setting.get_value(), 'es')
+
+    def test_lang_setting_old_cookie_format(self):
+        setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
+        setting.parse('es_ES')
+        self.assertEquals(setting.get_value(), 'es-ES')
+
     # plugins settings
     def test_plugins_setting_all_default_enabled(self):
         plugin1 = PluginStub('plugin1', True)
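The last two cases cover cookies written before this change, which stored values such as es_ES or the pseudo-locale es_XA. A minimal sketch of the normalization those tests describe (illustrative; the real SearchLanguageSetting lives in searx/preferences.py):

    # Illustrative sketch only; see searx/preferences.py for the actual class.
    class LanguageSettingSketch(object):
        def __init__(self, default, choices):
            self.default = default
            self.choices = choices
            self.value = default

        def parse(self, data):
            lang = data.replace('_', '-')          # old cookies used underscores
            if lang in self.choices:
                self.value = lang
            elif lang.split('-')[0] in self.choices:
                # unknown or artificial country (e.g. es_XA): keep the bare language
                self.value = lang.split('-')[0]
            else:
                self.value = self.default

        def get_value(self):
            return self.value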

+ 171 - 0
utils/fetch_languages.py

@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+
+# This script generates languages.py by combining the languages supported by each engine.
+#
+# Country names are obtained from http://api.geonames.org, which requires registering as a user.
+#
+# The output files (engines_languages.json and languages.py) are written to the current
+# directory to avoid overwriting the existing ones in case something goes wrong.
+
+from requests import get
+from urllib import urlencode
+from lxml.html import fromstring
+from json import loads, dumps
+import io
+from sys import path
+path.append('../searx')  # noqa
+from searx.engines import engines
+
+# Geonames API for country names.
+geonames_user = ''  # ADD USER NAME HERE
+country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}'
+
+# Output files.
+engines_languages_file = 'engines_languages.json'
+languages_file = 'languages.py'
+
+engines_languages = {}
+languages = {}
+
+
+# To filter out invalid codes and dialects.
+def valid_code(lang_code):
+    # filter invalid codes
+    # sl-SL is technically not invalid, but still a mistake
+    invalid_codes = ['sl-SL', 'wt-WT', 'jw']
+    invalid_countries = ['UK', 'XA', 'XL']
+    if lang_code[:2] == 'xx'\
+       or lang_code in invalid_codes\
+       or lang_code[-2:] in invalid_countries\
+       or is_dialect(lang_code):
+        return False
+
+    return True
+
+
+# Detect language codes that carry any tag other than language and country (i.e. dialects).
+def is_dialect(lang_code):
+    lang_code = lang_code.split('-')
+    if len(lang_code) > 2 or len(lang_code[0]) > 3:
+        return True
+    if len(lang_code) == 2 and len(lang_code[1]) > 2:
+        return True
+
+    return False
+
+
+# Get country name in specified language.
+def get_country_name(locale):
+    if geonames_user == '':
+        return ''
+
+    locale = locale.split('-')
+    if len(locale) != 2:
+        return ''
+
+    url = country_names_url.format(parameters=urlencode({'lang': locale[0],
+                                                         'country': locale[1],
+                                                         'username': geonames_user}))
+    response = get(url)
+    json = loads(response.text)
+    content = json.get('geonames', None)
+    if content is None or len(content) != 1:
+        print "No country name found for " + locale[0] + "-" + locale[1]
+        return ''
+
+    return content[0].get('countryName', '')
+
+
+# Fetches the languages supported by each engine and writes them to a json file.
+def fetch_supported_languages():
+    for engine_name in engines:
+        if hasattr(engines[engine_name], 'fetch_supported_languages'):
+            try:
+                engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
+            except Exception as e:
+                print e
+
+    # write json file
+    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
+        f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
+
+
+# Join all language lists by iterating over every language supported by each engine.
+def join_language_lists():
+    # include wikipedia first for more accurate language names
+    languages.update({code: lang for code, lang
+                      in engines_languages['wikipedia'].iteritems()
+                      if valid_code(code)})
+
+    for engine_name in engines_languages:
+        for locale in engines_languages[engine_name]:
+            if not valid_code(locale):
+                continue
+
+            # if language is not on list or if it has no name yet
+            if locale not in languages or not languages[locale].get('name'):
+                if isinstance(engines_languages[engine_name], dict):
+                    languages[locale] = engines_languages[engine_name][locale]
+                else:
+                    languages[locale] = {}
+
+    # get locales that have no name or country yet
+    for locale in languages.keys():
+        # try to get language names
+        if not languages[locale].get('name'):
+            name = languages.get(locale.split('-')[0], {}).get('name', None)
+            if name:
+                languages[locale]['name'] = name
+            else:
+                # filter out locales with no name
+                del languages[locale]
+                continue
+
+        # try to get language name in english
+        if not languages[locale].get('english_name'):
+            languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
+
+        # try to get country name
+        if locale.find('-') > 0 and not languages[locale].get('country'):
+            languages[locale]['country'] = get_country_name(locale) or ''
+
+
+# Remove the country-less variant of a language if that language is spoken in only one country.
+def filter_single_country_languages():
+    prev_lang = None
+    for code in sorted(languages):
+        lang = code.split('-')[0]
+        if lang == prev_lang:
+            countries += 1
+        else:
+            if prev_lang is not None and countries == 1:
+                del languages[prev_lang]
+            countries = 0
+            prev_lang = lang
+
+
+# Write languages.py.
+def write_languages_file():
+    new_file = open(languages_file, 'w')
+    file_content = '# -*- coding: utf-8 -*-\n'\
+                   + '# list of language codes\n'\
+                   + '# this file is generated automatically by utils/fetch_languages.py\n'\
+                   + '\nlanguage_codes = ('
+    for code in sorted(languages):
+        file_content += '\n    (u"' + code + '"'\
+                        + ', u"' + languages[code]['name'].split(' (')[0] + '"'\
+                        + ', u"' + languages[code].get('country', '') + '"'\
+                        + ', u"' + languages[code].get('english_name', '').split(' (')[0] + '"),'
+    # remove last comma
+    file_content = file_content[:-1]
+    file_content += '\n)\n'
+    new_file.write(file_content.encode('utf8'))
+    new_file.close()
+
+
+if __name__ == "__main__":
+    fetch_supported_languages()
+    join_language_lists()
+    filter_single_country_languages()
+    write_languages_file()
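For reference, write_languages_file() emits one 4-tuple per locale in the form (code, native name, country, English name); with the sample data used in the tests above, the generated languages.py would contain entries along these lines (values shown for illustration only):

    # -*- coding: utf-8 -*-
    # Example of the generated format; the real file is produced by the script above.
    language_codes = (
        (u"no", u"Norsk", u"", u"Norwegian"),
        (u"sv", u"Svenska", u"", u"Swedish"),
    )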
