Browse Source

Merge pull request #2269 from return42/locale-revision

Revision of the locale- and language- handling in SearXNG
Markus Heiser 2 years ago
parent
commit
f950119ca8
75 changed files with 7824 additions and 6415 deletions
  1. 1 1
      .github/workflows/data-update.yml
  2. 1 1
      docs/admin/engines/configured_engines.rst
  3. 5 2
      docs/admin/engines/settings.rst
  4. 5 0
      docs/conf.py
  5. 36 6
      docs/dev/engine_overview.rst
  6. 4 4
      docs/dev/searxng_extra/update.rst
  7. 9 0
      docs/src/searx.engine.archlinux.rst
  8. 8 0
      docs/src/searx.engine.dailymotion.rst
  9. 22 0
      docs/src/searx.engine.duckduckgo.rst
  10. 17 0
      docs/src/searx.enginelib.rst
  11. 43 0
      docs/src/searx.engines.bing.rst
  12. 24 3
      docs/src/searx.engines.google.rst
  13. 27 0
      docs/src/searx.engines.peertube.rst
  14. 4 4
      docs/src/searx.engines.rst
  15. 13 0
      docs/src/searx.engines.startpage.rst
  16. 27 0
      docs/src/searx.engines.wikipedia.rst
  17. 12 0
      docs/src/searx.locales.rst
  18. 47 0
      docs/src/searx.search.processors.rst
  19. 1 1
      manage
  20. 1 1
      requirements.txt
  21. 85 45
      searx/autocomplete.py
  22. 2 2
      searx/data/__init__.py
  23. 3810 0
      searx/data/engine_traits.json
  24. 0 4381
      searx/data/engines_languages.json
  25. 136 0
      searx/enginelib/__init__.py
  26. 250 0
      searx/enginelib/traits.py
  27. 15 85
      searx/engines/__init__.py
  28. 115 110
      searx/engines/archlinux.py
  29. 204 54
      searx/engines/bing.py
  30. 77 52
      searx/engines/bing_images.py
  31. 117 108
      searx/engines/bing_news.py
  32. 71 41
      searx/engines/bing_videos.py
  33. 127 48
      searx/engines/dailymotion.py
  34. 1 1
      searx/engines/demo_offline.py
  35. 298 62
      searx/engines/duckduckgo.py
  36. 22 13
      searx/engines/duckduckgo_definitions.py
  37. 55 57
      searx/engines/duckduckgo_images.py
  38. 31 4
      searx/engines/duckduckgo_weather.py
  39. 3 3
      searx/engines/gentoo.py
  40. 305 189
      searx/engines/google.py
  41. 28 21
      searx/engines/google_images.py
  42. 199 51
      searx/engines/google_news.py
  43. 63 57
      searx/engines/google_scholar.py
  44. 32 83
      searx/engines/google_videos.py
  45. 148 47
      searx/engines/peertube.py
  46. 22 23
      searx/engines/qwant.py
  47. 45 65
      searx/engines/sepiasearch.py
  48. 352 120
      searx/engines/startpage.py
  49. 36 14
      searx/engines/wikidata.py
  50. 170 62
      searx/engines/wikipedia.py
  51. 46 24
      searx/engines/yahoo.py
  52. 178 12
      searx/locales.py
  53. 1 1
      searx/preferences.py
  54. 3 3
      searx/query.py
  55. 4 1
      searx/search/processors/__init__.py
  56. 11 1
      searx/search/processors/abstract.py
  57. 4 6
      searx/search/processors/online.py
  58. 2 2
      searx/search/processors/online_currency.py
  59. 3 2
      searx/search/processors/online_dictionary.py
  60. 3 2
      searx/search/processors/online_url_search.py
  61. 2 16
      searx/settings.yml
  62. 4 4
      searx/settings_defaults.py
  63. 69 22
      searx/sxng_locales.py
  64. 4 4
      searx/templates/simple/filters/languages.html
  65. 4 4
      searx/templates/simple/preferences.html
  66. 5 93
      searx/utils.py
  67. 33 40
      searx/webapp.py
  68. 7 2
      searx/webutils.py
  69. 4 4
      searxng_extra/update/update_engine_descriptions.py
  70. 198 0
      searxng_extra/update/update_engine_traits.py
  71. 0 313
      searxng_extra/update/update_languages.py
  72. 2 2
      searxng_extra/update/update_osm_keys_tags.py
  73. 111 0
      tests/unit/test_locales.py
  74. 0 33
      tests/unit/test_utils.py
  75. 0 3
      utils/templates/etc/searxng/settings.yml

+ 1 - 1
.github/workflows/data-update.yml

@@ -17,7 +17,7 @@ jobs:
           - update_currencies.py
           - update_currencies.py
           - update_external_bangs.py
           - update_external_bangs.py
           - update_firefox_version.py
           - update_firefox_version.py
-          - update_languages.py
+          - update_engine_traits.py
           - update_wikidata_units.py
           - update_wikidata_units.py
           - update_engine_descriptions.py
           - update_engine_descriptions.py
     steps:
     steps:

+ 1 - 1
docs/admin/engines/configured_engines.rst

@@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table
         - Timeout
         - Timeout
         - Weight
         - Weight
         - Paging
         - Paging
-        - Language
+        - Language, Region
         - Safe search
         - Safe search
         - Time range
         - Time range
 
 

+ 5 - 2
docs/admin/engines/settings.rst

@@ -569,10 +569,13 @@ engine is shown.  Most of the options have a default value or even are optional.
   To disable the engine by default without deleting it.  It will allow the user
   To disable the engine by default without deleting it.  It will allow the user
   to manually activate it in the settings.
   to manually activate it in the settings.
 
 
+``inactive`` : optional
+  Remove the engine from the settings (*disabled & removed*).
+
 ``language`` : optional
 ``language`` : optional
   If you want to use another language for a specific engine, you can define it
   If you want to use another language for a specific engine, you can define it
-  by using the full ISO code of language and country, like ``fr_FR``, ``en_US``,
-  ``de_DE``.
+  by using the ISO code of language (and region), like ``fr``, ``en-US``,
+  ``de-DE``.
 
 
 ``tokens`` : optional
 ``tokens`` : optional
   A list of secret tokens to make this engine *private*, more details see
   A list of secret tokens to make this engine *private*, more details see

+ 5 - 0
docs/conf.py

@@ -127,6 +127,10 @@ extensions = [
     'notfound.extension',  # https://github.com/readthedocs/sphinx-notfound-page
     'notfound.extension',  # https://github.com/readthedocs/sphinx-notfound-page
 ]
 ]
 
 
+autodoc_default_options = {
+    'member-order': 'groupwise',
+}
+
 myst_enable_extensions = [
 myst_enable_extensions = [
   "replacements", "smartquotes"
   "replacements", "smartquotes"
 ]
 ]
@@ -135,6 +139,7 @@ suppress_warnings = ['myst.domains']
 
 
 intersphinx_mapping = {
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "python": ("https://docs.python.org/3/", None),
+    "babel" : ("https://babel.readthedocs.io/en/latest/", None),
     "flask": ("https://flask.palletsprojects.com/", None),
     "flask": ("https://flask.palletsprojects.com/", None),
     "flask_babel": ("https://python-babel.github.io/flask-babel/", None),
     "flask_babel": ("https://python-babel.github.io/flask-babel/", None),
     # "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
     # "werkzeug": ("https://werkzeug.palletsprojects.com/", None),

+ 36 - 6
docs/dev/engine_overview.rst

@@ -54,6 +54,7 @@ Engine File
                                        - ``offline`` :ref:`[ref] <offline engines>`
                                        - ``offline`` :ref:`[ref] <offline engines>`
                                        - ``online_dictionary``
                                        - ``online_dictionary``
                                        - ``online_currency``
                                        - ``online_currency``
+                                       - ``online_url_search``
    ======================= =========== ========================================================
    ======================= =========== ========================================================
 
 
 .. _engine settings:
 .. _engine settings:
@@ -131,8 +132,10 @@ Passed Arguments (request)
 These arguments can be used to construct the search query.  Furthermore,
 These arguments can be used to construct the search query.  Furthermore,
 parameters with default value can be redefined for special purposes.
 parameters with default value can be redefined for special purposes.
 
 
+.. _engine request online:
 
 
-.. table:: If the ``engine_type`` is ``online``
+.. table:: If the ``engine_type`` is :py:obj:`online
+           <searx.search.processors.online.OnlineProcessor.get_params>`
    :width: 100%
    :width: 100%
 
 
    ====================== ============== ========================================================================
    ====================== ============== ========================================================================
@@ -149,12 +152,16 @@ parameters with default value can be redefined for special purposes.
    safesearch             int            ``0``, between ``0`` and ``2`` (normal, moderate, strict)
    safesearch             int            ``0``, between ``0`` and ``2`` (normal, moderate, strict)
    time_range             Optional[str]  ``None``, can be ``day``, ``week``, ``month``, ``year``
    time_range             Optional[str]  ``None``, can be ``day``, ``week``, ``month``, ``year``
    pageno                 int            current pagenumber
    pageno                 int            current pagenumber
-   language               str            specific language code like ``'en_US'``, or ``'all'`` if unspecified
+   searxng_locale         str            SearXNG's locale selected by user.  Specific language code like
+                                         ``'en'``, ``'en-US'``, or ``'all'`` if unspecified.
    ====================== ============== ========================================================================
    ====================== ============== ========================================================================
 
 
 
 
-.. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the
-           ``online`` arguments:
+.. _engine request online_dictionary:
+
+.. table:: If the ``engine_type`` is :py:obj:`online_dictionary
+           <searx.search.processors.online_dictionary.OnlineDictionaryProcessor.get_params>`,
+           in addition to the :ref:`online <engine request online>` arguments:
    :width: 100%
    :width: 100%
 
 
    ====================== ============== ========================================================================
    ====================== ============== ========================================================================
@@ -165,8 +172,11 @@ parameters with default value can be redefined for special purposes.
    query                  str            the text query without the languages
    query                  str            the text query without the languages
    ====================== ============== ========================================================================
    ====================== ============== ========================================================================
 
 
-.. table:: If the ``engine_type`` is ``online_currency```, in addition to the
-           ``online`` arguments:
+.. _engine request online_currency:
+
+.. table:: If the ``engine_type`` is :py:obj:`online_currency
+           <searx.search.processors.online_currency.OnlineCurrencyProcessor.get_params>`,
+           in addition to the :ref:`online <engine request online>` arguments:
    :width: 100%
    :width: 100%
 
 
    ====================== ============== ========================================================================
    ====================== ============== ========================================================================
@@ -179,6 +189,26 @@ parameters with default value can be redefined for special purposes.
    to_name                str            currency name
    to_name                str            currency name
    ====================== ============== ========================================================================
    ====================== ============== ========================================================================
 
 
+.. _engine request online_url_search:
+
+.. table:: If the ``engine_type`` is :py:obj:`online_url_search
+           <searx.search.processors.online_url_search.OnlineUrlSearchProcessor.get_params>`,
+           in addition to the :ref:`online <engine request online>` arguments:
+   :width: 100%
+
+   ====================== ============== ========================================================================
+   argument               type           default-value, information
+   ====================== ============== ========================================================================
+   search_url             dict           URLs from the search query:
+
+                                         .. code:: python
+
+                                            {
+                                              'http': str,
+                                              'ftp': str,
+                                              'data:image': str
+                                            }
+   ====================== ============== ========================================================================
 
 
 Specify Request
 Specify Request
 ---------------
 ---------------

+ 4 - 4
docs/dev/searxng_extra/update.rst

@@ -52,12 +52,12 @@ Scripts to update static data in :origin:`searx/data/`
   :members:
   :members:
 
 
 
 
-``update_languages.py``
-=======================
+``update_engine_traits.py``
+===========================
 
 
-:origin:`[source] <searxng_extra/update/update_languages.py>`
+:origin:`[source] <searxng_extra/update/update_engine_traits.py>`
 
 
-.. automodule:: searxng_extra.update.update_languages
+.. automodule:: searxng_extra.update.update_engine_traits
   :members:
   :members:
 
 
 
 

+ 9 - 0
docs/src/searx.engine.archlinux.rst

@@ -0,0 +1,9 @@
+.. _archlinux engine:
+
+==========
+Arch Linux
+==========
+
+.. automodule:: searx.engines.archlinux
+  :members:
+

+ 8 - 0
docs/src/searx.engine.dailymotion.rst

@@ -0,0 +1,8 @@
+.. _dailymotion engine:
+
+===========
+Dailymotion
+===========
+
+.. automodule:: searx.engines.dailymotion
+  :members:

+ 22 - 0
docs/src/searx.engine.duckduckgo.rst

@@ -0,0 +1,22 @@
+.. _duckduckgo engines:
+
+==================
+DuckDuckGo engines
+==================
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+.. automodule:: searx.engines.duckduckgo
+   :members:
+
+.. automodule:: searx.engines.duckduckgo_images
+   :members:
+
+.. automodule:: searx.engines.duckduckgo_definitions
+   :members:
+
+.. automodule:: searx.engines.duckduckgo_weather
+   :members:

+ 17 - 0
docs/src/searx.enginelib.rst

@@ -0,0 +1,17 @@
+.. _searx.enginelib:
+
+============
+Engine model
+============
+
+.. automodule:: searx.enginelib
+  :members:
+
+.. _searx.enginelib.traits:
+
+=============
+Engine traits
+=============
+
+.. automodule:: searx.enginelib.traits
+  :members:

+ 43 - 0
docs/src/searx.engines.bing.rst

@@ -0,0 +1,43 @@
+.. _bing engines:
+
+============
+Bing Engines
+============
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+
+.. _bing web engine:
+
+Bing WEB
+========
+
+.. automodule:: searx.engines.bing
+  :members:
+
+.. _bing images engine:
+
+Bing Images
+===========
+
+.. automodule:: searx.engines.bing_images
+  :members:
+
+.. _bing videos engine:
+
+Bing Videos
+===========
+
+.. automodule:: searx.engines.bing_videos
+  :members:
+
+.. _bing news engine:
+
+Bing News
+=========
+
+.. automodule:: searx.engines.bing_news
+  :members:

+ 24 - 3
docs/src/searx.engines.google.rst

@@ -12,15 +12,21 @@ Google Engines
 
 
 .. _google API:
 .. _google API:
 
 
-google API
+Google API
 ==========
 ==========
 
 
 .. _Query Parameter Definitions:
 .. _Query Parameter Definitions:
    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
 
 
+SearXNG's implementation of the Google API is mainly done in
+:py:obj:`get_google_info <searx.engines.google.get_google_info>`.
+
 For detailed description of the *REST-full* API see: `Query Parameter
 For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.  Not all parameters can be appied and some engines are *special*
-(e.g. :ref:`google news engine`).
+Definitions`_.  The linked API documentation can sometimes be helpful during
+reverse engineering.  However, we cannot use it in the freely accessible WEB
+services; not all parameters can be applied and some engines are more *special*
+than others (e.g. :ref:`google news engine`).
+
 
 
 .. _google web engine:
 .. _google web engine:
 
 
@@ -30,6 +36,13 @@ Google WEB
 .. automodule:: searx.engines.google
 .. automodule:: searx.engines.google
   :members:
   :members:
 
 
+.. _google autocomplete:
+
+Google Autocomplete
+====================
+
+.. autofunction:: searx.autocomplete.google_complete
+
 .. _google images engine:
 .. _google images engine:
 
 
 Google Images
 Google Images
@@ -53,3 +66,11 @@ Google News
 
 
 .. automodule:: searx.engines.google_news
 .. automodule:: searx.engines.google_news
   :members:
   :members:
+
+.. _google scholar engine:
+
+Google Scholar
+==============
+
+.. automodule:: searx.engines.google_scholar
+  :members:

+ 27 - 0
docs/src/searx.engines.peertube.rst

@@ -0,0 +1,27 @@
+.. _peertube engines:
+
+================
+Peertube Engines
+================
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+
+.. _peertube video engine:
+
+Peertube Video
+==============
+
+.. automodule:: searx.engines.peertube
+  :members:
+
+.. _sepiasearch engine:
+
+SepiaSearch
+===========
+
+.. automodule:: searx.engines.sepiasearch
+  :members:

+ 4 - 4
docs/src/searx.engines.rst

@@ -1,8 +1,8 @@
-.. _load_engines:
+.. _searx.engines:
 
 
-============
-Load Engines
-============
+=================
+SearXNG's engines
+=================
 
 
 .. automodule:: searx.engines
 .. automodule:: searx.engines
   :members:
   :members:

+ 13 - 0
docs/src/searx.engines.startpage.rst

@@ -0,0 +1,13 @@
+.. _startpage engines:
+
+=================
+Startpage engines
+=================
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+.. automodule:: searx.engines.startpage
+   :members:

+ 27 - 0
docs/src/searx.engines.wikipedia.rst

@@ -0,0 +1,27 @@
+.. _wikimedia engines:
+
+=========
+Wikimedia
+=========
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+
+.. _wikipedia engine:
+
+Wikipedia
+=========
+
+.. automodule:: searx.engines.wikipedia
+  :members:
+
+.. _wikidata engine:
+
+Wikidata
+=========
+
+.. automodule:: searx.engines.wikidata
+  :members:

+ 12 - 0
docs/src/searx.locales.rst

@@ -4,5 +4,17 @@
 Locales
 Locales
 =======
 =======
 
 
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
 .. automodule:: searx.locales
 .. automodule:: searx.locales
   :members:
   :members:
+
+
+SearXNG's locale codes
+======================
+
+.. automodule:: searx.sxng_locales
+  :members:

+ 47 - 0
docs/src/searx.search.processors.rst

@@ -0,0 +1,47 @@
+.. _searx.search.processors:
+
+=================
+Search processors
+=================
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+
+Abstract processor class
+========================
+
+.. automodule:: searx.search.processors.abstract
+  :members:
+
+Offline processor
+=================
+
+.. automodule:: searx.search.processors.offline
+  :members:
+
+Online processor
+================
+
+.. automodule:: searx.search.processors.online
+  :members:
+
+Online currency processor
+=========================
+
+.. automodule:: searx.search.processors.online_currency
+  :members:
+
+Online Dictionary processor
+===========================
+
+.. automodule:: searx.search.processors.online_dictionary
+  :members:
+
+Online URL search processor
+===========================
+
+.. automodule:: searx.search.processors.online_url_search
+  :members:

+ 1 - 1
manage

@@ -63,7 +63,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\
 I,C,R,\
 I,C,R,\
 W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
 W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
 E1136"
 E1136"
-PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories"
+PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="traits,supported_languages,language_aliases,logger,categories"
 PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
 PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
 
 
 help() {
 help() {

+ 1 - 1
requirements.txt

@@ -1,5 +1,5 @@
 certifi==2022.12.7
 certifi==2022.12.7
-babel==2.11.0
+babel==2.12.1
 flask-babel==3.0.1
 flask-babel==3.0.1
 flask==2.2.3
 flask==2.2.3
 jinja2==3.1.2
 jinja2==3.1.2

+ 85 - 45
searx/autocomplete.py

@@ -5,20 +5,20 @@
 """
 """
 # pylint: disable=use-dict-literal
 # pylint: disable=use-dict-literal
 
 
-from json import loads
+import json
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
-from lxml import etree
+import lxml
 from httpx import HTTPError
 from httpx import HTTPError
 
 
 from searx import settings
 from searx import settings
-from searx.data import ENGINES_LANGUAGES
+from searx.engines import (
+    engines,
+    google,
+)
 from searx.network import get as http_get
 from searx.network import get as http_get
 from searx.exceptions import SearxEngineResponseException
 from searx.exceptions import SearxEngineResponseException
 
 
-# a fetch_supported_languages() for XPath engines isn't available right now
-# _brave = ENGINES_LANGUAGES['brave'].keys()
-
 
 
 def get(*args, **kwargs):
 def get(*args, **kwargs):
     if 'timeout' not in kwargs:
     if 'timeout' not in kwargs:
@@ -55,34 +55,58 @@ def dbpedia(query, _lang):
     results = []
     results = []
 
 
     if response.ok:
     if response.ok:
-        dom = etree.fromstring(response.content)
+        dom = lxml.etree.fromstring(response.content)
         results = dom.xpath('//Result/Label//text()')
         results = dom.xpath('//Result/Label//text()')
 
 
     return results
     return results
 
 
 
 
-def duckduckgo(query, _lang):
-    # duckduckgo autocompleter
-    url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
+def duckduckgo(query, sxng_locale):
+    """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""
 
 
-    resp = loads(get(url.format(urlencode(dict(q=query)))).text)
-    if len(resp) > 1:
-        return resp[1]
-    return []
+    traits = engines['duckduckgo'].traits
+    args = {
+        'q': query,
+        'kl': traits.get_region(sxng_locale, traits.all_locale),
+    }
 
 
+    url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
+    resp = get(url)
 
 
-def google(query, lang):
-    # google autocompleter
-    autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
+    ret_val = []
+    if resp.ok:
+        j = resp.json()
+        if len(j) > 1:
+            ret_val = j[1]
+    return ret_val
 
 
-    response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))
 
 
-    results = []
+def google_complete(query, sxng_locale):
+    """Autocomplete from Google.  Supports Google's languages and subdomains
+    (:py:obj:`searx.engines.google.get_google_info`) by using the async REST
+    API::
 
 
-    if response.ok:
-        dom = etree.fromstring(response.text)
-        results = dom.xpath('//suggestion/@data')
+        https://{subdomain}/complete/search?{args}
 
 
+    """
+
+    google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
+
+    url = 'https://{subdomain}/complete/search?{args}'
+    args = urlencode(
+        {
+            'q': query,
+            'client': 'gws-wiz',
+            'hl': google_info['params']['hl'],
+        }
+    )
+    results = []
+    resp = get(url.format(subdomain=google_info['subdomain'], args=args))
+    if resp.ok:
+        json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
+        data = json.loads(json_txt)
+        for item in data[0]:
+            results.append(lxml.html.fromstring(item[0]).text_content())
     return results
     return results
 
 
 
 
@@ -109,9 +133,9 @@ def seznam(query, _lang):
     ]
     ]
 
 
 
 
-def startpage(query, lang):
-    # startpage autocompleter
-    lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
+def startpage(query, sxng_locale):
+    """Autocomplete from Startpage. Supports Startpage's languages"""
+    lui = engines['startpage'].traits.get_language(sxng_locale, 'english')
     url = 'https://startpage.com/suggestions?{query}'
     url = 'https://startpage.com/suggestions?{query}'
     resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
     resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
     data = resp.json()
     data = resp.json()
@@ -122,20 +146,20 @@ def swisscows(query, _lang):
     # swisscows autocompleter
     # swisscows autocompleter
     url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
     url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
 
 
-    resp = loads(get(url.format(query=urlencode({'query': query}))).text)
+    resp = json.loads(get(url.format(query=urlencode({'query': query}))).text)
     return resp
     return resp
 
 
 
 
-def qwant(query, lang):
-    # qwant autocompleter (additional parameter : lang=en_en&count=xxx )
-    url = 'https://api.qwant.com/api/suggest?{query}'
-
-    resp = get(url.format(query=urlencode({'q': query, 'lang': lang})))
-
+def qwant(query, sxng_locale):
+    """Autocomplete from Qwant. Supports Qwant's regions."""
     results = []
     results = []
 
 
+    locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
+    url = 'https://api.qwant.com/v3/suggest?{query}'
+    resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
+
     if resp.ok:
     if resp.ok:
-        data = loads(resp.text)
+        data = resp.json()
         if data['status'] == 'success':
         if data['status'] == 'success':
             for item in data['data']['items']:
             for item in data['data']['items']:
                 results.append(item['value'])
                 results.append(item['value'])
@@ -143,21 +167,38 @@ def qwant(query, lang):
     return results
     return results
 
 
 
 
-def wikipedia(query, lang):
-    # wikipedia autocompleter
-    url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json'
+def wikipedia(query, sxng_locale):
+    """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
+    results = []
+    eng_traits = engines['wikipedia'].traits
+    wiki_lang = eng_traits.get_language(sxng_locale, 'en')
+    wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')
+
+    url = 'https://{wiki_netloc}/w/api.php?{args}'
+    args = urlencode(
+        {
+            'action': 'opensearch',
+            'format': 'json',
+            'formatversion': '2',
+            'search': query,
+            'namespace': '0',
+            'limit': '10',
+        }
+    )
+    resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
+    if resp.ok:
+        data = resp.json()
+        if len(data) > 1:
+            results = data[1]
 
 
-    resp = loads(get(url.format(urlencode(dict(search=query)))).text)
-    if len(resp) > 1:
-        return resp[1]
-    return []
+    return results
 
 
 
 
 def yandex(query, _lang):
 def yandex(query, _lang):
     # yandex autocompleter
     # yandex autocompleter
     url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
     url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
 
 
-    resp = loads(get(url.format(urlencode(dict(part=query)))).text)
+    resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
     if len(resp) > 1:
     if len(resp) > 1:
         return resp[1]
         return resp[1]
     return []
     return []
@@ -166,7 +207,7 @@ def yandex(query, _lang):
 backends = {
 backends = {
     'dbpedia': dbpedia,
     'dbpedia': dbpedia,
     'duckduckgo': duckduckgo,
     'duckduckgo': duckduckgo,
-    'google': google,
+    'google': google_complete,
     'seznam': seznam,
     'seznam': seznam,
     'startpage': startpage,
     'startpage': startpage,
     'swisscows': swisscows,
     'swisscows': swisscows,
@@ -177,12 +218,11 @@ backends = {
 }
 }
 
 
 
 
-def search_autocomplete(backend_name, query, lang):
+def search_autocomplete(backend_name, query, sxng_locale):
     backend = backends.get(backend_name)
     backend = backends.get(backend_name)
     if backend is None:
     if backend is None:
         return []
         return []
-
     try:
     try:
-        return backend(query, lang)
+        return backend(query, sxng_locale)
     except (HTTPError, SearxEngineResponseException):
     except (HTTPError, SearxEngineResponseException):
         return []
         return []

+ 2 - 2
searx/data/__init__.py

@@ -7,7 +7,7 @@
 """
 """
 
 
 __all__ = [
 __all__ = [
-    'ENGINES_LANGUAGES',
+    'ENGINE_TRAITS',
     'CURRENCIES',
     'CURRENCIES',
     'USER_AGENTS',
     'USER_AGENTS',
     'EXTERNAL_URLS',
     'EXTERNAL_URLS',
@@ -42,7 +42,6 @@ def ahmia_blacklist_loader():
         return f.read().split()
         return f.read().split()
 
 
 
 
-ENGINES_LANGUAGES = _load('engines_languages.json')
 CURRENCIES = _load('currencies.json')
 CURRENCIES = _load('currencies.json')
 USER_AGENTS = _load('useragents.json')
 USER_AGENTS = _load('useragents.json')
 EXTERNAL_URLS = _load('external_urls.json')
 EXTERNAL_URLS = _load('external_urls.json')
@@ -50,3 +49,4 @@ WIKIDATA_UNITS = _load('wikidata_units.json')
 EXTERNAL_BANGS = _load('external_bangs.json')
 EXTERNAL_BANGS = _load('external_bangs.json')
 OSM_KEYS_TAGS = _load('osm_keys_tags.json')
 OSM_KEYS_TAGS = _load('osm_keys_tags.json')
 ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
 ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
+ENGINE_TRAITS = _load('engine_traits.json')

+ 3810 - 0
searx/data/engine_traits.json

@@ -0,0 +1,3810 @@
+{
+  "arch linux wiki": {
+    "all_locale": null,
+    "custom": {
+      "title": {
+        "de": "Spezial:Suche",
+        "fa": "\u0648\u06cc\u0698\u0647:\u062c\u0633\u062a\u062c\u0648",
+        "ja": "\u7279\u5225:\u691c\u7d22",
+        "zh": "Special:\u641c\u7d22"
+      },
+      "wiki_netloc": {
+        "de": "wiki.archlinux.de",
+        "fa": "wiki.archusers.ir",
+        "ja": "wiki.archlinux.jp",
+        "zh": "wiki.archlinuxcn.org"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "ar": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
+      "bg": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438",
+      "bs": "Bosanski",
+      "cs": "\u010ce\u0161tina",
+      "da": "Dansk",
+      "de": "Deutsch",
+      "el": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac",
+      "en": "English",
+      "es": "Espa\u00f1ol",
+      "fa": "\u0641\u0627\u0631\u0633\u06cc",
+      "fi": "Suomi",
+      "fr": "Fran\u00e7ais",
+      "he": "\u05e2\u05d1\u05e8\u05d9\u05ea",
+      "hr": "Hrvatski",
+      "hu": "Magyar",
+      "id": "Bahasa Indonesia",
+      "it": "Italiano",
+      "ja": "\u65e5\u672c\u8a9e",
+      "ko": "\ud55c\uad6d\uc5b4",
+      "lt": "Lietuvi\u0173",
+      "nl": "Nederlands",
+      "pl": "Polski",
+      "pt": "Portugu\u00eas",
+      "ru": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439",
+      "sk": "Sloven\u010dina",
+      "sr": "\u0421\u0440\u043f\u0441\u043a\u0438 / srpski",
+      "sv": "Svenska",
+      "th": "\u0e44\u0e17\u0e22",
+      "tr": "T\u00fcrk\u00e7e",
+      "uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430",
+      "zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09"
+    },
+    "regions": {}
+  },
+  "bing": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ar": "ar",
+      "bg": "bg",
+      "bn": "bn",
+      "ca": "ca",
+      "cs": "cs",
+      "da": "da",
+      "de": "de",
+      "en": "en",
+      "es": "es",
+      "et": "et",
+      "eu": "eu",
+      "fi": "fi",
+      "fr": "fr",
+      "gl": "gl",
+      "gu": "gu",
+      "he": "he",
+      "hi": "hi",
+      "hr": "hr",
+      "hu": "hu",
+      "is": "is",
+      "it": "it",
+      "ja": "jp",
+      "kn": "kn",
+      "ko": "ko",
+      "lt": "lt",
+      "lv": "lv",
+      "ml": "ml",
+      "mr": "mr",
+      "ms": "ms",
+      "nb": "nb",
+      "nl": "nl",
+      "pa": "pa",
+      "pl": "pl",
+      "pt": "pt-pt",
+      "ro": "ro",
+      "ru": "ru",
+      "sk": "sk",
+      "sl": "sl",
+      "sr": "sr",
+      "sv": "sv",
+      "ta": "ta",
+      "te": "te",
+      "th": "th",
+      "tr": "tr",
+      "uk": "uk",
+      "vi": "vi",
+      "zh": "zh-hans",
+      "zh_Hans": "zh-hans",
+      "zh_Hant": "zh-hant"
+    },
+    "regions": {
+      "da-DK": "da-DK",
+      "de-AT": "de-AT",
+      "de-CH": "de-CH",
+      "de-DE": "de-DE",
+      "en-AU": "en-AU",
+      "en-CA": "en-CA",
+      "en-GB": "en-GB",
+      "en-IN": "en-IN",
+      "en-MY": "en-MY",
+      "en-NZ": "en-NZ",
+      "en-PH": "en-PH",
+      "en-US": "en-US",
+      "en-ZA": "en-ZA",
+      "es-AR": "es-AR",
+      "es-CL": "es-CL",
+      "es-ES": "es-ES",
+      "es-MX": "es-MX",
+      "es-US": "es-US",
+      "fi-FI": "fi-FI",
+      "fr-BE": "fr-BE",
+      "fr-CA": "fr-CA",
+      "fr-CH": "fr-CH",
+      "fr-FR": "fr-FR",
+      "id-ID": "en-ID",
+      "it-IT": "it-IT",
+      "ja-JP": "ja-JP",
+      "ko-KR": "ko-KR",
+      "nb-NO": "no-NO",
+      "nl-BE": "nl-BE",
+      "nl-NL": "nl-NL",
+      "pl-PL": "pl-PL",
+      "pt-BR": "pt-BR",
+      "ru-RU": "ru-RU",
+      "sv-SE": "sv-SE",
+      "tr-TR": "tr-TR",
+      "zh-CN": "zh-CN",
+      "zh-HK": "zh-HK",
+      "zh-TW": "zh-TW"
+    }
+  },
+  "bing images": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ar": "ar",
+      "bg": "bg",
+      "bn": "bn",
+      "ca": "ca",
+      "cs": "cs",
+      "da": "da",
+      "de": "de",
+      "en": "en",
+      "es": "es",
+      "et": "et",
+      "eu": "eu",
+      "fi": "fi",
+      "fr": "fr",
+      "gl": "gl",
+      "gu": "gu",
+      "he": "he",
+      "hi": "hi",
+      "hr": "hr",
+      "hu": "hu",
+      "is": "is",
+      "it": "it",
+      "ja": "jp",
+      "kn": "kn",
+      "ko": "ko",
+      "lt": "lt",
+      "lv": "lv",
+      "ml": "ml",
+      "mr": "mr",
+      "ms": "ms",
+      "nb": "nb",
+      "nl": "nl",
+      "pa": "pa",
+      "pl": "pl",
+      "pt": "pt-pt",
+      "ro": "ro",
+      "ru": "ru",
+      "sk": "sk",
+      "sl": "sl",
+      "sr": "sr",
+      "sv": "sv",
+      "ta": "ta",
+      "te": "te",
+      "th": "th",
+      "tr": "tr",
+      "uk": "uk",
+      "vi": "vi",
+      "zh": "zh-hans",
+      "zh_Hans": "zh-hans",
+      "zh_Hant": "zh-hant"
+    },
+    "regions": {
+      "da-DK": "da-DK",
+      "de-AT": "de-AT",
+      "de-CH": "de-CH",
+      "de-DE": "de-DE",
+      "en-AU": "en-AU",
+      "en-CA": "en-CA",
+      "en-GB": "en-GB",
+      "en-IN": "en-IN",
+      "en-MY": "en-MY",
+      "en-NZ": "en-NZ",
+      "en-PH": "en-PH",
+      "en-US": "en-US",
+      "en-ZA": "en-ZA",
+      "es-AR": "es-AR",
+      "es-CL": "es-CL",
+      "es-ES": "es-ES",
+      "es-MX": "es-MX",
+      "es-US": "es-US",
+      "fi-FI": "fi-FI",
+      "fr-BE": "fr-BE",
+      "fr-CA": "fr-CA",
+      "fr-CH": "fr-CH",
+      "fr-FR": "fr-FR",
+      "id-ID": "en-ID",
+      "it-IT": "it-IT",
+      "ja-JP": "ja-JP",
+      "ko-KR": "ko-KR",
+      "nb-NO": "no-NO",
+      "nl-BE": "nl-BE",
+      "nl-NL": "nl-NL",
+      "pl-PL": "pl-PL",
+      "pt-BR": "pt-BR",
+      "ru-RU": "ru-RU",
+      "sv-SE": "sv-SE",
+      "tr-TR": "tr-TR",
+      "zh-CN": "zh-CN",
+      "zh-HK": "zh-HK",
+      "zh-TW": "zh-TW"
+    }
+  },
+  "bing news": {
+    "all_locale": "en-WW",
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ar": "ar",
+      "bg": "bg",
+      "bn": "bn",
+      "ca": "ca",
+      "cs": "cs",
+      "da": "da",
+      "de": "de",
+      "en": "en",
+      "es": "es",
+      "et": "et",
+      "eu": "eu",
+      "fi": "fi",
+      "fr": "fr",
+      "gl": "gl",
+      "gu": "gu",
+      "he": "he",
+      "hi": "hi",
+      "hr": "hr",
+      "hu": "hu",
+      "is": "is",
+      "it": "it",
+      "ja": "jp",
+      "kn": "kn",
+      "ko": "ko",
+      "lt": "lt",
+      "lv": "lv",
+      "ml": "ml",
+      "mr": "mr",
+      "ms": "ms",
+      "nb": "nb",
+      "nl": "nl",
+      "pa": "pa",
+      "pl": "pl",
+      "pt": "pt-pt",
+      "ro": "ro",
+      "ru": "ru",
+      "sk": "sk",
+      "sl": "sl",
+      "sr": "sr",
+      "sv": "sv",
+      "ta": "ta",
+      "te": "te",
+      "th": "th",
+      "tr": "tr",
+      "uk": "uk",
+      "vi": "vi",
+      "zh": "zh-hans",
+      "zh_Hans": "zh-hans",
+      "zh_Hant": "zh-hant"
+    },
+    "regions": {
+      "da-DK": "da-DK",
+      "de-DE": "de-DE",
+      "en-AU": "en-AU",
+      "en-GB": "en-GB",
+      "en-US": "en-US",
+      "es-CL": "es-CL",
+      "es-MX": "es-MX",
+      "es-US": "es-US",
+      "fi-FI": "fi-FI",
+      "fr-CA": "fr-CA",
+      "fr-FR": "fr-FR",
+      "it-IT": "it-IT",
+      "pt-BR": "pt-BR",
+      "zh-CN": "zh-CN"
+    }
+  },
+  "bing videos": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ar": "ar",
+      "bg": "bg",
+      "bn": "bn",
+      "ca": "ca",
+      "cs": "cs",
+      "da": "da",
+      "de": "de",
+      "en": "en",
+      "es": "es",
+      "et": "et",
+      "eu": "eu",
+      "fi": "fi",
+      "fr": "fr",
+      "gl": "gl",
+      "gu": "gu",
+      "he": "he",
+      "hi": "hi",
+      "hr": "hr",
+      "hu": "hu",
+      "is": "is",
+      "it": "it",
+      "ja": "jp",
+      "kn": "kn",
+      "ko": "ko",
+      "lt": "lt",
+      "lv": "lv",
+      "ml": "ml",
+      "mr": "mr",
+      "ms": "ms",
+      "nb": "nb",
+      "nl": "nl",
+      "pa": "pa",
+      "pl": "pl",
+      "pt": "pt-pt",
+      "ro": "ro",
+      "ru": "ru",
+      "sk": "sk",
+      "sl": "sl",
+      "sr": "sr",
+      "sv": "sv",
+      "ta": "ta",
+      "te": "te",
+      "th": "th",
+      "tr": "tr",
+      "uk": "uk",
+      "vi": "vi",
+      "zh": "zh-hans",
+      "zh_Hans": "zh-hans",
+      "zh_Hant": "zh-hant"
+    },
+    "regions": {
+      "da-DK": "da-DK",
+      "de-AT": "de-AT",
+      "de-CH": "de-CH",
+      "de-DE": "de-DE",
+      "en-AU": "en-AU",
+      "en-CA": "en-CA",
+      "en-GB": "en-GB",
+      "en-IN": "en-IN",
+      "en-MY": "en-MY",
+      "en-NZ": "en-NZ",
+      "en-PH": "en-PH",
+      "en-US": "en-US",
+      "en-ZA": "en-ZA",
+      "es-AR": "es-AR",
+      "es-CL": "es-CL",
+      "es-ES": "es-ES",
+      "es-MX": "es-MX",
+      "es-US": "es-US",
+      "fi-FI": "fi-FI",
+      "fr-BE": "fr-BE",
+      "fr-CA": "fr-CA",
+      "fr-CH": "fr-CH",
+      "fr-FR": "fr-FR",
+      "id-ID": "en-ID",
+      "it-IT": "it-IT",
+      "ja-JP": "ja-JP",
+      "ko-KR": "ko-KR",
+      "nb-NO": "no-NO",
+      "nl-BE": "nl-BE",
+      "nl-NL": "nl-NL",
+      "pl-PL": "pl-PL",
+      "pt-BR": "pt-BR",
+      "ru-RU": "ru-RU",
+      "sv-SE": "sv-SE",
+      "tr-TR": "tr-TR",
+      "zh-CN": "zh-CN",
+      "zh-HK": "zh-HK",
+      "zh-TW": "zh-TW"
+    }
+  },
+  "dailymotion": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ar": "ar",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "es": "es",
+      "fr": "fr",
+      "id": "id",
+      "it": "it",
+      "ja": "ja",
+      "ko": "ko",
+      "ms": "ms",
+      "nl": "nl",
+      "pl": "pl",
+      "pt": "pt",
+      "ro": "ro",
+      "ru": "ru",
+      "th": "th",
+      "tr": "tr",
+      "vi": "vi",
+      "zh": "zh"
+    },
+    "regions": {
+      "ar-AE": "ar_AE",
+      "ar-EG": "ar_EG",
+      "ar-SA": "ar_SA",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-HK": "en_HK",
+      "en-IE": "en_IE",
+      "en-IN": "en_IN",
+      "en-NG": "en_NG",
+      "en-PH": "en_PH",
+      "en-PK": "en_PK",
+      "en-SG": "en_SG",
+      "en-US": "en_US",
+      "en-ZA": "en_ZA",
+      "es-AR": "es_AR",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-CI": "fr_CI",
+      "fr-FR": "fr_FR",
+      "fr-MA": "fr_MA",
+      "fr-SN": "fr_SN",
+      "fr-TN": "fr_TN",
+      "id-ID": "id_ID",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ja-JP": "ja_JP",
+      "ko-KR": "ko_KR",
+      "ms-MY": "ms_MY",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-BR": "pt_BR",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "ru-RU": "ru_RU",
+      "th-TH": "th_TH",
+      "tr-TR": "tr_TR",
+      "vi-VN": "vi_VN",
+      "zh-CN": "zh_CN",
+      "zh-TW": "zh_TW"
+    }
+  },
+  "duckduckgo": {
+    "all_locale": "wt-wt",
+    "custom": {
+      "lang_region": {
+        "ar-DZ": "ar_DZ",
+        "ar-JO": "ar_JO",
+        "ar-SA": "ar_SA",
+        "bn-IN": "bn_IN",
+        "de-CH": "de_CH",
+        "en-AU": "en_AU",
+        "en-CA": "en_CA",
+        "en-GB": "en_GB",
+        "es-AR": "es_AR",
+        "es-CL": "es_CL",
+        "es-CO": "es_CO",
+        "es-CR": "es_CR",
+        "es-EC": "es_EC",
+        "es-MX": "es_MX",
+        "es-PE": "es_PE",
+        "es-UY": "es_UY",
+        "es-VE": "es_VE",
+        "fr-BE": "fr_BE",
+        "fr-CA": "fr_CA",
+        "fr-CH": "fr_CH",
+        "nl-BE": "nl_BE",
+        "pt-BR": "pt_BR"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af_ZA",
+      "ar": "ar_EG",
+      "ast": "ast_ES",
+      "az_Latn": "az_AZ",
+      "be": "be_BY",
+      "bg": "bg_BG",
+      "bn": "bn_BD",
+      "br": "br_FR",
+      "bs_Latn": "bs_BA",
+      "ca": "ca_ES",
+      "cs": "cs_CZ",
+      "cy": "cy_GB",
+      "da": "da_DK",
+      "de": "de_DE",
+      "el": "el_GR",
+      "en": "en_US",
+      "eo": "eo_XX",
+      "es": "es_ES",
+      "et": "et_EE",
+      "eu": "eu_ES",
+      "fa": "fa_IR",
+      "fi": "fi_FI",
+      "fil": "tl_PH",
+      "fr": "fr_FR",
+      "ga": "ga_IE",
+      "gd": "gd_GB",
+      "gl": "gl_ES",
+      "he": "he_IL",
+      "hi": "hi_IN",
+      "hr": "hr_HR",
+      "hu": "hu_HU",
+      "hy": "hy_AM",
+      "id": "id_ID",
+      "is": "is_IS",
+      "it": "it_IT",
+      "ja": "ja_JP",
+      "kab": "kab_DZ",
+      "kn": "kn_IN",
+      "ko": "ko_KR",
+      "ku": "ku",
+      "kw": "kw_GB",
+      "lt": "lt_LT",
+      "lv": "lv_LV",
+      "ml": "ml_IN",
+      "mr": "mr_IN",
+      "ms": "ms_MY",
+      "nb": "nb_NO",
+      "nl": "nl_NL",
+      "nn": "nn_NO",
+      "pl": "pl_PL",
+      "pt": "pt_PT",
+      "ro": "ro_RO",
+      "ru": "ru_RU",
+      "sc": "sc_IT",
+      "si": "si_LK",
+      "sk": "sk_SK",
+      "sl": "sl_SI",
+      "sq": "sq_AL",
+      "sr_Cyrl": "sr_RS",
+      "sv": "sv_SE",
+      "ta": "ta_IN",
+      "te": "te_IN",
+      "th": "th_TH",
+      "tr": "tr_TR",
+      "uk": "uk_UA",
+      "ur": "ur_PK",
+      "vi": "vi_VN",
+      "zh_Hans": "zh_CN",
+      "zh_Hant": "zh_TW"
+    },
+    "regions": {
+      "ar-SA": "xa-ar",
+      "bg-BG": "bg-bg",
+      "ca-ES": "es-ca",
+      "cs-CZ": "cz-cs",
+      "da-DK": "dk-da",
+      "de-AT": "at-de",
+      "de-CH": "ch-de",
+      "de-DE": "de-de",
+      "el-GR": "gr-el",
+      "en-AU": "au-en",
+      "en-CA": "ca-en",
+      "en-GB": "uk-en",
+      "en-IE": "ie-en",
+      "en-IL": "il-en",
+      "en-IN": "in-en",
+      "en-MY": "my-en",
+      "en-NZ": "nz-en",
+      "en-PH": "ph-en",
+      "en-PK": "pk-en",
+      "en-SG": "sg-en",
+      "en-US": "us-en",
+      "en-ZA": "za-en",
+      "es-AR": "ar-es",
+      "es-CL": "cl-es",
+      "es-CO": "co-es",
+      "es-ES": "es-es",
+      "es-MX": "mx-es",
+      "es-PE": "pe-es",
+      "es-US": "us-es",
+      "et-EE": "ee-et",
+      "fi-FI": "fi-fi",
+      "fr-BE": "be-fr",
+      "fr-CA": "ca-fr",
+      "fr-CH": "ch-fr",
+      "fr-FR": "fr-fr",
+      "hr-HR": "hr-hr",
+      "hu-HU": "hu-hu",
+      "id-ID": "id-en",
+      "it-IT": "it-it",
+      "ja-JP": "jp-jp",
+      "ko-KR": "kr-kr",
+      "lt-LT": "lt-lt",
+      "lv-LV": "lv-lv",
+      "nb-NO": "no-no",
+      "nl-BE": "be-nl",
+      "nl-NL": "nl-nl",
+      "pl-PL": "pl-pl",
+      "pt-BR": "br-pt",
+      "pt-PT": "pt-pt",
+      "ro-RO": "ro-ro",
+      "ru-RU": "ru-ru",
+      "sk-SK": "sk-sk",
+      "sl-SI": "sl-sl",
+      "sv-SE": "se-sv",
+      "th-TH": "th-en",
+      "tr-TR": "tr-tr",
+      "uk-UA": "ua-uk",
+      "vi-VN": "vn-en",
+      "zh-CN": "cn-zh",
+      "zh-HK": "hk-tzh",
+      "zh-TW": "tw-tzh"
+    }
+  },
+  "duckduckgo images": {
+    "all_locale": "wt-wt",
+    "custom": {
+      "lang_region": {
+        "ar-DZ": "ar_DZ",
+        "ar-JO": "ar_JO",
+        "ar-SA": "ar_SA",
+        "bn-IN": "bn_IN",
+        "de-CH": "de_CH",
+        "en-AU": "en_AU",
+        "en-CA": "en_CA",
+        "en-GB": "en_GB",
+        "es-AR": "es_AR",
+        "es-CL": "es_CL",
+        "es-CO": "es_CO",
+        "es-CR": "es_CR",
+        "es-EC": "es_EC",
+        "es-MX": "es_MX",
+        "es-PE": "es_PE",
+        "es-UY": "es_UY",
+        "es-VE": "es_VE",
+        "fr-BE": "fr_BE",
+        "fr-CA": "fr_CA",
+        "fr-CH": "fr_CH",
+        "nl-BE": "nl_BE",
+        "pt-BR": "pt_BR"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af_ZA",
+      "ar": "ar_EG",
+      "ast": "ast_ES",
+      "az_Latn": "az_AZ",
+      "be": "be_BY",
+      "bg": "bg_BG",
+      "bn": "bn_BD",
+      "br": "br_FR",
+      "bs_Latn": "bs_BA",
+      "ca": "ca_ES",
+      "cs": "cs_CZ",
+      "cy": "cy_GB",
+      "da": "da_DK",
+      "de": "de_DE",
+      "el": "el_GR",
+      "en": "en_US",
+      "eo": "eo_XX",
+      "es": "es_ES",
+      "et": "et_EE",
+      "eu": "eu_ES",
+      "fa": "fa_IR",
+      "fi": "fi_FI",
+      "fil": "tl_PH",
+      "fr": "fr_FR",
+      "ga": "ga_IE",
+      "gd": "gd_GB",
+      "gl": "gl_ES",
+      "he": "he_IL",
+      "hi": "hi_IN",
+      "hr": "hr_HR",
+      "hu": "hu_HU",
+      "hy": "hy_AM",
+      "id": "id_ID",
+      "is": "is_IS",
+      "it": "it_IT",
+      "ja": "ja_JP",
+      "kab": "kab_DZ",
+      "kn": "kn_IN",
+      "ko": "ko_KR",
+      "ku": "ku",
+      "kw": "kw_GB",
+      "lt": "lt_LT",
+      "lv": "lv_LV",
+      "ml": "ml_IN",
+      "mr": "mr_IN",
+      "ms": "ms_MY",
+      "nb": "nb_NO",
+      "nl": "nl_NL",
+      "nn": "nn_NO",
+      "pl": "pl_PL",
+      "pt": "pt_PT",
+      "ro": "ro_RO",
+      "ru": "ru_RU",
+      "sc": "sc_IT",
+      "si": "si_LK",
+      "sk": "sk_SK",
+      "sl": "sl_SI",
+      "sq": "sq_AL",
+      "sr_Cyrl": "sr_RS",
+      "sv": "sv_SE",
+      "ta": "ta_IN",
+      "te": "te_IN",
+      "th": "th_TH",
+      "tr": "tr_TR",
+      "uk": "uk_UA",
+      "ur": "ur_PK",
+      "vi": "vi_VN",
+      "zh_Hans": "zh_CN",
+      "zh_Hant": "zh_TW"
+    },
+    "regions": {
+      "ar-SA": "xa-ar",
+      "bg-BG": "bg-bg",
+      "ca-ES": "es-ca",
+      "cs-CZ": "cz-cs",
+      "da-DK": "dk-da",
+      "de-AT": "at-de",
+      "de-CH": "ch-de",
+      "de-DE": "de-de",
+      "el-GR": "gr-el",
+      "en-AU": "au-en",
+      "en-CA": "ca-en",
+      "en-GB": "uk-en",
+      "en-IE": "ie-en",
+      "en-IL": "il-en",
+      "en-IN": "in-en",
+      "en-MY": "my-en",
+      "en-NZ": "nz-en",
+      "en-PH": "ph-en",
+      "en-PK": "pk-en",
+      "en-SG": "sg-en",
+      "en-US": "us-en",
+      "en-ZA": "za-en",
+      "es-AR": "ar-es",
+      "es-CL": "cl-es",
+      "es-CO": "co-es",
+      "es-ES": "es-es",
+      "es-MX": "mx-es",
+      "es-PE": "pe-es",
+      "es-US": "us-es",
+      "et-EE": "ee-et",
+      "fi-FI": "fi-fi",
+      "fr-BE": "be-fr",
+      "fr-CA": "ca-fr",
+      "fr-CH": "ch-fr",
+      "fr-FR": "fr-fr",
+      "hr-HR": "hr-hr",
+      "hu-HU": "hu-hu",
+      "id-ID": "id-en",
+      "it-IT": "it-it",
+      "ja-JP": "jp-jp",
+      "ko-KR": "kr-kr",
+      "lt-LT": "lt-lt",
+      "lv-LV": "lv-lv",
+      "nb-NO": "no-no",
+      "nl-BE": "be-nl",
+      "nl-NL": "nl-nl",
+      "pl-PL": "pl-pl",
+      "pt-BR": "br-pt",
+      "pt-PT": "pt-pt",
+      "ro-RO": "ro-ro",
+      "ru-RU": "ru-ru",
+      "sk-SK": "sk-sk",
+      "sl-SI": "sl-sl",
+      "sv-SE": "se-sv",
+      "th-TH": "th-en",
+      "tr-TR": "tr-tr",
+      "uk-UA": "ua-uk",
+      "vi-VN": "vn-en",
+      "zh-CN": "cn-zh",
+      "zh-HK": "hk-tzh",
+      "zh-TW": "tw-tzh"
+    }
+  },
+  "duckduckgo weather": {
+    "all_locale": "wt-wt",
+    "custom": {
+      "lang_region": {
+        "ar-DZ": "ar_DZ",
+        "ar-JO": "ar_JO",
+        "ar-SA": "ar_SA",
+        "bn-IN": "bn_IN",
+        "de-CH": "de_CH",
+        "en-AU": "en_AU",
+        "en-CA": "en_CA",
+        "en-GB": "en_GB",
+        "es-AR": "es_AR",
+        "es-CL": "es_CL",
+        "es-CO": "es_CO",
+        "es-CR": "es_CR",
+        "es-EC": "es_EC",
+        "es-MX": "es_MX",
+        "es-PE": "es_PE",
+        "es-UY": "es_UY",
+        "es-VE": "es_VE",
+        "fr-BE": "fr_BE",
+        "fr-CA": "fr_CA",
+        "fr-CH": "fr_CH",
+        "nl-BE": "nl_BE",
+        "pt-BR": "pt_BR"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af_ZA",
+      "ar": "ar_EG",
+      "ast": "ast_ES",
+      "az_Latn": "az_AZ",
+      "be": "be_BY",
+      "bg": "bg_BG",
+      "bn": "bn_BD",
+      "br": "br_FR",
+      "bs_Latn": "bs_BA",
+      "ca": "ca_ES",
+      "cs": "cs_CZ",
+      "cy": "cy_GB",
+      "da": "da_DK",
+      "de": "de_DE",
+      "el": "el_GR",
+      "en": "en_US",
+      "eo": "eo_XX",
+      "es": "es_ES",
+      "et": "et_EE",
+      "eu": "eu_ES",
+      "fa": "fa_IR",
+      "fi": "fi_FI",
+      "fil": "tl_PH",
+      "fr": "fr_FR",
+      "ga": "ga_IE",
+      "gd": "gd_GB",
+      "gl": "gl_ES",
+      "he": "he_IL",
+      "hi": "hi_IN",
+      "hr": "hr_HR",
+      "hu": "hu_HU",
+      "hy": "hy_AM",
+      "id": "id_ID",
+      "is": "is_IS",
+      "it": "it_IT",
+      "ja": "ja_JP",
+      "kab": "kab_DZ",
+      "kn": "kn_IN",
+      "ko": "ko_KR",
+      "ku": "ku",
+      "kw": "kw_GB",
+      "lt": "lt_LT",
+      "lv": "lv_LV",
+      "ml": "ml_IN",
+      "mr": "mr_IN",
+      "ms": "ms_MY",
+      "nb": "nb_NO",
+      "nl": "nl_NL",
+      "nn": "nn_NO",
+      "pl": "pl_PL",
+      "pt": "pt_PT",
+      "ro": "ro_RO",
+      "ru": "ru_RU",
+      "sc": "sc_IT",
+      "si": "si_LK",
+      "sk": "sk_SK",
+      "sl": "sl_SI",
+      "sq": "sq_AL",
+      "sr_Cyrl": "sr_RS",
+      "sv": "sv_SE",
+      "ta": "ta_IN",
+      "te": "te_IN",
+      "th": "th_TH",
+      "tr": "tr_TR",
+      "uk": "uk_UA",
+      "ur": "ur_PK",
+      "vi": "vi_VN",
+      "zh_Hans": "zh_CN",
+      "zh_Hant": "zh_TW"
+    },
+    "regions": {
+      "ar-SA": "xa-ar",
+      "bg-BG": "bg-bg",
+      "ca-ES": "es-ca",
+      "cs-CZ": "cz-cs",
+      "da-DK": "dk-da",
+      "de-AT": "at-de",
+      "de-CH": "ch-de",
+      "de-DE": "de-de",
+      "el-GR": "gr-el",
+      "en-AU": "au-en",
+      "en-CA": "ca-en",
+      "en-GB": "uk-en",
+      "en-IE": "ie-en",
+      "en-IL": "il-en",
+      "en-IN": "in-en",
+      "en-MY": "my-en",
+      "en-NZ": "nz-en",
+      "en-PH": "ph-en",
+      "en-PK": "pk-en",
+      "en-SG": "sg-en",
+      "en-US": "us-en",
+      "en-ZA": "za-en",
+      "es-AR": "ar-es",
+      "es-CL": "cl-es",
+      "es-CO": "co-es",
+      "es-ES": "es-es",
+      "es-MX": "mx-es",
+      "es-PE": "pe-es",
+      "es-US": "us-es",
+      "et-EE": "ee-et",
+      "fi-FI": "fi-fi",
+      "fr-BE": "be-fr",
+      "fr-CA": "ca-fr",
+      "fr-CH": "ch-fr",
+      "fr-FR": "fr-fr",
+      "hr-HR": "hr-hr",
+      "hu-HU": "hu-hu",
+      "id-ID": "id-en",
+      "it-IT": "it-it",
+      "ja-JP": "jp-jp",
+      "ko-KR": "kr-kr",
+      "lt-LT": "lt-lt",
+      "lv-LV": "lv-lv",
+      "nb-NO": "no-no",
+      "nl-BE": "be-nl",
+      "nl-NL": "nl-nl",
+      "pl-PL": "pl-pl",
+      "pt-BR": "br-pt",
+      "pt-PT": "pt-pt",
+      "ro-RO": "ro-ro",
+      "ru-RU": "ru-ru",
+      "sk-SK": "sk-sk",
+      "sl-SI": "sl-sl",
+      "sv-SE": "se-sv",
+      "th-TH": "th-en",
+      "tr-TR": "tr-tr",
+      "uk-UA": "ua-uk",
+      "vi-VN": "vn-en",
+      "zh-CN": "cn-zh",
+      "zh-HK": "hk-tzh",
+      "zh-TW": "tw-tzh"
+    }
+  },
+  "google": {
+    "all_locale": "ZZ",
+    "custom": {
+      "supported_domains": {
+        "AD": "www.google.ad",
+        "AE": "www.google.ae",
+        "AF": "www.google.com.af",
+        "AG": "www.google.com.ag",
+        "AI": "www.google.com.ai",
+        "AL": "www.google.al",
+        "AM": "www.google.am",
+        "AO": "www.google.co.ao",
+        "AR": "www.google.com.ar",
+        "AS": "www.google.as",
+        "AT": "www.google.at",
+        "AU": "www.google.com.au",
+        "AZ": "www.google.az",
+        "BA": "www.google.ba",
+        "BD": "www.google.com.bd",
+        "BE": "www.google.be",
+        "BF": "www.google.bf",
+        "BG": "www.google.bg",
+        "BH": "www.google.com.bh",
+        "BI": "www.google.bi",
+        "BJ": "www.google.bj",
+        "BN": "www.google.com.bn",
+        "BO": "www.google.com.bo",
+        "BR": "www.google.com.br",
+        "BS": "www.google.bs",
+        "BT": "www.google.bt",
+        "BW": "www.google.co.bw",
+        "BY": "www.google.by",
+        "BZ": "www.google.com.bz",
+        "CA": "www.google.ca",
+        "CAT": "www.google.cat",
+        "CD": "www.google.cd",
+        "CF": "www.google.cf",
+        "CG": "www.google.cg",
+        "CH": "www.google.ch",
+        "CI": "www.google.ci",
+        "CK": "www.google.co.ck",
+        "CL": "www.google.cl",
+        "CM": "www.google.cm",
+        "CN": "www.google.com.hk",
+        "CO": "www.google.com.co",
+        "CR": "www.google.co.cr",
+        "CU": "www.google.com.cu",
+        "CV": "www.google.cv",
+        "CY": "www.google.com.cy",
+        "CZ": "www.google.cz",
+        "DE": "www.google.de",
+        "DJ": "www.google.dj",
+        "DK": "www.google.dk",
+        "DM": "www.google.dm",
+        "DO": "www.google.com.do",
+        "DZ": "www.google.dz",
+        "EC": "www.google.com.ec",
+        "EE": "www.google.ee",
+        "EG": "www.google.com.eg",
+        "ES": "www.google.es",
+        "ET": "www.google.com.et",
+        "FI": "www.google.fi",
+        "FJ": "www.google.com.fj",
+        "FM": "www.google.fm",
+        "FR": "www.google.fr",
+        "GA": "www.google.ga",
+        "GE": "www.google.ge",
+        "GG": "www.google.gg",
+        "GH": "www.google.com.gh",
+        "GI": "www.google.com.gi",
+        "GL": "www.google.gl",
+        "GM": "www.google.gm",
+        "GR": "www.google.gr",
+        "GT": "www.google.com.gt",
+        "GY": "www.google.gy",
+        "HK": "www.google.com.hk",
+        "HN": "www.google.hn",
+        "HR": "www.google.hr",
+        "HT": "www.google.ht",
+        "HU": "www.google.hu",
+        "ID": "www.google.co.id",
+        "IE": "www.google.ie",
+        "IL": "www.google.co.il",
+        "IM": "www.google.im",
+        "IN": "www.google.co.in",
+        "IQ": "www.google.iq",
+        "IS": "www.google.is",
+        "IT": "www.google.it",
+        "JE": "www.google.je",
+        "JM": "www.google.com.jm",
+        "JO": "www.google.jo",
+        "JP": "www.google.co.jp",
+        "KE": "www.google.co.ke",
+        "KG": "www.google.kg",
+        "KH": "www.google.com.kh",
+        "KI": "www.google.ki",
+        "KR": "www.google.co.kr",
+        "KW": "www.google.com.kw",
+        "KZ": "www.google.kz",
+        "LA": "www.google.la",
+        "LB": "www.google.com.lb",
+        "LI": "www.google.li",
+        "LK": "www.google.lk",
+        "LS": "www.google.co.ls",
+        "LT": "www.google.lt",
+        "LU": "www.google.lu",
+        "LV": "www.google.lv",
+        "LY": "www.google.com.ly",
+        "MA": "www.google.co.ma",
+        "MD": "www.google.md",
+        "ME": "www.google.me",
+        "MG": "www.google.mg",
+        "MK": "www.google.mk",
+        "ML": "www.google.ml",
+        "MM": "www.google.com.mm",
+        "MN": "www.google.mn",
+        "MS": "www.google.ms",
+        "MT": "www.google.com.mt",
+        "MU": "www.google.mu",
+        "MV": "www.google.mv",
+        "MW": "www.google.mw",
+        "MX": "www.google.com.mx",
+        "MY": "www.google.com.my",
+        "MZ": "www.google.co.mz",
+        "NA": "www.google.com.na",
+        "NE": "www.google.ne",
+        "NG": "www.google.com.ng",
+        "NI": "www.google.com.ni",
+        "NL": "www.google.nl",
+        "NO": "www.google.no",
+        "NP": "www.google.com.np",
+        "NR": "www.google.nr",
+        "NU": "www.google.nu",
+        "NZ": "www.google.co.nz",
+        "OM": "www.google.com.om",
+        "PA": "www.google.com.pa",
+        "PE": "www.google.com.pe",
+        "PG": "www.google.com.pg",
+        "PH": "www.google.com.ph",
+        "PK": "www.google.com.pk",
+        "PL": "www.google.pl",
+        "PN": "www.google.pn",
+        "PR": "www.google.com.pr",
+        "PS": "www.google.ps",
+        "PT": "www.google.pt",
+        "PY": "www.google.com.py",
+        "QA": "www.google.com.qa",
+        "RO": "www.google.ro",
+        "RS": "www.google.rs",
+        "RU": "www.google.ru",
+        "RW": "www.google.rw",
+        "SA": "www.google.com.sa",
+        "SB": "www.google.com.sb",
+        "SC": "www.google.sc",
+        "SE": "www.google.se",
+        "SG": "www.google.com.sg",
+        "SH": "www.google.sh",
+        "SI": "www.google.si",
+        "SK": "www.google.sk",
+        "SL": "www.google.com.sl",
+        "SM": "www.google.sm",
+        "SN": "www.google.sn",
+        "SO": "www.google.so",
+        "SR": "www.google.sr",
+        "ST": "www.google.st",
+        "SV": "www.google.com.sv",
+        "TD": "www.google.td",
+        "TG": "www.google.tg",
+        "TH": "www.google.co.th",
+        "TJ": "www.google.com.tj",
+        "TL": "www.google.tl",
+        "TM": "www.google.tm",
+        "TN": "www.google.tn",
+        "TO": "www.google.to",
+        "TR": "www.google.com.tr",
+        "TT": "www.google.tt",
+        "TW": "www.google.com.tw",
+        "TZ": "www.google.co.tz",
+        "UA": "www.google.com.ua",
+        "UG": "www.google.co.ug",
+        "UK": "www.google.co.uk",
+        "UY": "www.google.com.uy",
+        "UZ": "www.google.co.uz",
+        "VC": "www.google.com.vc",
+        "VE": "www.google.co.ve",
+        "VG": "www.google.vg",
+        "VI": "www.google.co.vi",
+        "VN": "www.google.com.vn",
+        "VU": "www.google.vu",
+        "WS": "www.google.ws",
+        "ZA": "www.google.co.za",
+        "ZM": "www.google.co.zm",
+        "ZW": "www.google.co.zw"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "lang_af",
+      "ar": "lang_ar",
+      "be": "lang_be",
+      "bg": "lang_bg",
+      "ca": "lang_ca",
+      "cs": "lang_cs",
+      "da": "lang_da",
+      "de": "lang_de",
+      "el": "lang_el",
+      "en": "lang_en",
+      "eo": "lang_eo",
+      "es": "lang_es",
+      "et": "lang_et",
+      "fa": "lang_fa",
+      "fi": "lang_fi",
+      "fil": "lang_tl",
+      "fr": "lang_fr",
+      "he": "lang_iw",
+      "hi": "lang_hi",
+      "hr": "lang_hr",
+      "hu": "lang_hu",
+      "hy": "lang_hy",
+      "id": "lang_id",
+      "is": "lang_is",
+      "it": "lang_it",
+      "ja": "lang_ja",
+      "ko": "lang_ko",
+      "lt": "lang_lt",
+      "lv": "lang_lv",
+      "nb": "lang_no",
+      "nl": "lang_nl",
+      "pl": "lang_pl",
+      "pt": "lang_pt",
+      "ro": "lang_ro",
+      "ru": "lang_ru",
+      "sk": "lang_sk",
+      "sl": "lang_sl",
+      "sr": "lang_sr",
+      "sv": "lang_sv",
+      "sw": "lang_sw",
+      "th": "lang_th",
+      "tr": "lang_tr",
+      "uk": "lang_uk",
+      "vi": "lang_vi",
+      "zh": "lang_zh-CN",
+      "zh_Hans": "lang_zh-CN",
+      "zh_Hant": "lang_zh-TW"
+    },
+    "regions": {
+      "af-ZA": "ZA",
+      "ar-AE": "AE",
+      "ar-BH": "BH",
+      "ar-DJ": "DJ",
+      "ar-DZ": "DZ",
+      "ar-EG": "EG",
+      "ar-IL": "IL",
+      "ar-IQ": "IQ",
+      "ar-JO": "JO",
+      "ar-KW": "KW",
+      "ar-LB": "LB",
+      "ar-LY": "LY",
+      "ar-MA": "MA",
+      "ar-OM": "OM",
+      "ar-PS": "PS",
+      "ar-QA": "QA",
+      "ar-SA": "SA",
+      "ar-SO": "SO",
+      "ar-TD": "TD",
+      "ar-TN": "TN",
+      "be-BY": "BY",
+      "bg-BG": "BG",
+      "ca-AD": "AD",
+      "ca-ES": "ES",
+      "cs-CZ": "CZ",
+      "da-DK": "DK",
+      "de-AT": "AT",
+      "de-BE": "BE",
+      "de-CH": "CH",
+      "de-DE": "DE",
+      "de-LI": "LI",
+      "de-LU": "LU",
+      "el-CY": "CY",
+      "el-GR": "GR",
+      "en-AG": "AG",
+      "en-AI": "AI",
+      "en-AS": "AS",
+      "en-AU": "AU",
+      "en-BI": "BI",
+      "en-BS": "BS",
+      "en-BW": "BW",
+      "en-BZ": "BZ",
+      "en-CA": "CA",
+      "en-CK": "CK",
+      "en-CM": "CM",
+      "en-DM": "DM",
+      "en-FJ": "FJ",
+      "en-FM": "FM",
+      "en-GB": "GB",
+      "en-GG": "GG",
+      "en-GH": "GH",
+      "en-GI": "GI",
+      "en-GM": "GM",
+      "en-GY": "GY",
+      "en-HK": "HK",
+      "en-IE": "IE",
+      "en-IM": "IM",
+      "en-IN": "IN",
+      "en-JE": "JE",
+      "en-JM": "JM",
+      "en-KE": "KE",
+      "en-KI": "KI",
+      "en-LS": "LS",
+      "en-MG": "MG",
+      "en-MS": "MS",
+      "en-MT": "MT",
+      "en-MU": "MU",
+      "en-MW": "MW",
+      "en-NA": "NA",
+      "en-NG": "NG",
+      "en-NR": "NR",
+      "en-NU": "NU",
+      "en-NZ": "NZ",
+      "en-PG": "PG",
+      "en-PH": "PH",
+      "en-PK": "PK",
+      "en-PN": "PN",
+      "en-PR": "PR",
+      "en-RW": "RW",
+      "en-SB": "SB",
+      "en-SC": "SC",
+      "en-SG": "SG",
+      "en-SH": "SH",
+      "en-SL": "SL",
+      "en-TO": "TO",
+      "en-TT": "TT",
+      "en-TZ": "TZ",
+      "en-UG": "UG",
+      "en-US": "US",
+      "en-VC": "VC",
+      "en-VG": "VG",
+      "en-VI": "VI",
+      "en-VU": "VU",
+      "en-WS": "WS",
+      "en-ZA": "ZA",
+      "en-ZM": "ZM",
+      "en-ZW": "ZW",
+      "es-AR": "AR",
+      "es-BO": "BO",
+      "es-CL": "CL",
+      "es-CO": "CO",
+      "es-CR": "CR",
+      "es-CU": "CU",
+      "es-DO": "DO",
+      "es-EC": "EC",
+      "es-ES": "ES",
+      "es-GT": "GT",
+      "es-HN": "HN",
+      "es-MX": "MX",
+      "es-NI": "NI",
+      "es-PA": "PA",
+      "es-PE": "PE",
+      "es-PR": "PR",
+      "es-PY": "PY",
+      "es-SV": "SV",
+      "es-US": "US",
+      "es-UY": "UY",
+      "es-VE": "VE",
+      "et-EE": "EE",
+      "fa-AF": "AF",
+      "fi-FI": "FI",
+      "fil-PH": "PH",
+      "fr-BE": "BE",
+      "fr-BF": "BF",
+      "fr-BI": "BI",
+      "fr-BJ": "BJ",
+      "fr-CA": "CA",
+      "fr-CD": "CD",
+      "fr-CF": "CF",
+      "fr-CG": "CG",
+      "fr-CH": "CH",
+      "fr-CI": "CI",
+      "fr-CM": "CM",
+      "fr-DJ": "DJ",
+      "fr-DZ": "DZ",
+      "fr-FR": "FR",
+      "fr-GA": "GA",
+      "fr-HT": "HT",
+      "fr-LU": "LU",
+      "fr-MA": "MA",
+      "fr-MG": "MG",
+      "fr-ML": "ML",
+      "fr-MU": "MU",
+      "fr-NE": "NE",
+      "fr-RW": "RW",
+      "fr-SC": "SC",
+      "fr-SN": "SN",
+      "fr-TD": "TD",
+      "fr-TG": "TG",
+      "fr-TN": "TN",
+      "fr-VU": "VU",
+      "he-IL": "IL",
+      "hi-IN": "IN",
+      "hr-BA": "BA",
+      "hr-HR": "HR",
+      "hu-HU": "HU",
+      "hy-AM": "AM",
+      "id-ID": "ID",
+      "is-IS": "IS",
+      "it-CH": "CH",
+      "it-IT": "IT",
+      "it-SM": "SM",
+      "ja-JP": "JP",
+      "ko-KR": "KR",
+      "lt-LT": "LT",
+      "lv-LV": "LV",
+      "nb-NO": "NO",
+      "nl-BE": "BE",
+      "nl-NL": "NL",
+      "nl-SR": "SR",
+      "pl-PL": "PL",
+      "pt-AO": "AO",
+      "pt-BR": "BR",
+      "pt-CV": "CV",
+      "pt-MZ": "MZ",
+      "pt-PT": "PT",
+      "pt-ST": "ST",
+      "pt-TL": "TL",
+      "ro-MD": "MD",
+      "ro-RO": "RO",
+      "ru-BY": "BY",
+      "ru-KG": "KG",
+      "ru-KZ": "KZ",
+      "ru-RU": "RU",
+      "ru-UA": "UA",
+      "sk-SK": "SK",
+      "sl-SI": "SI",
+      "sr-BA": "BA",
+      "sr-RS": "RS",
+      "sv-FI": "FI",
+      "sv-SE": "SE",
+      "sw-CD": "CD",
+      "sw-KE": "KE",
+      "sw-TZ": "TZ",
+      "sw-UG": "UG",
+      "th-TH": "TH",
+      "tr-CY": "CY",
+      "tr-TR": "TR",
+      "uk-UA": "UA",
+      "vi-VN": "VN",
+      "zh-CN": "HK",
+      "zh-HK": "HK",
+      "zh-SG": "SG",
+      "zh-TW": "TW"
+    }
+  },
+  "google images": {
+    "all_locale": "ZZ",
+    "custom": {
+      "supported_domains": {
+        "AD": "www.google.ad",
+        "AE": "www.google.ae",
+        "AF": "www.google.com.af",
+        "AG": "www.google.com.ag",
+        "AI": "www.google.com.ai",
+        "AL": "www.google.al",
+        "AM": "www.google.am",
+        "AO": "www.google.co.ao",
+        "AR": "www.google.com.ar",
+        "AS": "www.google.as",
+        "AT": "www.google.at",
+        "AU": "www.google.com.au",
+        "AZ": "www.google.az",
+        "BA": "www.google.ba",
+        "BD": "www.google.com.bd",
+        "BE": "www.google.be",
+        "BF": "www.google.bf",
+        "BG": "www.google.bg",
+        "BH": "www.google.com.bh",
+        "BI": "www.google.bi",
+        "BJ": "www.google.bj",
+        "BN": "www.google.com.bn",
+        "BO": "www.google.com.bo",
+        "BR": "www.google.com.br",
+        "BS": "www.google.bs",
+        "BT": "www.google.bt",
+        "BW": "www.google.co.bw",
+        "BY": "www.google.by",
+        "BZ": "www.google.com.bz",
+        "CA": "www.google.ca",
+        "CAT": "www.google.cat",
+        "CD": "www.google.cd",
+        "CF": "www.google.cf",
+        "CG": "www.google.cg",
+        "CH": "www.google.ch",
+        "CI": "www.google.ci",
+        "CK": "www.google.co.ck",
+        "CL": "www.google.cl",
+        "CM": "www.google.cm",
+        "CN": "www.google.com.hk",
+        "CO": "www.google.com.co",
+        "CR": "www.google.co.cr",
+        "CU": "www.google.com.cu",
+        "CV": "www.google.cv",
+        "CY": "www.google.com.cy",
+        "CZ": "www.google.cz",
+        "DE": "www.google.de",
+        "DJ": "www.google.dj",
+        "DK": "www.google.dk",
+        "DM": "www.google.dm",
+        "DO": "www.google.com.do",
+        "DZ": "www.google.dz",
+        "EC": "www.google.com.ec",
+        "EE": "www.google.ee",
+        "EG": "www.google.com.eg",
+        "ES": "www.google.es",
+        "ET": "www.google.com.et",
+        "FI": "www.google.fi",
+        "FJ": "www.google.com.fj",
+        "FM": "www.google.fm",
+        "FR": "www.google.fr",
+        "GA": "www.google.ga",
+        "GE": "www.google.ge",
+        "GG": "www.google.gg",
+        "GH": "www.google.com.gh",
+        "GI": "www.google.com.gi",
+        "GL": "www.google.gl",
+        "GM": "www.google.gm",
+        "GR": "www.google.gr",
+        "GT": "www.google.com.gt",
+        "GY": "www.google.gy",
+        "HK": "www.google.com.hk",
+        "HN": "www.google.hn",
+        "HR": "www.google.hr",
+        "HT": "www.google.ht",
+        "HU": "www.google.hu",
+        "ID": "www.google.co.id",
+        "IE": "www.google.ie",
+        "IL": "www.google.co.il",
+        "IM": "www.google.im",
+        "IN": "www.google.co.in",
+        "IQ": "www.google.iq",
+        "IS": "www.google.is",
+        "IT": "www.google.it",
+        "JE": "www.google.je",
+        "JM": "www.google.com.jm",
+        "JO": "www.google.jo",
+        "JP": "www.google.co.jp",
+        "KE": "www.google.co.ke",
+        "KG": "www.google.kg",
+        "KH": "www.google.com.kh",
+        "KI": "www.google.ki",
+        "KR": "www.google.co.kr",
+        "KW": "www.google.com.kw",
+        "KZ": "www.google.kz",
+        "LA": "www.google.la",
+        "LB": "www.google.com.lb",
+        "LI": "www.google.li",
+        "LK": "www.google.lk",
+        "LS": "www.google.co.ls",
+        "LT": "www.google.lt",
+        "LU": "www.google.lu",
+        "LV": "www.google.lv",
+        "LY": "www.google.com.ly",
+        "MA": "www.google.co.ma",
+        "MD": "www.google.md",
+        "ME": "www.google.me",
+        "MG": "www.google.mg",
+        "MK": "www.google.mk",
+        "ML": "www.google.ml",
+        "MM": "www.google.com.mm",
+        "MN": "www.google.mn",
+        "MS": "www.google.ms",
+        "MT": "www.google.com.mt",
+        "MU": "www.google.mu",
+        "MV": "www.google.mv",
+        "MW": "www.google.mw",
+        "MX": "www.google.com.mx",
+        "MY": "www.google.com.my",
+        "MZ": "www.google.co.mz",
+        "NA": "www.google.com.na",
+        "NE": "www.google.ne",
+        "NG": "www.google.com.ng",
+        "NI": "www.google.com.ni",
+        "NL": "www.google.nl",
+        "NO": "www.google.no",
+        "NP": "www.google.com.np",
+        "NR": "www.google.nr",
+        "NU": "www.google.nu",
+        "NZ": "www.google.co.nz",
+        "OM": "www.google.com.om",
+        "PA": "www.google.com.pa",
+        "PE": "www.google.com.pe",
+        "PG": "www.google.com.pg",
+        "PH": "www.google.com.ph",
+        "PK": "www.google.com.pk",
+        "PL": "www.google.pl",
+        "PN": "www.google.pn",
+        "PR": "www.google.com.pr",
+        "PS": "www.google.ps",
+        "PT": "www.google.pt",
+        "PY": "www.google.com.py",
+        "QA": "www.google.com.qa",
+        "RO": "www.google.ro",
+        "RS": "www.google.rs",
+        "RU": "www.google.ru",
+        "RW": "www.google.rw",
+        "SA": "www.google.com.sa",
+        "SB": "www.google.com.sb",
+        "SC": "www.google.sc",
+        "SE": "www.google.se",
+        "SG": "www.google.com.sg",
+        "SH": "www.google.sh",
+        "SI": "www.google.si",
+        "SK": "www.google.sk",
+        "SL": "www.google.com.sl",
+        "SM": "www.google.sm",
+        "SN": "www.google.sn",
+        "SO": "www.google.so",
+        "SR": "www.google.sr",
+        "ST": "www.google.st",
+        "SV": "www.google.com.sv",
+        "TD": "www.google.td",
+        "TG": "www.google.tg",
+        "TH": "www.google.co.th",
+        "TJ": "www.google.com.tj",
+        "TL": "www.google.tl",
+        "TM": "www.google.tm",
+        "TN": "www.google.tn",
+        "TO": "www.google.to",
+        "TR": "www.google.com.tr",
+        "TT": "www.google.tt",
+        "TW": "www.google.com.tw",
+        "TZ": "www.google.co.tz",
+        "UA": "www.google.com.ua",
+        "UG": "www.google.co.ug",
+        "UK": "www.google.co.uk",
+        "UY": "www.google.com.uy",
+        "UZ": "www.google.co.uz",
+        "VC": "www.google.com.vc",
+        "VE": "www.google.co.ve",
+        "VG": "www.google.vg",
+        "VI": "www.google.co.vi",
+        "VN": "www.google.com.vn",
+        "VU": "www.google.vu",
+        "WS": "www.google.ws",
+        "ZA": "www.google.co.za",
+        "ZM": "www.google.co.zm",
+        "ZW": "www.google.co.zw"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "lang_af",
+      "ar": "lang_ar",
+      "be": "lang_be",
+      "bg": "lang_bg",
+      "ca": "lang_ca",
+      "cs": "lang_cs",
+      "da": "lang_da",
+      "de": "lang_de",
+      "el": "lang_el",
+      "en": "lang_en",
+      "eo": "lang_eo",
+      "es": "lang_es",
+      "et": "lang_et",
+      "fa": "lang_fa",
+      "fi": "lang_fi",
+      "fil": "lang_tl",
+      "fr": "lang_fr",
+      "he": "lang_iw",
+      "hi": "lang_hi",
+      "hr": "lang_hr",
+      "hu": "lang_hu",
+      "hy": "lang_hy",
+      "id": "lang_id",
+      "is": "lang_is",
+      "it": "lang_it",
+      "ja": "lang_ja",
+      "ko": "lang_ko",
+      "lt": "lang_lt",
+      "lv": "lang_lv",
+      "nb": "lang_no",
+      "nl": "lang_nl",
+      "pl": "lang_pl",
+      "pt": "lang_pt",
+      "ro": "lang_ro",
+      "ru": "lang_ru",
+      "sk": "lang_sk",
+      "sl": "lang_sl",
+      "sr": "lang_sr",
+      "sv": "lang_sv",
+      "sw": "lang_sw",
+      "th": "lang_th",
+      "tr": "lang_tr",
+      "uk": "lang_uk",
+      "vi": "lang_vi",
+      "zh": "lang_zh-CN",
+      "zh_Hans": "lang_zh-CN",
+      "zh_Hant": "lang_zh-TW"
+    },
+    "regions": {
+      "af-ZA": "ZA",
+      "ar-AE": "AE",
+      "ar-BH": "BH",
+      "ar-DJ": "DJ",
+      "ar-DZ": "DZ",
+      "ar-EG": "EG",
+      "ar-IL": "IL",
+      "ar-IQ": "IQ",
+      "ar-JO": "JO",
+      "ar-KW": "KW",
+      "ar-LB": "LB",
+      "ar-LY": "LY",
+      "ar-MA": "MA",
+      "ar-OM": "OM",
+      "ar-PS": "PS",
+      "ar-QA": "QA",
+      "ar-SA": "SA",
+      "ar-SO": "SO",
+      "ar-TD": "TD",
+      "ar-TN": "TN",
+      "be-BY": "BY",
+      "bg-BG": "BG",
+      "ca-AD": "AD",
+      "ca-ES": "ES",
+      "cs-CZ": "CZ",
+      "da-DK": "DK",
+      "de-AT": "AT",
+      "de-BE": "BE",
+      "de-CH": "CH",
+      "de-DE": "DE",
+      "de-LI": "LI",
+      "de-LU": "LU",
+      "el-CY": "CY",
+      "el-GR": "GR",
+      "en-AG": "AG",
+      "en-AI": "AI",
+      "en-AS": "AS",
+      "en-AU": "AU",
+      "en-BI": "BI",
+      "en-BS": "BS",
+      "en-BW": "BW",
+      "en-BZ": "BZ",
+      "en-CA": "CA",
+      "en-CK": "CK",
+      "en-CM": "CM",
+      "en-DM": "DM",
+      "en-FJ": "FJ",
+      "en-FM": "FM",
+      "en-GB": "GB",
+      "en-GG": "GG",
+      "en-GH": "GH",
+      "en-GI": "GI",
+      "en-GM": "GM",
+      "en-GY": "GY",
+      "en-HK": "HK",
+      "en-IE": "IE",
+      "en-IM": "IM",
+      "en-IN": "IN",
+      "en-JE": "JE",
+      "en-JM": "JM",
+      "en-KE": "KE",
+      "en-KI": "KI",
+      "en-LS": "LS",
+      "en-MG": "MG",
+      "en-MS": "MS",
+      "en-MT": "MT",
+      "en-MU": "MU",
+      "en-MW": "MW",
+      "en-NA": "NA",
+      "en-NG": "NG",
+      "en-NR": "NR",
+      "en-NU": "NU",
+      "en-NZ": "NZ",
+      "en-PG": "PG",
+      "en-PH": "PH",
+      "en-PK": "PK",
+      "en-PN": "PN",
+      "en-PR": "PR",
+      "en-RW": "RW",
+      "en-SB": "SB",
+      "en-SC": "SC",
+      "en-SG": "SG",
+      "en-SH": "SH",
+      "en-SL": "SL",
+      "en-TO": "TO",
+      "en-TT": "TT",
+      "en-TZ": "TZ",
+      "en-UG": "UG",
+      "en-US": "US",
+      "en-VC": "VC",
+      "en-VG": "VG",
+      "en-VI": "VI",
+      "en-VU": "VU",
+      "en-WS": "WS",
+      "en-ZA": "ZA",
+      "en-ZM": "ZM",
+      "en-ZW": "ZW",
+      "es-AR": "AR",
+      "es-BO": "BO",
+      "es-CL": "CL",
+      "es-CO": "CO",
+      "es-CR": "CR",
+      "es-CU": "CU",
+      "es-DO": "DO",
+      "es-EC": "EC",
+      "es-ES": "ES",
+      "es-GT": "GT",
+      "es-HN": "HN",
+      "es-MX": "MX",
+      "es-NI": "NI",
+      "es-PA": "PA",
+      "es-PE": "PE",
+      "es-PR": "PR",
+      "es-PY": "PY",
+      "es-SV": "SV",
+      "es-US": "US",
+      "es-UY": "UY",
+      "es-VE": "VE",
+      "et-EE": "EE",
+      "fa-AF": "AF",
+      "fi-FI": "FI",
+      "fil-PH": "PH",
+      "fr-BE": "BE",
+      "fr-BF": "BF",
+      "fr-BI": "BI",
+      "fr-BJ": "BJ",
+      "fr-CA": "CA",
+      "fr-CD": "CD",
+      "fr-CF": "CF",
+      "fr-CG": "CG",
+      "fr-CH": "CH",
+      "fr-CI": "CI",
+      "fr-CM": "CM",
+      "fr-DJ": "DJ",
+      "fr-DZ": "DZ",
+      "fr-FR": "FR",
+      "fr-GA": "GA",
+      "fr-HT": "HT",
+      "fr-LU": "LU",
+      "fr-MA": "MA",
+      "fr-MG": "MG",
+      "fr-ML": "ML",
+      "fr-MU": "MU",
+      "fr-NE": "NE",
+      "fr-RW": "RW",
+      "fr-SC": "SC",
+      "fr-SN": "SN",
+      "fr-TD": "TD",
+      "fr-TG": "TG",
+      "fr-TN": "TN",
+      "fr-VU": "VU",
+      "he-IL": "IL",
+      "hi-IN": "IN",
+      "hr-BA": "BA",
+      "hr-HR": "HR",
+      "hu-HU": "HU",
+      "hy-AM": "AM",
+      "id-ID": "ID",
+      "is-IS": "IS",
+      "it-CH": "CH",
+      "it-IT": "IT",
+      "it-SM": "SM",
+      "ja-JP": "JP",
+      "ko-KR": "KR",
+      "lt-LT": "LT",
+      "lv-LV": "LV",
+      "nb-NO": "NO",
+      "nl-BE": "BE",
+      "nl-NL": "NL",
+      "nl-SR": "SR",
+      "pl-PL": "PL",
+      "pt-AO": "AO",
+      "pt-BR": "BR",
+      "pt-CV": "CV",
+      "pt-MZ": "MZ",
+      "pt-PT": "PT",
+      "pt-ST": "ST",
+      "pt-TL": "TL",
+      "ro-MD": "MD",
+      "ro-RO": "RO",
+      "ru-BY": "BY",
+      "ru-KG": "KG",
+      "ru-KZ": "KZ",
+      "ru-RU": "RU",
+      "ru-UA": "UA",
+      "sk-SK": "SK",
+      "sl-SI": "SI",
+      "sr-BA": "BA",
+      "sr-RS": "RS",
+      "sv-FI": "FI",
+      "sv-SE": "SE",
+      "sw-CD": "CD",
+      "sw-KE": "KE",
+      "sw-TZ": "TZ",
+      "sw-UG": "UG",
+      "th-TH": "TH",
+      "tr-CY": "CY",
+      "tr-TR": "TR",
+      "uk-UA": "UA",
+      "vi-VN": "VN",
+      "zh-CN": "HK",
+      "zh-HK": "HK",
+      "zh-SG": "SG",
+      "zh-TW": "TW"
+    }
+  },
+  "google news": {
+    "all_locale": "ZZ",
+    "custom": {
+      "ceid": {
+        "ar-AE": "AE:ar",
+        "ar-EG": "EG:ar",
+        "ar-LB": "LB:ar",
+        "ar-SA": "SA:ar",
+        "bg-BG": "BG:bg",
+        "bn-BD": "BD:bn",
+        "bn-IN": "IN:bn",
+        "cs-CZ": "CZ:cs",
+        "de-AT": "AT:de",
+        "de-CH": "CH:de",
+        "de-DE": "DE:de",
+        "el-GR": "GR:el",
+        "en-AU": "AU:en",
+        "en-BW": "BW:en",
+        "en-CA": "CA:en",
+        "en-GB": "GB:en",
+        "en-GH": "GH:en",
+        "en-IE": "IE:en",
+        "en-IL": "IL:en",
+        "en-IN": "IN:en",
+        "en-KE": "KE:en",
+        "en-MY": "MY:en",
+        "en-NA": "NA:en",
+        "en-NG": "NG:en",
+        "en-NZ": "NZ:en",
+        "en-PH": "PH:en",
+        "en-PK": "PK:en",
+        "en-SG": "SG:en",
+        "en-TZ": "TZ:en",
+        "en-UG": "UG:en",
+        "en-US": "US:en",
+        "en-ZA": "ZA:en",
+        "en-ZW": "ZW:en",
+        "es-AR": "AR:es-419",
+        "es-CL": "CL:es-419",
+        "es-CO": "CO:es-419",
+        "es-CU": "CU:es-419",
+        "es-ES": "ES:es",
+        "es-MX": "MX:es-419",
+        "es-PE": "PE:es-419",
+        "es-US": "US:es-419",
+        "es-VE": "VE:es-419",
+        "fr-BE": "BE:fr",
+        "fr-CA": "CA:fr",
+        "fr-CH": "CH:fr",
+        "fr-FR": "FR:fr",
+        "fr-MA": "MA:fr",
+        "fr-SN": "SN:fr",
+        "he-IL": "IL:he",
+        "hi-IN": "IN:hi",
+        "hu-HU": "HU:hu",
+        "id-ID": "ID:id",
+        "it-IT": "IT:it",
+        "ja-JP": "JP:ja",
+        "ko-KR": "KR:ko",
+        "lt-LT": "LT:lt",
+        "lv-LV": "LV:lv",
+        "ml-IN": "IN:ml",
+        "mr-IN": "IN:mr",
+        "nb-NO": "NO:no",
+        "nl-BE": "BE:nl",
+        "nl-NL": "NL:nl",
+        "pl-PL": "PL:pl",
+        "pt-BR": "BR:pt-419",
+        "pt-PT": "PT:pt-150",
+        "ro-RO": "RO:ro",
+        "ru-RU": "RU:ru",
+        "ru-UA": "UA:ru",
+        "sk-SK": "SK:sk",
+        "sl-SI": "SI:sl",
+        "sr-RS": "RS:sr",
+        "sv-SE": "SE:sv",
+        "ta-IN": "IN:ta",
+        "te-IN": "IN:te",
+        "th-TH": "TH:th",
+        "tr-TR": "TR:tr",
+        "uk-UA": "UA:uk",
+        "vi-VN": "VN:vi",
+        "zh-CN": "CN:zh-Hans",
+        "zh-HK": "HK:zh-Hant",
+        "zh-TW": "TW:zh-Hant"
+      },
+      "supported_domains": {}
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "lang_af",
+      "ar": "lang_ar",
+      "be": "lang_be",
+      "bg": "lang_bg",
+      "ca": "lang_ca",
+      "cs": "lang_cs",
+      "da": "lang_da",
+      "de": "lang_de",
+      "el": "lang_el",
+      "en": "lang_en",
+      "eo": "lang_eo",
+      "es": "lang_es",
+      "et": "lang_et",
+      "fa": "lang_fa",
+      "fi": "lang_fi",
+      "fil": "lang_tl",
+      "fr": "lang_fr",
+      "he": "lang_iw",
+      "hi": "lang_hi",
+      "hr": "lang_hr",
+      "hu": "lang_hu",
+      "hy": "lang_hy",
+      "id": "lang_id",
+      "is": "lang_is",
+      "it": "lang_it",
+      "ja": "lang_ja",
+      "ko": "lang_ko",
+      "lt": "lang_lt",
+      "lv": "lang_lv",
+      "nb": "lang_no",
+      "nl": "lang_nl",
+      "pl": "lang_pl",
+      "pt": "lang_pt",
+      "ro": "lang_ro",
+      "ru": "lang_ru",
+      "sk": "lang_sk",
+      "sl": "lang_sl",
+      "sr": "lang_sr",
+      "sv": "lang_sv",
+      "sw": "lang_sw",
+      "th": "lang_th",
+      "tr": "lang_tr",
+      "uk": "lang_uk",
+      "vi": "lang_vi",
+      "zh": "lang_zh-CN",
+      "zh_Hans": "lang_zh-CN",
+      "zh_Hant": "lang_zh-TW"
+    },
+    "regions": {
+      "af-ZA": "ZA",
+      "ar-AE": "AE",
+      "ar-BH": "BH",
+      "ar-DJ": "DJ",
+      "ar-DZ": "DZ",
+      "ar-EG": "EG",
+      "ar-IL": "IL",
+      "ar-IQ": "IQ",
+      "ar-JO": "JO",
+      "ar-KW": "KW",
+      "ar-LB": "LB",
+      "ar-LY": "LY",
+      "ar-MA": "MA",
+      "ar-OM": "OM",
+      "ar-PS": "PS",
+      "ar-QA": "QA",
+      "ar-SA": "SA",
+      "ar-SO": "SO",
+      "ar-TD": "TD",
+      "ar-TN": "TN",
+      "be-BY": "BY",
+      "bg-BG": "BG",
+      "ca-AD": "AD",
+      "ca-ES": "ES",
+      "cs-CZ": "CZ",
+      "da-DK": "DK",
+      "de-AT": "AT",
+      "de-BE": "BE",
+      "de-CH": "CH",
+      "de-DE": "DE",
+      "de-LI": "LI",
+      "de-LU": "LU",
+      "el-CY": "CY",
+      "el-GR": "GR",
+      "en-AG": "AG",
+      "en-AI": "AI",
+      "en-AS": "AS",
+      "en-AU": "AU",
+      "en-BI": "BI",
+      "en-BS": "BS",
+      "en-BW": "BW",
+      "en-BZ": "BZ",
+      "en-CA": "CA",
+      "en-CK": "CK",
+      "en-CM": "CM",
+      "en-DM": "DM",
+      "en-FJ": "FJ",
+      "en-FM": "FM",
+      "en-GB": "GB",
+      "en-GG": "GG",
+      "en-GH": "GH",
+      "en-GI": "GI",
+      "en-GM": "GM",
+      "en-GY": "GY",
+      "en-HK": "HK",
+      "en-IE": "IE",
+      "en-IM": "IM",
+      "en-IN": "IN",
+      "en-JE": "JE",
+      "en-JM": "JM",
+      "en-KE": "KE",
+      "en-KI": "KI",
+      "en-LS": "LS",
+      "en-MG": "MG",
+      "en-MS": "MS",
+      "en-MT": "MT",
+      "en-MU": "MU",
+      "en-MW": "MW",
+      "en-NA": "NA",
+      "en-NG": "NG",
+      "en-NR": "NR",
+      "en-NU": "NU",
+      "en-NZ": "NZ",
+      "en-PG": "PG",
+      "en-PH": "PH",
+      "en-PK": "PK",
+      "en-PN": "PN",
+      "en-PR": "PR",
+      "en-RW": "RW",
+      "en-SB": "SB",
+      "en-SC": "SC",
+      "en-SG": "SG",
+      "en-SH": "SH",
+      "en-SL": "SL",
+      "en-TO": "TO",
+      "en-TT": "TT",
+      "en-TZ": "TZ",
+      "en-UG": "UG",
+      "en-US": "US",
+      "en-VC": "VC",
+      "en-VG": "VG",
+      "en-VI": "VI",
+      "en-VU": "VU",
+      "en-WS": "WS",
+      "en-ZA": "ZA",
+      "en-ZM": "ZM",
+      "en-ZW": "ZW",
+      "es-AR": "AR",
+      "es-BO": "BO",
+      "es-CL": "CL",
+      "es-CO": "CO",
+      "es-CR": "CR",
+      "es-CU": "CU",
+      "es-DO": "DO",
+      "es-EC": "EC",
+      "es-ES": "ES",
+      "es-GT": "GT",
+      "es-HN": "HN",
+      "es-MX": "MX",
+      "es-NI": "NI",
+      "es-PA": "PA",
+      "es-PE": "PE",
+      "es-PR": "PR",
+      "es-PY": "PY",
+      "es-SV": "SV",
+      "es-US": "US",
+      "es-UY": "UY",
+      "es-VE": "VE",
+      "et-EE": "EE",
+      "fa-AF": "AF",
+      "fi-FI": "FI",
+      "fil-PH": "PH",
+      "fr-BE": "BE",
+      "fr-BF": "BF",
+      "fr-BI": "BI",
+      "fr-BJ": "BJ",
+      "fr-CA": "CA",
+      "fr-CD": "CD",
+      "fr-CF": "CF",
+      "fr-CG": "CG",
+      "fr-CH": "CH",
+      "fr-CI": "CI",
+      "fr-CM": "CM",
+      "fr-DJ": "DJ",
+      "fr-DZ": "DZ",
+      "fr-FR": "FR",
+      "fr-GA": "GA",
+      "fr-HT": "HT",
+      "fr-LU": "LU",
+      "fr-MA": "MA",
+      "fr-MG": "MG",
+      "fr-ML": "ML",
+      "fr-MU": "MU",
+      "fr-NE": "NE",
+      "fr-RW": "RW",
+      "fr-SC": "SC",
+      "fr-SN": "SN",
+      "fr-TD": "TD",
+      "fr-TG": "TG",
+      "fr-TN": "TN",
+      "fr-VU": "VU",
+      "he-IL": "IL",
+      "hi-IN": "IN",
+      "hr-BA": "BA",
+      "hr-HR": "HR",
+      "hu-HU": "HU",
+      "hy-AM": "AM",
+      "id-ID": "ID",
+      "is-IS": "IS",
+      "it-CH": "CH",
+      "it-IT": "IT",
+      "it-SM": "SM",
+      "ja-JP": "JP",
+      "ko-KR": "KR",
+      "lt-LT": "LT",
+      "lv-LV": "LV",
+      "nb-NO": "NO",
+      "nl-BE": "BE",
+      "nl-NL": "NL",
+      "nl-SR": "SR",
+      "pl-PL": "PL",
+      "pt-AO": "AO",
+      "pt-BR": "BR",
+      "pt-CV": "CV",
+      "pt-MZ": "MZ",
+      "pt-PT": "PT",
+      "pt-ST": "ST",
+      "pt-TL": "TL",
+      "ro-MD": "MD",
+      "ro-RO": "RO",
+      "ru-BY": "BY",
+      "ru-KG": "KG",
+      "ru-KZ": "KZ",
+      "ru-RU": "RU",
+      "ru-UA": "UA",
+      "sk-SK": "SK",
+      "sl-SI": "SI",
+      "sr-BA": "BA",
+      "sr-RS": "RS",
+      "sv-FI": "FI",
+      "sv-SE": "SE",
+      "sw-CD": "CD",
+      "sw-KE": "KE",
+      "sw-TZ": "TZ",
+      "sw-UG": "UG",
+      "th-TH": "TH",
+      "tr-CY": "CY",
+      "tr-TR": "TR",
+      "uk-UA": "UA",
+      "vi-VN": "VN",
+      "zh-CN": "HK",
+      "zh-HK": "HK",
+      "zh-SG": "SG",
+      "zh-TW": "TW"
+    }
+  },
+  "google scholar": {
+    "all_locale": "ZZ",
+    "custom": {
+      "supported_domains": {
+        "AD": "www.google.ad",
+        "AE": "www.google.ae",
+        "AF": "www.google.com.af",
+        "AG": "www.google.com.ag",
+        "AI": "www.google.com.ai",
+        "AL": "www.google.al",
+        "AM": "www.google.am",
+        "AO": "www.google.co.ao",
+        "AR": "www.google.com.ar",
+        "AS": "www.google.as",
+        "AT": "www.google.at",
+        "AU": "www.google.com.au",
+        "AZ": "www.google.az",
+        "BA": "www.google.ba",
+        "BD": "www.google.com.bd",
+        "BE": "www.google.be",
+        "BF": "www.google.bf",
+        "BG": "www.google.bg",
+        "BH": "www.google.com.bh",
+        "BI": "www.google.bi",
+        "BJ": "www.google.bj",
+        "BN": "www.google.com.bn",
+        "BO": "www.google.com.bo",
+        "BR": "www.google.com.br",
+        "BS": "www.google.bs",
+        "BT": "www.google.bt",
+        "BW": "www.google.co.bw",
+        "BY": "www.google.by",
+        "BZ": "www.google.com.bz",
+        "CA": "www.google.ca",
+        "CAT": "www.google.cat",
+        "CD": "www.google.cd",
+        "CF": "www.google.cf",
+        "CG": "www.google.cg",
+        "CH": "www.google.ch",
+        "CI": "www.google.ci",
+        "CK": "www.google.co.ck",
+        "CL": "www.google.cl",
+        "CM": "www.google.cm",
+        "CN": "www.google.com.hk",
+        "CO": "www.google.com.co",
+        "CR": "www.google.co.cr",
+        "CU": "www.google.com.cu",
+        "CV": "www.google.cv",
+        "CY": "www.google.com.cy",
+        "CZ": "www.google.cz",
+        "DE": "www.google.de",
+        "DJ": "www.google.dj",
+        "DK": "www.google.dk",
+        "DM": "www.google.dm",
+        "DO": "www.google.com.do",
+        "DZ": "www.google.dz",
+        "EC": "www.google.com.ec",
+        "EE": "www.google.ee",
+        "EG": "www.google.com.eg",
+        "ES": "www.google.es",
+        "ET": "www.google.com.et",
+        "FI": "www.google.fi",
+        "FJ": "www.google.com.fj",
+        "FM": "www.google.fm",
+        "FR": "www.google.fr",
+        "GA": "www.google.ga",
+        "GE": "www.google.ge",
+        "GG": "www.google.gg",
+        "GH": "www.google.com.gh",
+        "GI": "www.google.com.gi",
+        "GL": "www.google.gl",
+        "GM": "www.google.gm",
+        "GR": "www.google.gr",
+        "GT": "www.google.com.gt",
+        "GY": "www.google.gy",
+        "HK": "www.google.com.hk",
+        "HN": "www.google.hn",
+        "HR": "www.google.hr",
+        "HT": "www.google.ht",
+        "HU": "www.google.hu",
+        "ID": "www.google.co.id",
+        "IE": "www.google.ie",
+        "IL": "www.google.co.il",
+        "IM": "www.google.im",
+        "IN": "www.google.co.in",
+        "IQ": "www.google.iq",
+        "IS": "www.google.is",
+        "IT": "www.google.it",
+        "JE": "www.google.je",
+        "JM": "www.google.com.jm",
+        "JO": "www.google.jo",
+        "JP": "www.google.co.jp",
+        "KE": "www.google.co.ke",
+        "KG": "www.google.kg",
+        "KH": "www.google.com.kh",
+        "KI": "www.google.ki",
+        "KR": "www.google.co.kr",
+        "KW": "www.google.com.kw",
+        "KZ": "www.google.kz",
+        "LA": "www.google.la",
+        "LB": "www.google.com.lb",
+        "LI": "www.google.li",
+        "LK": "www.google.lk",
+        "LS": "www.google.co.ls",
+        "LT": "www.google.lt",
+        "LU": "www.google.lu",
+        "LV": "www.google.lv",
+        "LY": "www.google.com.ly",
+        "MA": "www.google.co.ma",
+        "MD": "www.google.md",
+        "ME": "www.google.me",
+        "MG": "www.google.mg",
+        "MK": "www.google.mk",
+        "ML": "www.google.ml",
+        "MM": "www.google.com.mm",
+        "MN": "www.google.mn",
+        "MS": "www.google.ms",
+        "MT": "www.google.com.mt",
+        "MU": "www.google.mu",
+        "MV": "www.google.mv",
+        "MW": "www.google.mw",
+        "MX": "www.google.com.mx",
+        "MY": "www.google.com.my",
+        "MZ": "www.google.co.mz",
+        "NA": "www.google.com.na",
+        "NE": "www.google.ne",
+        "NG": "www.google.com.ng",
+        "NI": "www.google.com.ni",
+        "NL": "www.google.nl",
+        "NO": "www.google.no",
+        "NP": "www.google.com.np",
+        "NR": "www.google.nr",
+        "NU": "www.google.nu",
+        "NZ": "www.google.co.nz",
+        "OM": "www.google.com.om",
+        "PA": "www.google.com.pa",
+        "PE": "www.google.com.pe",
+        "PG": "www.google.com.pg",
+        "PH": "www.google.com.ph",
+        "PK": "www.google.com.pk",
+        "PL": "www.google.pl",
+        "PN": "www.google.pn",
+        "PR": "www.google.com.pr",
+        "PS": "www.google.ps",
+        "PT": "www.google.pt",
+        "PY": "www.google.com.py",
+        "QA": "www.google.com.qa",
+        "RO": "www.google.ro",
+        "RS": "www.google.rs",
+        "RU": "www.google.ru",
+        "RW": "www.google.rw",
+        "SA": "www.google.com.sa",
+        "SB": "www.google.com.sb",
+        "SC": "www.google.sc",
+        "SE": "www.google.se",
+        "SG": "www.google.com.sg",
+        "SH": "www.google.sh",
+        "SI": "www.google.si",
+        "SK": "www.google.sk",
+        "SL": "www.google.com.sl",
+        "SM": "www.google.sm",
+        "SN": "www.google.sn",
+        "SO": "www.google.so",
+        "SR": "www.google.sr",
+        "ST": "www.google.st",
+        "SV": "www.google.com.sv",
+        "TD": "www.google.td",
+        "TG": "www.google.tg",
+        "TH": "www.google.co.th",
+        "TJ": "www.google.com.tj",
+        "TL": "www.google.tl",
+        "TM": "www.google.tm",
+        "TN": "www.google.tn",
+        "TO": "www.google.to",
+        "TR": "www.google.com.tr",
+        "TT": "www.google.tt",
+        "TW": "www.google.com.tw",
+        "TZ": "www.google.co.tz",
+        "UA": "www.google.com.ua",
+        "UG": "www.google.co.ug",
+        "UK": "www.google.co.uk",
+        "UY": "www.google.com.uy",
+        "UZ": "www.google.co.uz",
+        "VC": "www.google.com.vc",
+        "VE": "www.google.co.ve",
+        "VG": "www.google.vg",
+        "VI": "www.google.co.vi",
+        "VN": "www.google.com.vn",
+        "VU": "www.google.vu",
+        "WS": "www.google.ws",
+        "ZA": "www.google.co.za",
+        "ZM": "www.google.co.zm",
+        "ZW": "www.google.co.zw"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "lang_af",
+      "ar": "lang_ar",
+      "be": "lang_be",
+      "bg": "lang_bg",
+      "ca": "lang_ca",
+      "cs": "lang_cs",
+      "da": "lang_da",
+      "de": "lang_de",
+      "el": "lang_el",
+      "en": "lang_en",
+      "eo": "lang_eo",
+      "es": "lang_es",
+      "et": "lang_et",
+      "fa": "lang_fa",
+      "fi": "lang_fi",
+      "fil": "lang_tl",
+      "fr": "lang_fr",
+      "he": "lang_iw",
+      "hi": "lang_hi",
+      "hr": "lang_hr",
+      "hu": "lang_hu",
+      "hy": "lang_hy",
+      "id": "lang_id",
+      "is": "lang_is",
+      "it": "lang_it",
+      "ja": "lang_ja",
+      "ko": "lang_ko",
+      "lt": "lang_lt",
+      "lv": "lang_lv",
+      "nb": "lang_no",
+      "nl": "lang_nl",
+      "pl": "lang_pl",
+      "pt": "lang_pt",
+      "ro": "lang_ro",
+      "ru": "lang_ru",
+      "sk": "lang_sk",
+      "sl": "lang_sl",
+      "sr": "lang_sr",
+      "sv": "lang_sv",
+      "sw": "lang_sw",
+      "th": "lang_th",
+      "tr": "lang_tr",
+      "uk": "lang_uk",
+      "vi": "lang_vi",
+      "zh": "lang_zh-CN",
+      "zh_Hans": "lang_zh-CN",
+      "zh_Hant": "lang_zh-TW"
+    },
+    "regions": {
+      "af-ZA": "ZA",
+      "ar-AE": "AE",
+      "ar-BH": "BH",
+      "ar-DJ": "DJ",
+      "ar-DZ": "DZ",
+      "ar-EG": "EG",
+      "ar-IL": "IL",
+      "ar-IQ": "IQ",
+      "ar-JO": "JO",
+      "ar-KW": "KW",
+      "ar-LB": "LB",
+      "ar-LY": "LY",
+      "ar-MA": "MA",
+      "ar-OM": "OM",
+      "ar-PS": "PS",
+      "ar-QA": "QA",
+      "ar-SA": "SA",
+      "ar-SO": "SO",
+      "ar-TD": "TD",
+      "ar-TN": "TN",
+      "be-BY": "BY",
+      "bg-BG": "BG",
+      "ca-AD": "AD",
+      "ca-ES": "ES",
+      "cs-CZ": "CZ",
+      "da-DK": "DK",
+      "de-AT": "AT",
+      "de-BE": "BE",
+      "de-CH": "CH",
+      "de-DE": "DE",
+      "de-LI": "LI",
+      "de-LU": "LU",
+      "el-CY": "CY",
+      "el-GR": "GR",
+      "en-AG": "AG",
+      "en-AI": "AI",
+      "en-AS": "AS",
+      "en-AU": "AU",
+      "en-BI": "BI",
+      "en-BS": "BS",
+      "en-BW": "BW",
+      "en-BZ": "BZ",
+      "en-CA": "CA",
+      "en-CK": "CK",
+      "en-CM": "CM",
+      "en-DM": "DM",
+      "en-FJ": "FJ",
+      "en-FM": "FM",
+      "en-GB": "GB",
+      "en-GG": "GG",
+      "en-GH": "GH",
+      "en-GI": "GI",
+      "en-GM": "GM",
+      "en-GY": "GY",
+      "en-HK": "HK",
+      "en-IE": "IE",
+      "en-IM": "IM",
+      "en-IN": "IN",
+      "en-JE": "JE",
+      "en-JM": "JM",
+      "en-KE": "KE",
+      "en-KI": "KI",
+      "en-LS": "LS",
+      "en-MG": "MG",
+      "en-MS": "MS",
+      "en-MT": "MT",
+      "en-MU": "MU",
+      "en-MW": "MW",
+      "en-NA": "NA",
+      "en-NG": "NG",
+      "en-NR": "NR",
+      "en-NU": "NU",
+      "en-NZ": "NZ",
+      "en-PG": "PG",
+      "en-PH": "PH",
+      "en-PK": "PK",
+      "en-PN": "PN",
+      "en-PR": "PR",
+      "en-RW": "RW",
+      "en-SB": "SB",
+      "en-SC": "SC",
+      "en-SG": "SG",
+      "en-SH": "SH",
+      "en-SL": "SL",
+      "en-TO": "TO",
+      "en-TT": "TT",
+      "en-TZ": "TZ",
+      "en-UG": "UG",
+      "en-US": "US",
+      "en-VC": "VC",
+      "en-VG": "VG",
+      "en-VI": "VI",
+      "en-VU": "VU",
+      "en-WS": "WS",
+      "en-ZA": "ZA",
+      "en-ZM": "ZM",
+      "en-ZW": "ZW",
+      "es-AR": "AR",
+      "es-BO": "BO",
+      "es-CL": "CL",
+      "es-CO": "CO",
+      "es-CR": "CR",
+      "es-CU": "CU",
+      "es-DO": "DO",
+      "es-EC": "EC",
+      "es-ES": "ES",
+      "es-GT": "GT",
+      "es-HN": "HN",
+      "es-MX": "MX",
+      "es-NI": "NI",
+      "es-PA": "PA",
+      "es-PE": "PE",
+      "es-PR": "PR",
+      "es-PY": "PY",
+      "es-SV": "SV",
+      "es-US": "US",
+      "es-UY": "UY",
+      "es-VE": "VE",
+      "et-EE": "EE",
+      "fa-AF": "AF",
+      "fi-FI": "FI",
+      "fil-PH": "PH",
+      "fr-BE": "BE",
+      "fr-BF": "BF",
+      "fr-BI": "BI",
+      "fr-BJ": "BJ",
+      "fr-CA": "CA",
+      "fr-CD": "CD",
+      "fr-CF": "CF",
+      "fr-CG": "CG",
+      "fr-CH": "CH",
+      "fr-CI": "CI",
+      "fr-CM": "CM",
+      "fr-DJ": "DJ",
+      "fr-DZ": "DZ",
+      "fr-FR": "FR",
+      "fr-GA": "GA",
+      "fr-HT": "HT",
+      "fr-LU": "LU",
+      "fr-MA": "MA",
+      "fr-MG": "MG",
+      "fr-ML": "ML",
+      "fr-MU": "MU",
+      "fr-NE": "NE",
+      "fr-RW": "RW",
+      "fr-SC": "SC",
+      "fr-SN": "SN",
+      "fr-TD": "TD",
+      "fr-TG": "TG",
+      "fr-TN": "TN",
+      "fr-VU": "VU",
+      "he-IL": "IL",
+      "hi-IN": "IN",
+      "hr-BA": "BA",
+      "hr-HR": "HR",
+      "hu-HU": "HU",
+      "hy-AM": "AM",
+      "id-ID": "ID",
+      "is-IS": "IS",
+      "it-CH": "CH",
+      "it-IT": "IT",
+      "it-SM": "SM",
+      "ja-JP": "JP",
+      "ko-KR": "KR",
+      "lt-LT": "LT",
+      "lv-LV": "LV",
+      "nb-NO": "NO",
+      "nl-BE": "BE",
+      "nl-NL": "NL",
+      "nl-SR": "SR",
+      "pl-PL": "PL",
+      "pt-AO": "AO",
+      "pt-BR": "BR",
+      "pt-CV": "CV",
+      "pt-MZ": "MZ",
+      "pt-PT": "PT",
+      "pt-ST": "ST",
+      "pt-TL": "TL",
+      "ro-MD": "MD",
+      "ro-RO": "RO",
+      "ru-BY": "BY",
+      "ru-KG": "KG",
+      "ru-KZ": "KZ",
+      "ru-RU": "RU",
+      "ru-UA": "UA",
+      "sk-SK": "SK",
+      "sl-SI": "SI",
+      "sr-BA": "BA",
+      "sr-RS": "RS",
+      "sv-FI": "FI",
+      "sv-SE": "SE",
+      "sw-CD": "CD",
+      "sw-KE": "KE",
+      "sw-TZ": "TZ",
+      "sw-UG": "UG",
+      "th-TH": "TH",
+      "tr-CY": "CY",
+      "tr-TR": "TR",
+      "uk-UA": "UA",
+      "vi-VN": "VN",
+      "zh-CN": "HK",
+      "zh-HK": "HK",
+      "zh-SG": "SG",
+      "zh-TW": "TW"
+    }
+  },
+  "google videos": {
+    "all_locale": "ZZ",
+    "custom": {
+      "supported_domains": {
+        "AD": "www.google.ad",
+        "AE": "www.google.ae",
+        "AF": "www.google.com.af",
+        "AG": "www.google.com.ag",
+        "AI": "www.google.com.ai",
+        "AL": "www.google.al",
+        "AM": "www.google.am",
+        "AO": "www.google.co.ao",
+        "AR": "www.google.com.ar",
+        "AS": "www.google.as",
+        "AT": "www.google.at",
+        "AU": "www.google.com.au",
+        "AZ": "www.google.az",
+        "BA": "www.google.ba",
+        "BD": "www.google.com.bd",
+        "BE": "www.google.be",
+        "BF": "www.google.bf",
+        "BG": "www.google.bg",
+        "BH": "www.google.com.bh",
+        "BI": "www.google.bi",
+        "BJ": "www.google.bj",
+        "BN": "www.google.com.bn",
+        "BO": "www.google.com.bo",
+        "BR": "www.google.com.br",
+        "BS": "www.google.bs",
+        "BT": "www.google.bt",
+        "BW": "www.google.co.bw",
+        "BY": "www.google.by",
+        "BZ": "www.google.com.bz",
+        "CA": "www.google.ca",
+        "CAT": "www.google.cat",
+        "CD": "www.google.cd",
+        "CF": "www.google.cf",
+        "CG": "www.google.cg",
+        "CH": "www.google.ch",
+        "CI": "www.google.ci",
+        "CK": "www.google.co.ck",
+        "CL": "www.google.cl",
+        "CM": "www.google.cm",
+        "CN": "www.google.com.hk",
+        "CO": "www.google.com.co",
+        "CR": "www.google.co.cr",
+        "CU": "www.google.com.cu",
+        "CV": "www.google.cv",
+        "CY": "www.google.com.cy",
+        "CZ": "www.google.cz",
+        "DE": "www.google.de",
+        "DJ": "www.google.dj",
+        "DK": "www.google.dk",
+        "DM": "www.google.dm",
+        "DO": "www.google.com.do",
+        "DZ": "www.google.dz",
+        "EC": "www.google.com.ec",
+        "EE": "www.google.ee",
+        "EG": "www.google.com.eg",
+        "ES": "www.google.es",
+        "ET": "www.google.com.et",
+        "FI": "www.google.fi",
+        "FJ": "www.google.com.fj",
+        "FM": "www.google.fm",
+        "FR": "www.google.fr",
+        "GA": "www.google.ga",
+        "GE": "www.google.ge",
+        "GG": "www.google.gg",
+        "GH": "www.google.com.gh",
+        "GI": "www.google.com.gi",
+        "GL": "www.google.gl",
+        "GM": "www.google.gm",
+        "GR": "www.google.gr",
+        "GT": "www.google.com.gt",
+        "GY": "www.google.gy",
+        "HK": "www.google.com.hk",
+        "HN": "www.google.hn",
+        "HR": "www.google.hr",
+        "HT": "www.google.ht",
+        "HU": "www.google.hu",
+        "ID": "www.google.co.id",
+        "IE": "www.google.ie",
+        "IL": "www.google.co.il",
+        "IM": "www.google.im",
+        "IN": "www.google.co.in",
+        "IQ": "www.google.iq",
+        "IS": "www.google.is",
+        "IT": "www.google.it",
+        "JE": "www.google.je",
+        "JM": "www.google.com.jm",
+        "JO": "www.google.jo",
+        "JP": "www.google.co.jp",
+        "KE": "www.google.co.ke",
+        "KG": "www.google.kg",
+        "KH": "www.google.com.kh",
+        "KI": "www.google.ki",
+        "KR": "www.google.co.kr",
+        "KW": "www.google.com.kw",
+        "KZ": "www.google.kz",
+        "LA": "www.google.la",
+        "LB": "www.google.com.lb",
+        "LI": "www.google.li",
+        "LK": "www.google.lk",
+        "LS": "www.google.co.ls",
+        "LT": "www.google.lt",
+        "LU": "www.google.lu",
+        "LV": "www.google.lv",
+        "LY": "www.google.com.ly",
+        "MA": "www.google.co.ma",
+        "MD": "www.google.md",
+        "ME": "www.google.me",
+        "MG": "www.google.mg",
+        "MK": "www.google.mk",
+        "ML": "www.google.ml",
+        "MM": "www.google.com.mm",
+        "MN": "www.google.mn",
+        "MS": "www.google.ms",
+        "MT": "www.google.com.mt",
+        "MU": "www.google.mu",
+        "MV": "www.google.mv",
+        "MW": "www.google.mw",
+        "MX": "www.google.com.mx",
+        "MY": "www.google.com.my",
+        "MZ": "www.google.co.mz",
+        "NA": "www.google.com.na",
+        "NE": "www.google.ne",
+        "NG": "www.google.com.ng",
+        "NI": "www.google.com.ni",
+        "NL": "www.google.nl",
+        "NO": "www.google.no",
+        "NP": "www.google.com.np",
+        "NR": "www.google.nr",
+        "NU": "www.google.nu",
+        "NZ": "www.google.co.nz",
+        "OM": "www.google.com.om",
+        "PA": "www.google.com.pa",
+        "PE": "www.google.com.pe",
+        "PG": "www.google.com.pg",
+        "PH": "www.google.com.ph",
+        "PK": "www.google.com.pk",
+        "PL": "www.google.pl",
+        "PN": "www.google.pn",
+        "PR": "www.google.com.pr",
+        "PS": "www.google.ps",
+        "PT": "www.google.pt",
+        "PY": "www.google.com.py",
+        "QA": "www.google.com.qa",
+        "RO": "www.google.ro",
+        "RS": "www.google.rs",
+        "RU": "www.google.ru",
+        "RW": "www.google.rw",
+        "SA": "www.google.com.sa",
+        "SB": "www.google.com.sb",
+        "SC": "www.google.sc",
+        "SE": "www.google.se",
+        "SG": "www.google.com.sg",
+        "SH": "www.google.sh",
+        "SI": "www.google.si",
+        "SK": "www.google.sk",
+        "SL": "www.google.com.sl",
+        "SM": "www.google.sm",
+        "SN": "www.google.sn",
+        "SO": "www.google.so",
+        "SR": "www.google.sr",
+        "ST": "www.google.st",
+        "SV": "www.google.com.sv",
+        "TD": "www.google.td",
+        "TG": "www.google.tg",
+        "TH": "www.google.co.th",
+        "TJ": "www.google.com.tj",
+        "TL": "www.google.tl",
+        "TM": "www.google.tm",
+        "TN": "www.google.tn",
+        "TO": "www.google.to",
+        "TR": "www.google.com.tr",
+        "TT": "www.google.tt",
+        "TW": "www.google.com.tw",
+        "TZ": "www.google.co.tz",
+        "UA": "www.google.com.ua",
+        "UG": "www.google.co.ug",
+        "UK": "www.google.co.uk",
+        "UY": "www.google.com.uy",
+        "UZ": "www.google.co.uz",
+        "VC": "www.google.com.vc",
+        "VE": "www.google.co.ve",
+        "VG": "www.google.vg",
+        "VI": "www.google.co.vi",
+        "VN": "www.google.com.vn",
+        "VU": "www.google.vu",
+        "WS": "www.google.ws",
+        "ZA": "www.google.co.za",
+        "ZM": "www.google.co.zm",
+        "ZW": "www.google.co.zw"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "lang_af",
+      "ar": "lang_ar",
+      "be": "lang_be",
+      "bg": "lang_bg",
+      "ca": "lang_ca",
+      "cs": "lang_cs",
+      "da": "lang_da",
+      "de": "lang_de",
+      "el": "lang_el",
+      "en": "lang_en",
+      "eo": "lang_eo",
+      "es": "lang_es",
+      "et": "lang_et",
+      "fa": "lang_fa",
+      "fi": "lang_fi",
+      "fil": "lang_tl",
+      "fr": "lang_fr",
+      "he": "lang_iw",
+      "hi": "lang_hi",
+      "hr": "lang_hr",
+      "hu": "lang_hu",
+      "hy": "lang_hy",
+      "id": "lang_id",
+      "is": "lang_is",
+      "it": "lang_it",
+      "ja": "lang_ja",
+      "ko": "lang_ko",
+      "lt": "lang_lt",
+      "lv": "lang_lv",
+      "nb": "lang_no",
+      "nl": "lang_nl",
+      "pl": "lang_pl",
+      "pt": "lang_pt",
+      "ro": "lang_ro",
+      "ru": "lang_ru",
+      "sk": "lang_sk",
+      "sl": "lang_sl",
+      "sr": "lang_sr",
+      "sv": "lang_sv",
+      "sw": "lang_sw",
+      "th": "lang_th",
+      "tr": "lang_tr",
+      "uk": "lang_uk",
+      "vi": "lang_vi",
+      "zh": "lang_zh-CN",
+      "zh_Hans": "lang_zh-CN",
+      "zh_Hant": "lang_zh-TW"
+    },
+    "regions": {
+      "af-ZA": "ZA",
+      "ar-AE": "AE",
+      "ar-BH": "BH",
+      "ar-DJ": "DJ",
+      "ar-DZ": "DZ",
+      "ar-EG": "EG",
+      "ar-IL": "IL",
+      "ar-IQ": "IQ",
+      "ar-JO": "JO",
+      "ar-KW": "KW",
+      "ar-LB": "LB",
+      "ar-LY": "LY",
+      "ar-MA": "MA",
+      "ar-OM": "OM",
+      "ar-PS": "PS",
+      "ar-QA": "QA",
+      "ar-SA": "SA",
+      "ar-SO": "SO",
+      "ar-TD": "TD",
+      "ar-TN": "TN",
+      "be-BY": "BY",
+      "bg-BG": "BG",
+      "ca-AD": "AD",
+      "ca-ES": "ES",
+      "cs-CZ": "CZ",
+      "da-DK": "DK",
+      "de-AT": "AT",
+      "de-BE": "BE",
+      "de-CH": "CH",
+      "de-DE": "DE",
+      "de-LI": "LI",
+      "de-LU": "LU",
+      "el-CY": "CY",
+      "el-GR": "GR",
+      "en-AG": "AG",
+      "en-AI": "AI",
+      "en-AS": "AS",
+      "en-AU": "AU",
+      "en-BI": "BI",
+      "en-BS": "BS",
+      "en-BW": "BW",
+      "en-BZ": "BZ",
+      "en-CA": "CA",
+      "en-CK": "CK",
+      "en-CM": "CM",
+      "en-DM": "DM",
+      "en-FJ": "FJ",
+      "en-FM": "FM",
+      "en-GB": "GB",
+      "en-GG": "GG",
+      "en-GH": "GH",
+      "en-GI": "GI",
+      "en-GM": "GM",
+      "en-GY": "GY",
+      "en-HK": "HK",
+      "en-IE": "IE",
+      "en-IM": "IM",
+      "en-IN": "IN",
+      "en-JE": "JE",
+      "en-JM": "JM",
+      "en-KE": "KE",
+      "en-KI": "KI",
+      "en-LS": "LS",
+      "en-MG": "MG",
+      "en-MS": "MS",
+      "en-MT": "MT",
+      "en-MU": "MU",
+      "en-MW": "MW",
+      "en-NA": "NA",
+      "en-NG": "NG",
+      "en-NR": "NR",
+      "en-NU": "NU",
+      "en-NZ": "NZ",
+      "en-PG": "PG",
+      "en-PH": "PH",
+      "en-PK": "PK",
+      "en-PN": "PN",
+      "en-PR": "PR",
+      "en-RW": "RW",
+      "en-SB": "SB",
+      "en-SC": "SC",
+      "en-SG": "SG",
+      "en-SH": "SH",
+      "en-SL": "SL",
+      "en-TO": "TO",
+      "en-TT": "TT",
+      "en-TZ": "TZ",
+      "en-UG": "UG",
+      "en-US": "US",
+      "en-VC": "VC",
+      "en-VG": "VG",
+      "en-VI": "VI",
+      "en-VU": "VU",
+      "en-WS": "WS",
+      "en-ZA": "ZA",
+      "en-ZM": "ZM",
+      "en-ZW": "ZW",
+      "es-AR": "AR",
+      "es-BO": "BO",
+      "es-CL": "CL",
+      "es-CO": "CO",
+      "es-CR": "CR",
+      "es-CU": "CU",
+      "es-DO": "DO",
+      "es-EC": "EC",
+      "es-ES": "ES",
+      "es-GT": "GT",
+      "es-HN": "HN",
+      "es-MX": "MX",
+      "es-NI": "NI",
+      "es-PA": "PA",
+      "es-PE": "PE",
+      "es-PR": "PR",
+      "es-PY": "PY",
+      "es-SV": "SV",
+      "es-US": "US",
+      "es-UY": "UY",
+      "es-VE": "VE",
+      "et-EE": "EE",
+      "fa-AF": "AF",
+      "fi-FI": "FI",
+      "fil-PH": "PH",
+      "fr-BE": "BE",
+      "fr-BF": "BF",
+      "fr-BI": "BI",
+      "fr-BJ": "BJ",
+      "fr-CA": "CA",
+      "fr-CD": "CD",
+      "fr-CF": "CF",
+      "fr-CG": "CG",
+      "fr-CH": "CH",
+      "fr-CI": "CI",
+      "fr-CM": "CM",
+      "fr-DJ": "DJ",
+      "fr-DZ": "DZ",
+      "fr-FR": "FR",
+      "fr-GA": "GA",
+      "fr-HT": "HT",
+      "fr-LU": "LU",
+      "fr-MA": "MA",
+      "fr-MG": "MG",
+      "fr-ML": "ML",
+      "fr-MU": "MU",
+      "fr-NE": "NE",
+      "fr-RW": "RW",
+      "fr-SC": "SC",
+      "fr-SN": "SN",
+      "fr-TD": "TD",
+      "fr-TG": "TG",
+      "fr-TN": "TN",
+      "fr-VU": "VU",
+      "he-IL": "IL",
+      "hi-IN": "IN",
+      "hr-BA": "BA",
+      "hr-HR": "HR",
+      "hu-HU": "HU",
+      "hy-AM": "AM",
+      "id-ID": "ID",
+      "is-IS": "IS",
+      "it-CH": "CH",
+      "it-IT": "IT",
+      "it-SM": "SM",
+      "ja-JP": "JP",
+      "ko-KR": "KR",
+      "lt-LT": "LT",
+      "lv-LV": "LV",
+      "nb-NO": "NO",
+      "nl-BE": "BE",
+      "nl-NL": "NL",
+      "nl-SR": "SR",
+      "pl-PL": "PL",
+      "pt-AO": "AO",
+      "pt-BR": "BR",
+      "pt-CV": "CV",
+      "pt-MZ": "MZ",
+      "pt-PT": "PT",
+      "pt-ST": "ST",
+      "pt-TL": "TL",
+      "ro-MD": "MD",
+      "ro-RO": "RO",
+      "ru-BY": "BY",
+      "ru-KG": "KG",
+      "ru-KZ": "KZ",
+      "ru-RU": "RU",
+      "ru-UA": "UA",
+      "sk-SK": "SK",
+      "sl-SI": "SI",
+      "sr-BA": "BA",
+      "sr-RS": "RS",
+      "sv-FI": "FI",
+      "sv-SE": "SE",
+      "sw-CD": "CD",
+      "sw-KE": "KE",
+      "sw-TZ": "TZ",
+      "sw-UG": "UG",
+      "th-TH": "TH",
+      "tr-CY": "CY",
+      "tr-TR": "TR",
+      "uk-UA": "UA",
+      "vi-VN": "VN",
+      "zh-CN": "HK",
+      "zh-HK": "HK",
+      "zh-SG": "SG",
+      "zh-TW": "TW"
+    }
+  },
+  "peertube": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ca": "ca",
+      "cs": "cs",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "eo": "eo",
+      "es": "es",
+      "eu": "eu",
+      "fi": "fi",
+      "fr": "fr",
+      "gd": "gd",
+      "it": "it",
+      "ja": "ja",
+      "nl": "nl",
+      "pl": "pl",
+      "pt": "pt",
+      "ru": "ru",
+      "sv": "sv",
+      "zh": "zh",
+      "zh_Hans": "zh",
+      "zh_Hant": "zh"
+    },
+    "regions": {}
+  },
+  "qwant": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {},
+    "regions": {
+      "bg-BG": "bg_BG",
+      "ca-ES": "ca_ES",
+      "cs-CZ": "cs_CZ",
+      "da-DK": "da_DK",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "et-EE": "et_EE",
+      "fi-FI": "fi_FI",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "hu-HU": "hu_HU",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ko-KR": "ko_KR",
+      "nb-NO": "nb_NO",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "sv-SE": "sv_SE",
+      "th-TH": "th_TH",
+      "zh-CN": "zh_CN",
+      "zh-HK": "zh_HK"
+    }
+  },
+  "qwant images": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {},
+    "regions": {
+      "bg-BG": "bg_BG",
+      "ca-ES": "ca_ES",
+      "cs-CZ": "cs_CZ",
+      "da-DK": "da_DK",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "et-EE": "et_EE",
+      "fi-FI": "fi_FI",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "hu-HU": "hu_HU",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ko-KR": "ko_KR",
+      "nb-NO": "nb_NO",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "sv-SE": "sv_SE",
+      "th-TH": "th_TH",
+      "zh-CN": "zh_CN",
+      "zh-HK": "zh_HK"
+    }
+  },
+  "qwant news": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {},
+    "regions": {
+      "ca-ES": "ca_ES",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pt-PT": "pt_PT"
+    }
+  },
+  "qwant videos": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {},
+    "regions": {
+      "bg-BG": "bg_BG",
+      "ca-ES": "ca_ES",
+      "cs-CZ": "cs_CZ",
+      "da-DK": "da_DK",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en_GB",
+      "en-IE": "en_IE",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-US": "en_US",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "et-EE": "et_EE",
+      "fi-FI": "fi_FI",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "hu-HU": "hu_HU",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ko-KR": "ko_KR",
+      "nb-NO": "nb_NO",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "sv-SE": "sv_SE",
+      "th-TH": "th_TH",
+      "zh-CN": "zh_CN",
+      "zh-HK": "zh_HK"
+    }
+  },
+  "sepiasearch": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ca": "ca",
+      "cs": "cs",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "eo": "eo",
+      "es": "es",
+      "eu": "eu",
+      "fi": "fi",
+      "fr": "fr",
+      "gd": "gd",
+      "it": "it",
+      "ja": "ja",
+      "nl": "nl",
+      "pl": "pl",
+      "pt": "pt",
+      "ru": "ru",
+      "sv": "sv",
+      "zh": "zh",
+      "zh_Hans": "zh",
+      "zh_Hant": "zh"
+    },
+    "regions": {}
+  },
+  "startpage": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "afrikaans",
+      "am": "amharic",
+      "ar": "arabic",
+      "az": "azerbaijani",
+      "be": "belarusian",
+      "bg": "bulgarian",
+      "bn": "bengali",
+      "bs": "bosnian",
+      "ca": "catalan",
+      "cs": "czech",
+      "cy": "welsh",
+      "da": "dansk",
+      "de": "deutsch",
+      "el": "greek",
+      "en": "english",
+      "eo": "esperanto",
+      "es": "espanol",
+      "et": "estonian",
+      "eu": "basque",
+      "fa": "persian",
+      "fi": "suomi",
+      "fo": "faroese",
+      "fr": "francais",
+      "fy": "frisian",
+      "ga": "irish",
+      "gd": "gaelic",
+      "gl": "galician",
+      "gu": "gujarati",
+      "he": "hebrew",
+      "hi": "hindi",
+      "hr": "croatian",
+      "hu": "hungarian",
+      "ia": "interlingua",
+      "id": "indonesian",
+      "is": "icelandic",
+      "it": "italiano",
+      "ja": "nihongo",
+      "jv": "javanese",
+      "ka": "georgian",
+      "kn": "kannada",
+      "ko": "hangul",
+      "la": "latin",
+      "lt": "lithuanian",
+      "lv": "latvian",
+      "mai": "bihari",
+      "mk": "macedonian",
+      "ml": "malayalam",
+      "mr": "marathi",
+      "ms": "malay",
+      "mt": "maltese",
+      "nb": "norsk",
+      "ne": "nepali",
+      "nl": "nederlands",
+      "oc": "occitan",
+      "pa": "punjabi",
+      "pl": "polski",
+      "pt": "portugues",
+      "ro": "romanian",
+      "ru": "russian",
+      "si": "sinhalese",
+      "sk": "slovak",
+      "sl": "slovenian",
+      "sq": "albanian",
+      "sr": "serbian",
+      "su": "sudanese",
+      "sv": "svenska",
+      "sw": "swahili",
+      "ta": "tamil",
+      "te": "telugu",
+      "th": "thai",
+      "ti": "tigrinya",
+      "tl": "tagalog",
+      "tr": "turkce",
+      "uk": "ukrainian",
+      "ur": "urdu",
+      "uz": "uzbek",
+      "vi": "vietnamese",
+      "xh": "xhosa",
+      "zh": "jiantizhongwen",
+      "zh_Hant": "fantizhengwen",
+      "zu": "zulu"
+    },
+    "regions": {
+      "ar-EG": "ar_EG",
+      "bg-BG": "bg_BG",
+      "ca-ES": "ca_ES",
+      "cs-CZ": "cs_CZ",
+      "da-DK": "da_DK",
+      "de-AT": "de_AT",
+      "de-CH": "de_CH",
+      "de-DE": "de_DE",
+      "el-GR": "el_GR",
+      "en-AU": "en_AU",
+      "en-CA": "en_CA",
+      "en-GB": "en-GB_GB",
+      "en-IE": "en_IE",
+      "en-IN": "en_IN",
+      "en-MY": "en_MY",
+      "en-NZ": "en_NZ",
+      "en-PH": "en_PH",
+      "en-US": "en_US",
+      "en-ZA": "en_ZA",
+      "es-AR": "es_AR",
+      "es-CL": "es_CL",
+      "es-CO": "es_CO",
+      "es-ES": "es_ES",
+      "es-MX": "es_MX",
+      "es-PE": "es_PE",
+      "es-US": "es_US",
+      "es-UY": "es_UY",
+      "es-VE": "es_VE",
+      "fi-FI": "fi_FI",
+      "fil-PH": "fil_PH",
+      "fr-BE": "fr_BE",
+      "fr-CA": "fr_CA",
+      "fr-CH": "fr_CH",
+      "fr-FR": "fr_FR",
+      "hi-IN": "hi_IN",
+      "id-ID": "id_ID",
+      "it-CH": "it_CH",
+      "it-IT": "it_IT",
+      "ja-JP": "ja_JP",
+      "ko-KR": "ko_KR",
+      "ms-MY": "ms_MY",
+      "nb-NO": "no_NO",
+      "nl-BE": "nl_BE",
+      "nl-NL": "nl_NL",
+      "pl-PL": "pl_PL",
+      "pt-BR": "pt-BR_BR",
+      "pt-PT": "pt_PT",
+      "ro-RO": "ro_RO",
+      "ru-BY": "ru_BY",
+      "ru-RU": "ru_RU",
+      "sv-SE": "sv_SE",
+      "tr-TR": "tr_TR",
+      "uk-UA": "uk_UA",
+      "vi-VN": "vi_VN",
+      "zh-CN": "zh-CN_CN",
+      "zh-HK": "zh-TW_HK",
+      "zh-TW": "zh-TW_TW"
+    }
+  },
+  "wikidata": {
+    "all_locale": null,
+    "custom": {
+      "wiki_netloc": {}
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af",
+      "am": "am",
+      "ar": "ar",
+      "as": "as",
+      "az": "az",
+      "be": "be",
+      "bg": "bg",
+      "bn": "bn",
+      "bs": "bs",
+      "ca": "ca",
+      "ckb": "ckb",
+      "cs": "cs",
+      "da": "da",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "es": "es",
+      "et": "et",
+      "fa": "fa",
+      "fi": "fi",
+      "fil": "tl",
+      "fo": "fo",
+      "fr": "fr",
+      "fy": "fy",
+      "gl": "gl",
+      "gsw": "als",
+      "gu": "gu",
+      "he": "he",
+      "hi": "hi",
+      "hsb": "hsb",
+      "hu": "hu",
+      "hy": "hy",
+      "id": "id",
+      "is": "is",
+      "it": "it",
+      "ja": "ja",
+      "jv": "jv",
+      "ka": "ka",
+      "kn": "kn",
+      "ko": "ko",
+      "lb": "lb",
+      "lt": "lt",
+      "lv": "lv",
+      "mai": "mai",
+      "mk": "mk",
+      "ml": "ml",
+      "mn": "mn",
+      "mr": "mr",
+      "ne": "ne",
+      "no": "no",
+      "or": "or",
+      "os": "os",
+      "pa": "pa",
+      "pl": "pl",
+      "ps": "ps",
+      "pt": "pt",
+      "qu": "qu",
+      "ro": "ro",
+      "ru": "ru",
+      "sa": "sa",
+      "sah": "sah",
+      "sd": "sd",
+      "si": "si",
+      "sk": "sk",
+      "sl": "sl",
+      "sq": "sq",
+      "sr": "sr",
+      "ta": "ta",
+      "te": "te",
+      "th": "th",
+      "tr": "tr",
+      "uk": "uk",
+      "ur": "ur",
+      "uz": "uz",
+      "vi": "vi",
+      "yi": "yi",
+      "zh": "zh",
+      "zh_Hant": "zh-classical"
+    },
+    "regions": {}
+  },
+  "wikipedia": {
+    "all_locale": null,
+    "custom": {
+      "wiki_netloc": {
+        "af": "af.wikipedia.org",
+        "als": "als.wikipedia.org",
+        "am": "am.wikipedia.org",
+        "ar": "ar.wikipedia.org",
+        "as": "as.wikipedia.org",
+        "az": "az.wikipedia.org",
+        "be": "be.wikipedia.org",
+        "bg": "bg.wikipedia.org",
+        "bn": "bn.wikipedia.org",
+        "bs": "bs.wikipedia.org",
+        "ca": "ca.wikipedia.org",
+        "ckb": "ckb.wikipedia.org",
+        "cs": "cs.wikipedia.org",
+        "da": "da.wikipedia.org",
+        "de": "de.wikipedia.org",
+        "el": "el.wikipedia.org",
+        "en": "en.wikipedia.org",
+        "es": "es.wikipedia.org",
+        "et": "et.wikipedia.org",
+        "fa": "fa.wikipedia.org",
+        "fi": "fi.wikipedia.org",
+        "fo": "fo.wikipedia.org",
+        "fr": "fr.wikipedia.org",
+        "fy": "fy.wikipedia.org",
+        "gl": "gl.wikipedia.org",
+        "gu": "gu.wikipedia.org",
+        "he": "he.wikipedia.org",
+        "hi": "hi.wikipedia.org",
+        "hsb": "hsb.wikipedia.org",
+        "hu": "hu.wikipedia.org",
+        "hy": "hy.wikipedia.org",
+        "id": "id.wikipedia.org",
+        "is": "is.wikipedia.org",
+        "it": "it.wikipedia.org",
+        "ja": "ja.wikipedia.org",
+        "jv": "jv.wikipedia.org",
+        "ka": "ka.wikipedia.org",
+        "kn": "kn.wikipedia.org",
+        "ko": "ko.wikipedia.org",
+        "lb": "lb.wikipedia.org",
+        "lt": "lt.wikipedia.org",
+        "lv": "lv.wikipedia.org",
+        "mai": "mai.wikipedia.org",
+        "mk": "mk.wikipedia.org",
+        "ml": "ml.wikipedia.org",
+        "mn": "mn.wikipedia.org",
+        "mr": "mr.wikipedia.org",
+        "ne": "ne.wikipedia.org",
+        "no": "no.wikipedia.org",
+        "or": "or.wikipedia.org",
+        "os": "os.wikipedia.org",
+        "pa": "pa.wikipedia.org",
+        "pl": "pl.wikipedia.org",
+        "ps": "ps.wikipedia.org",
+        "pt": "pt.wikipedia.org",
+        "qu": "qu.wikipedia.org",
+        "ro": "ro.wikipedia.org",
+        "ru": "ru.wikipedia.org",
+        "sa": "sa.wikipedia.org",
+        "sah": "sah.wikipedia.org",
+        "sd": "sd.wikipedia.org",
+        "si": "si.wikipedia.org",
+        "sk": "sk.wikipedia.org",
+        "sl": "sl.wikipedia.org",
+        "sq": "sq.wikipedia.org",
+        "sr": "sr.wikipedia.org",
+        "ta": "ta.wikipedia.org",
+        "te": "te.wikipedia.org",
+        "th": "th.wikipedia.org",
+        "tl": "tl.wikipedia.org",
+        "tr": "tr.wikipedia.org",
+        "uk": "uk.wikipedia.org",
+        "ur": "ur.wikipedia.org",
+        "uz": "uz.wikipedia.org",
+        "vi": "vi.wikipedia.org",
+        "yi": "yi.wikipedia.org",
+        "zh": "zh.wikipedia.org",
+        "zh-classical": "zh-classical.wikipedia.org"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af",
+      "am": "am",
+      "ar": "ar",
+      "as": "as",
+      "az": "az",
+      "be": "be",
+      "bg": "bg",
+      "bn": "bn",
+      "bs": "bs",
+      "ca": "ca",
+      "ckb": "ckb",
+      "cs": "cs",
+      "da": "da",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "es": "es",
+      "et": "et",
+      "fa": "fa",
+      "fi": "fi",
+      "fil": "tl",
+      "fo": "fo",
+      "fr": "fr",
+      "fy": "fy",
+      "gl": "gl",
+      "gsw": "als",
+      "gu": "gu",
+      "he": "he",
+      "hi": "hi",
+      "hsb": "hsb",
+      "hu": "hu",
+      "hy": "hy",
+      "id": "id",
+      "is": "is",
+      "it": "it",
+      "ja": "ja",
+      "jv": "jv",
+      "ka": "ka",
+      "kn": "kn",
+      "ko": "ko",
+      "lb": "lb",
+      "lt": "lt",
+      "lv": "lv",
+      "mai": "mai",
+      "mk": "mk",
+      "ml": "ml",
+      "mn": "mn",
+      "mr": "mr",
+      "ne": "ne",
+      "no": "no",
+      "or": "or",
+      "os": "os",
+      "pa": "pa",
+      "pl": "pl",
+      "ps": "ps",
+      "pt": "pt",
+      "qu": "qu",
+      "ro": "ro",
+      "ru": "ru",
+      "sa": "sa",
+      "sah": "sah",
+      "sd": "sd",
+      "si": "si",
+      "sk": "sk",
+      "sl": "sl",
+      "sq": "sq",
+      "sr": "sr",
+      "ta": "ta",
+      "te": "te",
+      "th": "th",
+      "tr": "tr",
+      "uk": "uk",
+      "ur": "ur",
+      "uz": "uz",
+      "vi": "vi",
+      "yi": "yi",
+      "zh": "zh",
+      "zh_Hans": "zh",
+      "zh_Hant": "zh-classical"
+    },
+    "regions": {}
+  },
+  "yahoo": {
+    "all_locale": "any",
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ar": "ar",
+      "bg": "bg",
+      "cs": "cs",
+      "da": "da",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "es": "es",
+      "et": "et",
+      "fi": "fi",
+      "fr": "fr",
+      "he": "he",
+      "hr": "hr",
+      "hu": "hu",
+      "it": "it",
+      "ja": "ja",
+      "ko": "ko",
+      "lt": "lt",
+      "lv": "lv",
+      "nl": "nl",
+      "no": "no",
+      "pl": "pl",
+      "pt": "pt",
+      "ro": "ro",
+      "ru": "ru",
+      "sk": "sk",
+      "sl": "sl",
+      "sv": "sv",
+      "th": "th",
+      "tr": "tr",
+      "zh_Hans": "zh_chs",
+      "zh_Hant": "zh_cht"
+    },
+    "regions": {}
+  }
+}

+ 0 - 4381
searx/data/engines_languages.json

@@ -1,4381 +0,0 @@
-{
-  "bing": [
-    "af",
-    "am",
-    "ar",
-    "as",
-    "az-latn",
-    "be",
-    "bg",
-    "bn",
-    "bs-latn",
-    "ca",
-    "ca-es-valencia",
-    "chr-cher",
-    "cs",
-    "cy",
-    "da",
-    "de",
-    "el",
-    "en",
-    "es",
-    "et",
-    "eu",
-    "fa",
-    "fi",
-    "fil",
-    "fr",
-    "ga",
-    "gd",
-    "gl",
-    "gu",
-    "ha-latn",
-    "he",
-    "hi",
-    "hr",
-    "hu",
-    "hy",
-    "id",
-    "ig",
-    "is",
-    "it",
-    "ja",
-    "ka",
-    "kk",
-    "km",
-    "kn",
-    "ko",
-    "kok",
-    "ku-arab",
-    "ky",
-    "lb",
-    "lo",
-    "lt",
-    "lv",
-    "mi",
-    "mk",
-    "ml",
-    "mn-Cyrl-MN",
-    "mr",
-    "ms",
-    "mt",
-    "nb",
-    "ne",
-    "nl",
-    "nn",
-    "nso",
-    "or",
-    "pa-arab",
-    "pa-guru",
-    "pl",
-    "prs",
-    "pt-BR",
-    "pt-PT",
-    "quc",
-    "quz",
-    "ro",
-    "ru",
-    "rw",
-    "sd-arab",
-    "si",
-    "sk",
-    "sl",
-    "sq",
-    "sr-cyrl",
-    "sr-latn",
-    "sv",
-    "sw",
-    "ta",
-    "te",
-    "tg-cyrl",
-    "th",
-    "ti",
-    "tk",
-    "tn",
-    "tr",
-    "tt",
-    "ug",
-    "uk",
-    "ur",
-    "uz-latn",
-    "vi",
-    "wo",
-    "xh",
-    "yo",
-    "zh-Hans",
-    "zh-Hant",
-    "zu"
-  ],
-  "bing images": [
-    "af",
-    "am",
-    "ar",
-    "as",
-    "az-latn",
-    "be",
-    "bg",
-    "bn",
-    "bs-latn",
-    "ca",
-    "ca-es-valencia",
-    "chr-cher",
-    "cs",
-    "cy",
-    "da",
-    "de",
-    "el",
-    "en",
-    "es",
-    "et",
-    "eu",
-    "fa",
-    "fi",
-    "fil",
-    "fr",
-    "ga",
-    "gd",
-    "gl",
-    "gu",
-    "ha-latn",
-    "he",
-    "hi",
-    "hr",
-    "hu",
-    "hy",
-    "id",
-    "ig",
-    "is",
-    "it",
-    "ja",
-    "ka",
-    "kk",
-    "km",
-    "kn",
-    "ko",
-    "kok",
-    "ku-arab",
-    "ky",
-    "lb",
-    "lo",
-    "lt",
-    "lv",
-    "mi",
-    "mk",
-    "ml",
-    "mn-Cyrl-MN",
-    "mr",
-    "ms",
-    "mt",
-    "nb",
-    "ne",
-    "nl",
-    "nn",
-    "nso",
-    "or",
-    "pa-arab",
-    "pa-guru",
-    "pl",
-    "prs",
-    "pt-BR",
-    "pt-PT",
-    "quc",
-    "quz",
-    "ro",
-    "ru",
-    "rw",
-    "sd-arab",
-    "si",
-    "sk",
-    "sl",
-    "sq",
-    "sr-cyrl",
-    "sr-latn",
-    "sv",
-    "sw",
-    "ta",
-    "te",
-    "tg-cyrl",
-    "th",
-    "ti",
-    "tk",
-    "tn",
-    "tr",
-    "tt",
-    "ug",
-    "uk",
-    "ur",
-    "uz-latn",
-    "vi",
-    "wo",
-    "xh",
-    "yo",
-    "zh-Hans",
-    "zh-Hant",
-    "zu"
-  ],
-  "bing news": [
-    "af",
-    "am",
-    "ar",
-    "as",
-    "az-latn",
-    "be",
-    "bg",
-    "bn",
-    "bs-latn",
-    "ca",
-    "ca-es-valencia",
-    "chr-cher",
-    "cs",
-    "cy",
-    "da",
-    "de",
-    "el",
-    "en",
-    "es",
-    "et",
-    "eu",
-    "fa",
-    "fi",
-    "fil",
-    "fr",
-    "ga",
-    "gd",
-    "gl",
-    "gu",
-    "ha-latn",
-    "he",
-    "hi",
-    "hr",
-    "hu",
-    "hy",
-    "id",
-    "ig",
-    "is",
-    "it",
-    "ja",
-    "ka",
-    "kk",
-    "km",
-    "kn",
-    "ko",
-    "kok",
-    "ku-arab",
-    "ky",
-    "lb",
-    "lo",
-    "lt",
-    "lv",
-    "mi",
-    "mk",
-    "ml",
-    "mn-Cyrl-MN",
-    "mr",
-    "ms",
-    "mt",
-    "nb",
-    "ne",
-    "nl",
-    "nn",
-    "nso",
-    "or",
-    "pa-arab",
-    "pa-guru",
-    "pl",
-    "prs",
-    "pt-BR",
-    "pt-PT",
-    "quc",
-    "quz",
-    "ro",
-    "ru",
-    "rw",
-    "sd-arab",
-    "si",
-    "sk",
-    "sl",
-    "sq",
-    "sr-cyrl",
-    "sr-latn",
-    "sv",
-    "sw",
-    "ta",
-    "te",
-    "tg-cyrl",
-    "th",
-    "ti",
-    "tk",
-    "tn",
-    "tr",
-    "tt",
-    "ug",
-    "uk",
-    "ur",
-    "uz-latn",
-    "vi",
-    "wo",
-    "xh",
-    "yo",
-    "zh-Hans",
-    "zh-Hant",
-    "zu"
-  ],
-  "bing videos": [
-    "af",
-    "am",
-    "ar",
-    "as",
-    "az-latn",
-    "be",
-    "bg",
-    "bn",
-    "bs-latn",
-    "ca",
-    "ca-es-valencia",
-    "chr-cher",
-    "cs",
-    "cy",
-    "da",
-    "de",
-    "el",
-    "en",
-    "es",
-    "et",
-    "eu",
-    "fa",
-    "fi",
-    "fil",
-    "fr",
-    "ga",
-    "gd",
-    "gl",
-    "gu",
-    "ha-latn",
-    "he",
-    "hi",
-    "hr",
-    "hu",
-    "hy",
-    "id",
-    "ig",
-    "is",
-    "it",
-    "ja",
-    "ka",
-    "kk",
-    "km",
-    "kn",
-    "ko",
-    "kok",
-    "ku-arab",
-    "ky",
-    "lb",
-    "lo",
-    "lt",
-    "lv",
-    "mi",
-    "mk",
-    "ml",
-    "mn-Cyrl-MN",
-    "mr",
-    "ms",
-    "mt",
-    "nb",
-    "ne",
-    "nl",
-    "nn",
-    "nso",
-    "or",
-    "pa-arab",
-    "pa-guru",
-    "pl",
-    "prs",
-    "pt-BR",
-    "pt-PT",
-    "quc",
-    "quz",
-    "ro",
-    "ru",
-    "rw",
-    "sd-arab",
-    "si",
-    "sk",
-    "sl",
-    "sq",
-    "sr-cyrl",
-    "sr-latn",
-    "sv",
-    "sw",
-    "ta",
-    "te",
-    "tg-cyrl",
-    "th",
-    "ti",
-    "tk",
-    "tn",
-    "tr",
-    "tt",
-    "ug",
-    "uk",
-    "ur",
-    "uz-latn",
-    "vi",
-    "wo",
-    "xh",
-    "yo",
-    "zh-Hans",
-    "zh-Hant",
-    "zu"
-  ],
-  "dailymotion": [
-    "ar_AA",
-    "ar_AE",
-    "ar_EG",
-    "ar_SA",
-    "de_AT",
-    "de_CH",
-    "de_DE",
-    "el_GR",
-    "en_AU",
-    "en_CA",
-    "en_EN",
-    "en_GB",
-    "en_HK",
-    "en_IE",
-    "en_IN",
-    "en_NG",
-    "en_PH",
-    "en_PK",
-    "en_SG",
-    "en_US",
-    "en_ZA",
-    "es_AR",
-    "es_ES",
-    "es_MX",
-    "fr_BE",
-    "fr_CA",
-    "fr_CH",
-    "fr_CI",
-    "fr_FR",
-    "fr_MA",
-    "fr_SN",
-    "fr_TN",
-    "id_ID",
-    "it_CH",
-    "it_IT",
-    "ja_JP",
-    "ko_KR",
-    "ms_MY",
-    "nl_BE",
-    "nl_NL",
-    "pl_PL",
-    "pt_BR",
-    "pt_PT",
-    "ro_RO",
-    "ru_RU",
-    "th_TH",
-    "tr_TR",
-    "vi_VN",
-    "zh_CN",
-    "zh_TW"
-  ],
-  "ddg definitions": [
-    "ar-XA",
-    "bg-BG",
-    "ca-CT",
-    "ca-ES",
-    "cs-CZ",
-    "da-DK",
-    "de-AT",
-    "de-CH",
-    "de-DE",
-    "el-GR",
-    "en-AU",
-    "en-CA",
-    "en-ID",
-    "en-IE",
-    "en-IL",
-    "en-IN",
-    "en-MY",
-    "en-NZ",
-    "en-PH",
-    "en-PK",
-    "en-SG",
-    "en-TH",
-    "en-UK",
-    "en-US",
-    "en-VN",
-    "en-ZA",
-    "es-AR",
-    "es-CL",
-    "es-CO",
-    "es-ES",
-    "es-MX",
-    "es-PE",
-    "es-US",
-    "et-EE",
-    "fi-FI",
-    "fr-BE",
-    "fr-CA",
-    "fr-CH",
-    "fr-FR",
-    "hr-HR",
-    "hu-HU",
-    "it-IT",
-    "jp-JP",
-    "kr-KR",
-    "lt-LT",
-    "lv-LV",
-    "nl-BE",
-    "nl-NL",
-    "no-NO",
-    "pl-PL",
-    "pt-BR",
-    "pt-PT",
-    "ro-RO",
-    "ru-RU",
-    "sk-SK",
-    "sl-SL",
-    "sv-SE",
-    "tr-TR",
-    "tzh-HK",
-    "tzh-TW",
-    "uk-UA",
-    "wt-WT",
-    "zh-CN"
-  ],
-  "duckduckgo": [
-    "ar-XA",
-    "bg-BG",
-    "ca-CT",
-    "ca-ES",
-    "cs-CZ",
-    "da-DK",
-    "de-AT",
-    "de-CH",
-    "de-DE",
-    "el-GR",
-    "en-AU",
-    "en-CA",
-    "en-ID",
-    "en-IE",
-    "en-IL",
-    "en-IN",
-    "en-MY",
-    "en-NZ",
-    "en-PH",
-    "en-PK",
-    "en-SG",
-    "en-TH",
-    "en-UK",
-    "en-US",
-    "en-VN",
-    "en-ZA",
-    "es-AR",
-    "es-CL",
-    "es-CO",
-    "es-ES",
-    "es-MX",
-    "es-PE",
-    "es-US",
-    "et-EE",
-    "fi-FI",
-    "fr-BE",
-    "fr-CA",
-    "fr-CH",
-    "fr-FR",
-    "hr-HR",
-    "hu-HU",
-    "it-IT",
-    "jp-JP",
-    "kr-KR",
-    "lt-LT",
-    "lv-LV",
-    "nl-BE",
-    "nl-NL",
-    "no-NO",
-    "pl-PL",
-    "pt-BR",
-    "pt-PT",
-    "ro-RO",
-    "ru-RU",
-    "sk-SK",
-    "sl-SL",
-    "sv-SE",
-    "tr-TR",
-    "tzh-HK",
-    "tzh-TW",
-    "uk-UA",
-    "wt-WT",
-    "zh-CN"
-  ],
-  "duckduckgo images": [
-    "ar-XA",
-    "bg-BG",
-    "ca-CT",
-    "ca-ES",
-    "cs-CZ",
-    "da-DK",
-    "de-AT",
-    "de-CH",
-    "de-DE",
-    "el-GR",
-    "en-AU",
-    "en-CA",
-    "en-ID",
-    "en-IE",
-    "en-IL",
-    "en-IN",
-    "en-MY",
-    "en-NZ",
-    "en-PH",
-    "en-PK",
-    "en-SG",
-    "en-TH",
-    "en-UK",
-    "en-US",
-    "en-VN",
-    "en-ZA",
-    "es-AR",
-    "es-CL",
-    "es-CO",
-    "es-ES",
-    "es-MX",
-    "es-PE",
-    "es-US",
-    "et-EE",
-    "fi-FI",
-    "fr-BE",
-    "fr-CA",
-    "fr-CH",
-    "fr-FR",
-    "hr-HR",
-    "hu-HU",
-    "it-IT",
-    "jp-JP",
-    "kr-KR",
-    "lt-LT",
-    "lv-LV",
-    "nl-BE",
-    "nl-NL",
-    "no-NO",
-    "pl-PL",
-    "pt-BR",
-    "pt-PT",
-    "ro-RO",
-    "ru-RU",
-    "sk-SK",
-    "sl-SL",
-    "sv-SE",
-    "tr-TR",
-    "tzh-HK",
-    "tzh-TW",
-    "uk-UA",
-    "wt-WT",
-    "zh-CN"
-  ],
-  "google": {
-    "af": {
-      "name": "Afrikaans"
-    },
-    "ar": {
-      "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
-    },
-    "be": {
-      "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
-    },
-    "bg": {
-      "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
-    },
-    "ca": {
-      "name": "catal\u00e0"
-    },
-    "cs": {
-      "name": "\u010de\u0161tina"
-    },
-    "da": {
-      "name": "dansk"
-    },
-    "de": {
-      "name": "Deutsch"
-    },
-    "el": {
-      "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
-    },
-    "en": {
-      "name": "English"
-    },
-    "eo": {
-      "name": "esperanto"
-    },
-    "es": {
-      "name": "espa\u00f1ol"
-    },
-    "et": {
-      "name": "eesti"
-    },
-    "fa": {
-      "name": "\u0641\u0627\u0631\u0633\u06cc"
-    },
-    "fi": {
-      "name": "suomi"
-    },
-    "fr": {
-      "name": "fran\u00e7ais"
-    },
-    "hi": {
-      "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
-    },
-    "hr": {
-      "name": "hrvatski"
-    },
-    "hu": {
-      "name": "magyar"
-    },
-    "hy": {
-      "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
-    },
-    "id": {
-      "name": "Indonesia"
-    },
-    "is": {
-      "name": "\u00edslenska"
-    },
-    "it": {
-      "name": "italiano"
-    },
-    "iw": {
-      "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
-    },
-    "ja": {
-      "name": "\u65e5\u672c\u8a9e"
-    },
-    "ko": {
-      "name": "\ud55c\uad6d\uc5b4"
-    },
-    "lt": {
-      "name": "lietuvi\u0173"
-    },
-    "lv": {
-      "name": "latvie\u0161u"
-    },
-    "nl": {
-      "name": "Nederlands"
-    },
-    "no": {
-      "name": "norsk"
-    },
-    "pl": {
-      "name": "polski"
-    },
-    "pt": {
-      "name": "portugu\u00eas"
-    },
-    "ro": {
-      "name": "rom\u00e2n\u0103"
-    },
-    "ru": {
-      "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
-    },
-    "sk": {
-      "name": "sloven\u010dina"
-    },
-    "sl": {
-      "name": "sloven\u0161\u010dina"
-    },
-    "sr": {
-      "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
-    },
-    "sv": {
-      "name": "svenska"
-    },
-    "sw": {
-      "name": "Kiswahili"
-    },
-    "th": {
-      "name": "\u0e44\u0e17\u0e22"
-    },
-    "tl": {
-      "name": "Filipino"
-    },
-    "tr": {
-      "name": "T\u00fcrk\u00e7e"
-    },
-    "uk": {
-      "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
-    },
-    "vi": {
-      "name": "Ti\u1ebfng Vi\u1ec7t"
-    },
-    "zh-CN": {
-      "name": "\u4e2d\u6587 (\u7b80\u4f53)"
-    },
-    "zh-TW": {
-      "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
-    }
-  },
-  "google images": {
-    "af": {
-      "name": "Afrikaans"
-    },
-    "ar": {
-      "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
-    },
-    "be": {
-      "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
-    },
-    "bg": {
-      "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
-    },
-    "ca": {
-      "name": "catal\u00e0"
-    },
-    "cs": {
-      "name": "\u010de\u0161tina"
-    },
-    "da": {
-      "name": "dansk"
-    },
-    "de": {
-      "name": "Deutsch"
-    },
-    "el": {
-      "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
-    },
-    "en": {
-      "name": "English"
-    },
-    "eo": {
-      "name": "esperanto"
-    },
-    "es": {
-      "name": "espa\u00f1ol"
-    },
-    "et": {
-      "name": "eesti"
-    },
-    "fa": {
-      "name": "\u0641\u0627\u0631\u0633\u06cc"
-    },
-    "fi": {
-      "name": "suomi"
-    },
-    "fr": {
-      "name": "fran\u00e7ais"
-    },
-    "hi": {
-      "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
-    },
-    "hr": {
-      "name": "hrvatski"
-    },
-    "hu": {
-      "name": "magyar"
-    },
-    "hy": {
-      "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
-    },
-    "id": {
-      "name": "Indonesia"
-    },
-    "is": {
-      "name": "\u00edslenska"
-    },
-    "it": {
-      "name": "italiano"
-    },
-    "iw": {
-      "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
-    },
-    "ja": {
-      "name": "\u65e5\u672c\u8a9e"
-    },
-    "ko": {
-      "name": "\ud55c\uad6d\uc5b4"
-    },
-    "lt": {
-      "name": "lietuvi\u0173"
-    },
-    "lv": {
-      "name": "latvie\u0161u"
-    },
-    "nl": {
-      "name": "Nederlands"
-    },
-    "no": {
-      "name": "norsk"
-    },
-    "pl": {
-      "name": "polski"
-    },
-    "pt": {
-      "name": "portugu\u00eas"
-    },
-    "ro": {
-      "name": "rom\u00e2n\u0103"
-    },
-    "ru": {
-      "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
-    },
-    "sk": {
-      "name": "sloven\u010dina"
-    },
-    "sl": {
-      "name": "sloven\u0161\u010dina"
-    },
-    "sr": {
-      "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
-    },
-    "sv": {
-      "name": "svenska"
-    },
-    "sw": {
-      "name": "Kiswahili"
-    },
-    "th": {
-      "name": "\u0e44\u0e17\u0e22"
-    },
-    "tl": {
-      "name": "Filipino"
-    },
-    "tr": {
-      "name": "T\u00fcrk\u00e7e"
-    },
-    "uk": {
-      "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
-    },
-    "vi": {
-      "name": "Ti\u1ebfng Vi\u1ec7t"
-    },
-    "zh-CN": {
-      "name": "\u4e2d\u6587 (\u7b80\u4f53)"
-    },
-    "zh-TW": {
-      "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
-    }
-  },
-  "google news": {
-    "af": {
-      "name": "Afrikaans"
-    },
-    "ar": {
-      "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
-    },
-    "be": {
-      "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
-    },
-    "bg": {
-      "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
-    },
-    "ca": {
-      "name": "catal\u00e0"
-    },
-    "cs": {
-      "name": "\u010de\u0161tina"
-    },
-    "da": {
-      "name": "dansk"
-    },
-    "de": {
-      "name": "Deutsch"
-    },
-    "el": {
-      "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
-    },
-    "en": {
-      "name": "English"
-    },
-    "eo": {
-      "name": "esperanto"
-    },
-    "es": {
-      "name": "espa\u00f1ol"
-    },
-    "et": {
-      "name": "eesti"
-    },
-    "fa": {
-      "name": "\u0641\u0627\u0631\u0633\u06cc"
-    },
-    "fi": {
-      "name": "suomi"
-    },
-    "fr": {
-      "name": "fran\u00e7ais"
-    },
-    "hi": {
-      "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
-    },
-    "hr": {
-      "name": "hrvatski"
-    },
-    "hu": {
-      "name": "magyar"
-    },
-    "hy": {
-      "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
-    },
-    "id": {
-      "name": "Indonesia"
-    },
-    "is": {
-      "name": "\u00edslenska"
-    },
-    "it": {
-      "name": "italiano"
-    },
-    "iw": {
-      "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
-    },
-    "ja": {
-      "name": "\u65e5\u672c\u8a9e"
-    },
-    "ko": {
-      "name": "\ud55c\uad6d\uc5b4"
-    },
-    "lt": {
-      "name": "lietuvi\u0173"
-    },
-    "lv": {
-      "name": "latvie\u0161u"
-    },
-    "nl": {
-      "name": "Nederlands"
-    },
-    "no": {
-      "name": "norsk"
-    },
-    "pl": {
-      "name": "polski"
-    },
-    "pt": {
-      "name": "portugu\u00eas"
-    },
-    "ro": {
-      "name": "rom\u00e2n\u0103"
-    },
-    "ru": {
-      "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
-    },
-    "sk": {
-      "name": "sloven\u010dina"
-    },
-    "sl": {
-      "name": "sloven\u0161\u010dina"
-    },
-    "sr": {
-      "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
-    },
-    "sv": {
-      "name": "svenska"
-    },
-    "sw": {
-      "name": "Kiswahili"
-    },
-    "th": {
-      "name": "\u0e44\u0e17\u0e22"
-    },
-    "tl": {
-      "name": "Filipino"
-    },
-    "tr": {
-      "name": "T\u00fcrk\u00e7e"
-    },
-    "uk": {
-      "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
-    },
-    "vi": {
-      "name": "Ti\u1ebfng Vi\u1ec7t"
-    },
-    "zh-CN": {
-      "name": "\u4e2d\u6587 (\u7b80\u4f53)"
-    },
-    "zh-TW": {
-      "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
-    }
-  },
-  "google scholar": {
-    "af": {
-      "name": "Afrikaans"
-    },
-    "ar": {
-      "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
-    },
-    "be": {
-      "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
-    },
-    "bg": {
-      "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
-    },
-    "ca": {
-      "name": "catal\u00e0"
-    },
-    "cs": {
-      "name": "\u010de\u0161tina"
-    },
-    "da": {
-      "name": "dansk"
-    },
-    "de": {
-      "name": "Deutsch"
-    },
-    "el": {
-      "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
-    },
-    "en": {
-      "name": "English"
-    },
-    "eo": {
-      "name": "esperanto"
-    },
-    "es": {
-      "name": "espa\u00f1ol"
-    },
-    "et": {
-      "name": "eesti"
-    },
-    "fa": {
-      "name": "\u0641\u0627\u0631\u0633\u06cc"
-    },
-    "fi": {
-      "name": "suomi"
-    },
-    "fr": {
-      "name": "fran\u00e7ais"
-    },
-    "hi": {
-      "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
-    },
-    "hr": {
-      "name": "hrvatski"
-    },
-    "hu": {
-      "name": "magyar"
-    },
-    "hy": {
-      "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
-    },
-    "id": {
-      "name": "Indonesia"
-    },
-    "is": {
-      "name": "\u00edslenska"
-    },
-    "it": {
-      "name": "italiano"
-    },
-    "iw": {
-      "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
-    },
-    "ja": {
-      "name": "\u65e5\u672c\u8a9e"
-    },
-    "ko": {
-      "name": "\ud55c\uad6d\uc5b4"
-    },
-    "lt": {
-      "name": "lietuvi\u0173"
-    },
-    "lv": {
-      "name": "latvie\u0161u"
-    },
-    "nl": {
-      "name": "Nederlands"
-    },
-    "no": {
-      "name": "norsk"
-    },
-    "pl": {
-      "name": "polski"
-    },
-    "pt": {
-      "name": "portugu\u00eas"
-    },
-    "ro": {
-      "name": "rom\u00e2n\u0103"
-    },
-    "ru": {
-      "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
-    },
-    "sk": {
-      "name": "sloven\u010dina"
-    },
-    "sl": {
-      "name": "sloven\u0161\u010dina"
-    },
-    "sr": {
-      "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
-    },
-    "sv": {
-      "name": "svenska"
-    },
-    "sw": {
-      "name": "Kiswahili"
-    },
-    "th": {
-      "name": "\u0e44\u0e17\u0e22"
-    },
-    "tl": {
-      "name": "Filipino"
-    },
-    "tr": {
-      "name": "T\u00fcrk\u00e7e"
-    },
-    "uk": {
-      "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
-    },
-    "vi": {
-      "name": "Ti\u1ebfng Vi\u1ec7t"
-    },
-    "zh-CN": {
-      "name": "\u4e2d\u6587 (\u7b80\u4f53)"
-    },
-    "zh-TW": {
-      "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
-    }
-  },
-  "google videos": {
-    "af": {
-      "name": "Afrikaans"
-    },
-    "ar": {
-      "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
-    },
-    "be": {
-      "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
-    },
-    "bg": {
-      "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
-    },
-    "ca": {
-      "name": "catal\u00e0"
-    },
-    "cs": {
-      "name": "\u010de\u0161tina"
-    },
-    "da": {
-      "name": "dansk"
-    },
-    "de": {
-      "name": "Deutsch"
-    },
-    "el": {
-      "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
-    },
-    "en": {
-      "name": "English"
-    },
-    "eo": {
-      "name": "esperanto"
-    },
-    "es": {
-      "name": "espa\u00f1ol"
-    },
-    "et": {
-      "name": "eesti"
-    },
-    "fa": {
-      "name": "\u0641\u0627\u0631\u0633\u06cc"
-    },
-    "fi": {
-      "name": "suomi"
-    },
-    "fr": {
-      "name": "fran\u00e7ais"
-    },
-    "hi": {
-      "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
-    },
-    "hr": {
-      "name": "hrvatski"
-    },
-    "hu": {
-      "name": "magyar"
-    },
-    "hy": {
-      "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576"
-    },
-    "id": {
-      "name": "Indonesia"
-    },
-    "is": {
-      "name": "\u00edslenska"
-    },
-    "it": {
-      "name": "italiano"
-    },
-    "iw": {
-      "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
-    },
-    "ja": {
-      "name": "\u65e5\u672c\u8a9e"
-    },
-    "ko": {
-      "name": "\ud55c\uad6d\uc5b4"
-    },
-    "lt": {
-      "name": "lietuvi\u0173"
-    },
-    "lv": {
-      "name": "latvie\u0161u"
-    },
-    "nl": {
-      "name": "Nederlands"
-    },
-    "no": {
-      "name": "norsk"
-    },
-    "pl": {
-      "name": "polski"
-    },
-    "pt": {
-      "name": "portugu\u00eas"
-    },
-    "ro": {
-      "name": "rom\u00e2n\u0103"
-    },
-    "ru": {
-      "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439"
-    },
-    "sk": {
-      "name": "sloven\u010dina"
-    },
-    "sl": {
-      "name": "sloven\u0161\u010dina"
-    },
-    "sr": {
-      "name": "\u0441\u0440\u043f\u0441\u043a\u0438"
-    },
-    "sv": {
-      "name": "svenska"
-    },
-    "sw": {
-      "name": "Kiswahili"
-    },
-    "th": {
-      "name": "\u0e44\u0e17\u0e22"
-    },
-    "tl": {
-      "name": "Filipino"
-    },
-    "tr": {
-      "name": "T\u00fcrk\u00e7e"
-    },
-    "uk": {
-      "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
-    },
-    "vi": {
-      "name": "Ti\u1ebfng Vi\u1ec7t"
-    },
-    "zh-CN": {
-      "name": "\u4e2d\u6587 (\u7b80\u4f53)"
-    },
-    "zh-TW": {
-      "name": "\u4e2d\u6587 (\u7e41\u9ad4)"
-    }
-  },
-  "peertube": [
-    "ca",
-    "cs",
-    "de",
-    "el",
-    "en",
-    "eo",
-    "es",
-    "eu",
-    "fi",
-    "fr",
-    "gd",
-    "it",
-    "ja",
-    "nl",
-    "oc",
-    "pl",
-    "pt",
-    "ru",
-    "sv",
-    "zh"
-  ],
-  "qwant": {
-    "bg-BG": "bg_BG",
-    "ca-ES": "ca_ES",
-    "cs-CZ": "cs_CZ",
-    "da-DK": "da_DK",
-    "de-AT": "de_AT",
-    "de-CH": "de_CH",
-    "de-DE": "de_DE",
-    "el-GR": "el_GR",
-    "en-AU": "en_AU",
-    "en-CA": "en_CA",
-    "en-GB": "en_GB",
-    "en-IE": "en_IE",
-    "en-MY": "en_MY",
-    "en-NZ": "en_NZ",
-    "en-US": "en_US",
-    "es-AR": "es_AR",
-    "es-CL": "es_CL",
-    "es-ES": "es_ES",
-    "es-MX": "es_MX",
-    "et-EE": "et_EE",
-    "fi-FI": "fi_FI",
-    "fr-BE": "fr_BE",
-    "fr-CA": "fr_CA",
-    "fr-CH": "fr_CH",
-    "fr-FR": "fr_FR",
-    "hu-HU": "hu_HU",
-    "it-CH": "it_CH",
-    "it-IT": "it_IT",
-    "ko-KR": "ko_KR",
-    "nb-NO": "nb_NO",
-    "nl-BE": "nl_BE",
-    "nl-NL": "nl_NL",
-    "pl-PL": "pl_PL",
-    "pt-PT": "pt_PT",
-    "ro-RO": "ro_RO",
-    "sv-SE": "sv_SE",
-    "th-TH": "th_TH",
-    "zh-CN": "zh_CN",
-    "zh-HK": "zh_HK"
-  },
-  "qwant images": {
-    "bg-BG": "bg_BG",
-    "ca-ES": "ca_ES",
-    "cs-CZ": "cs_CZ",
-    "da-DK": "da_DK",
-    "de-AT": "de_AT",
-    "de-CH": "de_CH",
-    "de-DE": "de_DE",
-    "el-GR": "el_GR",
-    "en-AU": "en_AU",
-    "en-CA": "en_CA",
-    "en-GB": "en_GB",
-    "en-IE": "en_IE",
-    "en-MY": "en_MY",
-    "en-NZ": "en_NZ",
-    "en-US": "en_US",
-    "es-AR": "es_AR",
-    "es-CL": "es_CL",
-    "es-ES": "es_ES",
-    "es-MX": "es_MX",
-    "et-EE": "et_EE",
-    "fi-FI": "fi_FI",
-    "fr-BE": "fr_BE",
-    "fr-CA": "fr_CA",
-    "fr-CH": "fr_CH",
-    "fr-FR": "fr_FR",
-    "hu-HU": "hu_HU",
-    "it-CH": "it_CH",
-    "it-IT": "it_IT",
-    "ko-KR": "ko_KR",
-    "nb-NO": "nb_NO",
-    "nl-BE": "nl_BE",
-    "nl-NL": "nl_NL",
-    "pl-PL": "pl_PL",
-    "pt-PT": "pt_PT",
-    "ro-RO": "ro_RO",
-    "sv-SE": "sv_SE",
-    "th-TH": "th_TH",
-    "zh-CN": "zh_CN",
-    "zh-HK": "zh_HK"
-  },
-  "qwant news": {
-    "ca-ES": "ca_ES",
-    "de-AT": "de_AT",
-    "de-CH": "de_CH",
-    "de-DE": "de_DE",
-    "en-AU": "en_AU",
-    "en-CA": "en_CA",
-    "en-GB": "en_GB",
-    "en-IE": "en_IE",
-    "en-MY": "en_MY",
-    "en-NZ": "en_NZ",
-    "en-US": "en_US",
-    "es-AR": "es_AR",
-    "es-CL": "es_CL",
-    "es-ES": "es_ES",
-    "es-MX": "es_MX",
-    "fr-BE": "fr_BE",
-    "fr-CA": "fr_CA",
-    "fr-CH": "fr_CH",
-    "fr-FR": "fr_FR",
-    "it-CH": "it_CH",
-    "it-IT": "it_IT",
-    "nl-BE": "nl_BE",
-    "nl-NL": "nl_NL",
-    "pt-PT": "pt_PT"
-  },
-  "qwant videos": {
-    "bg-BG": "bg_BG",
-    "ca-ES": "ca_ES",
-    "cs-CZ": "cs_CZ",
-    "da-DK": "da_DK",
-    "de-AT": "de_AT",
-    "de-CH": "de_CH",
-    "de-DE": "de_DE",
-    "el-GR": "el_GR",
-    "en-AU": "en_AU",
-    "en-CA": "en_CA",
-    "en-GB": "en_GB",
-    "en-IE": "en_IE",
-    "en-MY": "en_MY",
-    "en-NZ": "en_NZ",
-    "en-US": "en_US",
-    "es-AR": "es_AR",
-    "es-CL": "es_CL",
-    "es-ES": "es_ES",
-    "es-MX": "es_MX",
-    "et-EE": "et_EE",
-    "fi-FI": "fi_FI",
-    "fr-BE": "fr_BE",
-    "fr-CA": "fr_CA",
-    "fr-CH": "fr_CH",
-    "fr-FR": "fr_FR",
-    "hu-HU": "hu_HU",
-    "it-CH": "it_CH",
-    "it-IT": "it_IT",
-    "ko-KR": "ko_KR",
-    "nb-NO": "nb_NO",
-    "nl-BE": "nl_BE",
-    "nl-NL": "nl_NL",
-    "pl-PL": "pl_PL",
-    "pt-PT": "pt_PT",
-    "ro-RO": "ro_RO",
-    "sv-SE": "sv_SE",
-    "th-TH": "th_TH",
-    "zh-CN": "zh_CN",
-    "zh-HK": "zh_HK"
-  },
-  "startpage": {
-    "af": {
-      "alias": "afrikaans"
-    },
-    "am": {
-      "alias": "amharic"
-    },
-    "ar": {
-      "alias": "arabic"
-    },
-    "az": {
-      "alias": "azerbaijani"
-    },
-    "be": {
-      "alias": "belarusian"
-    },
-    "bg": {
-      "alias": "bulgarian"
-    },
-    "bn": {
-      "alias": "bengali"
-    },
-    "bs": {
-      "alias": "bosnian"
-    },
-    "ca": {
-      "alias": "catalan"
-    },
-    "cs": {
-      "alias": "czech"
-    },
-    "cy": {
-      "alias": "welsh"
-    },
-    "da": {
-      "alias": "dansk"
-    },
-    "de": {
-      "alias": "deutsch"
-    },
-    "el": {
-      "alias": "greek"
-    },
-    "en": {
-      "alias": "english"
-    },
-    "en-GB": {
-      "alias": "english_uk"
-    },
-    "eo": {
-      "alias": "esperanto"
-    },
-    "es": {
-      "alias": "espanol"
-    },
-    "et": {
-      "alias": "estonian"
-    },
-    "eu": {
-      "alias": "basque"
-    },
-    "fa": {
-      "alias": "persian"
-    },
-    "fi": {
-      "alias": "suomi"
-    },
-    "fo": {
-      "alias": "faroese"
-    },
-    "fr": {
-      "alias": "francais"
-    },
-    "fy": {
-      "alias": "frisian"
-    },
-    "ga": {
-      "alias": "irish"
-    },
-    "gd": {
-      "alias": "gaelic"
-    },
-    "gl": {
-      "alias": "galician"
-    },
-    "gu": {
-      "alias": "gujarati"
-    },
-    "he": {
-      "alias": "hebrew"
-    },
-    "hi": {
-      "alias": "hindi"
-    },
-    "hr": {
-      "alias": "croatian"
-    },
-    "hu": {
-      "alias": "hungarian"
-    },
-    "ia": {
-      "alias": "interlingua"
-    },
-    "id": {
-      "alias": "indonesian"
-    },
-    "is": {
-      "alias": "icelandic"
-    },
-    "it": {
-      "alias": "italiano"
-    },
-    "ja": {
-      "alias": "nihongo"
-    },
-    "jv": {
-      "alias": "javanese"
-    },
-    "ka": {
-      "alias": "georgian"
-    },
-    "kn": {
-      "alias": "kannada"
-    },
-    "ko": {
-      "alias": "hangul"
-    },
-    "la": {
-      "alias": "latin"
-    },
-    "lt": {
-      "alias": "lithuanian"
-    },
-    "lv": {
-      "alias": "latvian"
-    },
-    "mai": {
-      "alias": "bihari"
-    },
-    "mk": {
-      "alias": "macedonian"
-    },
-    "ml": {
-      "alias": "malayalam"
-    },
-    "mr": {
-      "alias": "marathi"
-    },
-    "ms": {
-      "alias": "malay"
-    },
-    "mt": {
-      "alias": "maltese"
-    },
-    "ne": {
-      "alias": "nepali"
-    },
-    "nl": {
-      "alias": "nederlands"
-    },
-    "no": {
-      "alias": "norsk"
-    },
-    "oc": {
-      "alias": "occitan"
-    },
-    "pa": {
-      "alias": "punjabi"
-    },
-    "pl": {
-      "alias": "polski"
-    },
-    "pt": {
-      "alias": "portugues"
-    },
-    "ro": {
-      "alias": "romanian"
-    },
-    "ru": {
-      "alias": "russian"
-    },
-    "si": {
-      "alias": "sinhalese"
-    },
-    "sk": {
-      "alias": "slovak"
-    },
-    "sl": {
-      "alias": "slovenian"
-    },
-    "sq": {
-      "alias": "albanian"
-    },
-    "sr": {
-      "alias": "serbian"
-    },
-    "su": {
-      "alias": "sudanese"
-    },
-    "sv": {
-      "alias": "svenska"
-    },
-    "sw": {
-      "alias": "swahili"
-    },
-    "ta": {
-      "alias": "tamil"
-    },
-    "te": {
-      "alias": "telugu"
-    },
-    "th": {
-      "alias": "thai"
-    },
-    "ti": {
-      "alias": "tigrinya"
-    },
-    "tl": {
-      "alias": "tagalog"
-    },
-    "tr": {
-      "alias": "turkce"
-    },
-    "uk": {
-      "alias": "ukrainian"
-    },
-    "ur": {
-      "alias": "urdu"
-    },
-    "uz": {
-      "alias": "uzbek"
-    },
-    "vi": {
-      "alias": "vietnamese"
-    },
-    "xh": {
-      "alias": "xhosa"
-    },
-    "zh": {
-      "alias": "jiantizhongwen"
-    },
-    "zh-HK": {
-      "alias": "fantizhengwen"
-    },
-    "zh-TW": {
-      "alias": "fantizhengwen"
-    },
-    "zu": {
-      "alias": "zulu"
-    }
-  },
-  "wikidata": {
-    "ab": {
-      "english_name": "Abkhazian",
-      "name": "\u0410\u0525\u0441\u0443\u0430"
-    },
-    "ace": {
-      "english_name": "Acehnese",
-      "name": "Basa Ac\u00e8h"
-    },
-    "ady": {
-      "english_name": "Adyghe",
-      "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d"
-    },
-    "af": {
-      "english_name": "Afrikaans",
-      "name": "Afrikaans"
-    },
-    "ak": {
-      "english_name": "Akan",
-      "name": "Akana"
-    },
-    "als": {
-      "english_name": "Alemannic",
-      "name": "Alemannisch"
-    },
-    "alt": {
-      "english_name": "Southern Altai",
-      "name": "\u0410\u043b\u0442\u0430\u0439"
-    },
-    "am": {
-      "english_name": "Amharic",
-      "name": "\u12a0\u121b\u122d\u129b"
-    },
-    "ami": {
-      "english_name": "Amis",
-      "name": "Pangcah"
-    },
-    "an": {
-      "english_name": "Aragonese",
-      "name": "Aragon\u00e9s"
-    },
-    "ang": {
-      "english_name": "Anglo-Saxon",
-      "name": "\u00c6nglisc"
-    },
-    "ar": {
-      "english_name": "Arabic",
-      "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
-    },
-    "arc": {
-      "english_name": "Aramaic",
-      "name": "\u0710\u072a\u0721\u071d\u0710"
-    },
-    "ary": {
-      "english_name": "Moroccan Arabic",
-      "name": "\u062f\u0627\u0631\u064a\u062c\u0629"
-    },
-    "arz": {
-      "english_name": "Egyptian Arabic",
-      "name": "\u0645\u0635\u0631\u0649 (Ma\u1e63ri)"
-    },
-    "as": {
-      "english_name": "Assamese",
-      "name": "\u0985\u09b8\u09ae\u09c0\u09af\u09bc\u09be"
-    },
-    "ast": {
-      "english_name": "Asturian",
-      "name": "Asturianu"
-    },
-    "atj": {
-      "english_name": "Atikamekw",
-      "name": "Atikamekw"
-    },
-    "av": {
-      "english_name": "Avar",
-      "name": "\u0410\u0432\u0430\u0440"
-    },
-    "avk": {
-      "english_name": "Kotava",
-      "name": "Kotava"
-    },
-    "awa": {
-      "english_name": "Awadhi",
-      "name": "\u0905\u0935\u0927\u0940"
-    },
-    "ay": {
-      "english_name": "Aymara",
-      "name": "Aymar"
-    },
-    "az": {
-      "english_name": "Azerbaijani",
-      "name": "Az\u0259rbaycanca"
-    },
-    "azb": {
-      "english_name": "South Azerbaijani",
-      "name": "\u062a\u06c6\u0631\u06a9\u062c\u0647"
-    },
-    "ba": {
-      "english_name": "Bashkir",
-      "name": "\u0411\u0430\u0448\u04a1\u043e\u0440\u0442"
-    },
-    "ban": {
-      "english_name": "Balinese",
-      "name": "Bali"
-    },
-    "bar": {
-      "english_name": "Bavarian",
-      "name": "Boarisch"
-    },
-    "bat-smg": {
-      "english_name": "Samogitian",
-      "name": "\u017demait\u0117\u0161ka"
-    },
-    "bcl": {
-      "english_name": "Central Bicolano",
-      "name": "Bikol"
-    },
-    "be": {
-      "english_name": "Belarusian",
-      "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
-    },
-    "be-tarask": {
-      "english_name": "Belarusian (Tara\u0161kievica)",
-      "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)"
-    },
-    "bg": {
-      "english_name": "Bulgarian",
-      "name": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
-    },
-    "bh": {
-      "english_name": "Bhojpuri",
-      "name": "\u092d\u094b\u091c\u092a\u0941\u0930\u0940"
-    },
-    "bi": {
-      "english_name": "Bislama",
-      "name": "Bislama"
-    },
-    "bjn": {
-      "english_name": "Banjar",
-      "name": "Bahasa Banjar"
-    },
-    "blk": {
-      "english_name": "Pa'O",
-      "name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f"
-    },
-    "bm": {
-      "english_name": "Bambara",
-      "name": "Bamanankan"
-    },
-    "bn": {
-      "english_name": "Bengali",
-      "name": "\u09ac\u09be\u0982\u09b2\u09be"
-    },
-    "bo": {
-      "english_name": "Tibetan",
-      "name": "\u0f56\u0f7c\u0f51\u0f0b\u0f66\u0f90\u0f51"
-    },
-    "bpy": {
-      "english_name": "Bishnupriya Manipuri",
-      "name": "\u0987\u09ae\u09be\u09b0 \u09a0\u09be\u09b0/\u09ac\u09bf\u09b7\u09cd\u09a3\u09c1\u09aa\u09cd\u09b0\u09bf\u09af\u09bc\u09be \u09ae\u09a3\u09bf\u09aa\u09c1\u09b0\u09c0"
-    },
-    "br": {
-      "english_name": "Breton",
-      "name": "Brezhoneg"
-    },
-    "bs": {
-      "english_name": "Bosnian",
-      "name": "Bosanski"
-    },
-    "bug": {
-      "english_name": "Buginese",
-      "name": "Basa Ugi"
-    },
-    "bxr": {
-      "english_name": "Buryat",
-      "name": "\u0411\u0443\u0440\u044f\u0430\u0434"
-    },
-    "ca": {
-      "english_name": "Catalan",
-      "name": "Catal\u00e0"
-    },
-    "cbk-zam": {
-      "english_name": "Zamboanga Chavacano",
-      "name": "Chavacano de Zamboanga"
-    },
-    "cdo": {
-      "english_name": "Min Dong",
-      "name": "M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304"
-    },
-    "ce": {
-      "english_name": "Chechen",
-      "name": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d"
-    },
-    "ceb": {
-      "english_name": "Cebuano",
-      "name": "Sinugboanong Binisaya"
-    },
-    "ch": {
-      "english_name": "Chamorro",
-      "name": "Chamoru"
-    },
-    "chr": {
-      "english_name": "Cherokee",
-      "name": "\u13e3\u13b3\u13a9"
-    },
-    "chy": {
-      "english_name": "Cheyenne",
-      "name": "Tsets\u00eahest\u00e2hese"
-    },
-    "ckb": {
-      "english_name": "Sorani",
-      "name": "Soran\u00ee / \u06a9\u0648\u0631\u062f\u06cc"
-    },
-    "co": {
-      "english_name": "Corsican",
-      "name": "Corsu"
-    },
-    "cr": {
-      "english_name": "Cree",
-      "name": "Nehiyaw"
-    },
-    "crh": {
-      "english_name": "Crimean Tatar",
-      "name": "Q\u0131r\u0131mtatarca"
-    },
-    "cs": {
-      "english_name": "Czech",
-      "name": "\u010ce\u0161tina"
-    },
-    "csb": {
-      "english_name": "Kashubian",
-      "name": "Kasz\u00ebbsczi"
-    },
-    "cu": {
-      "english_name": "Old Church Slavonic",
-      "name": "\u0421\u043b\u043e\u0432\u0463\u043d\u044c\u0441\u043a\u044a"
-    },
-    "cv": {
-      "english_name": "Chuvash",
-      "name": "\u0427\u0103\u0432\u0430\u0448"
-    },
-    "cy": {
-      "english_name": "Welsh",
-      "name": "Cymraeg"
-    },
-    "da": {
-      "english_name": "Danish",
-      "name": "Dansk"
-    },
-    "dag": {
-      "english_name": "Dagbani",
-      "name": "Dagbanli"
-    },
-    "de": {
-      "english_name": "German",
-      "name": "Deutsch"
-    },
-    "din": {
-      "english_name": "Dinka",
-      "name": "Thu\u0254\u014bj\u00e4\u014b"
-    },
-    "diq": {
-      "english_name": "Zazaki",
-      "name": "Zazaki"
-    },
-    "dsb": {
-      "english_name": "Lower Sorbian",
-      "name": "Dolnoserbski"
-    },
-    "dty": {
-      "english_name": "Doteli",
-      "name": "\u0921\u094b\u091f\u0947\u0932\u0940"
-    },
-    "dv": {
-      "english_name": "Divehi",
-      "name": "\u078b\u07a8\u0788\u07ac\u0780\u07a8\u0784\u07a6\u0790\u07b0"
-    },
-    "dz": {
-      "english_name": "Dzongkha",
-      "name": "\u0f47\u0f7c\u0f44\u0f0b\u0f41"
-    },
-    "ee": {
-      "english_name": "Ewe",
-      "name": "E\u028begbe"
-    },
-    "el": {
-      "english_name": "Greek",
-      "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
-    },
-    "eml": {
-      "english_name": "Emilian-Romagnol",
-      "name": "Emili\u00e0n e rumagn\u00f2l"
-    },
-    "en": {
-      "english_name": "English",
-      "name": "English"
-    },
-    "eo": {
-      "english_name": "Esperanto",
-      "name": "Esperanto"
-    },
-    "es": {
-      "english_name": "Spanish",
-      "name": "Espa\u00f1ol"
-    },
-    "et": {
-      "english_name": "Estonian",
-      "name": "Eesti"
-    },
-    "eu": {
-      "english_name": "Basque",
-      "name": "Euskara"
-    },
-    "ext": {
-      "english_name": "Extremaduran",
-      "name": "Estreme\u00f1u"
-    },
-    "fa": {
-      "english_name": "Persian",
-      "name": "\u0641\u0627\u0631\u0633\u06cc"
-    },
-    "ff": {
-      "english_name": "Fula",
-      "name": "Fulfulde"
-    },
-    "fi": {
-      "english_name": "Finnish",
-      "name": "Suomi"
-    },
-    "fiu-vro": {
-      "english_name": "V\u00f5ro",
-      "name": "V\u00f5ro"
-    },
-    "fj": {
-      "english_name": "Fijian",
-      "name": "Na Vosa Vakaviti"
-    },
-    "fo": {
-      "english_name": "Faroese",
-      "name": "F\u00f8royskt"
-    },
-    "fr": {
-      "english_name": "French",
-      "name": "Fran\u00e7ais"
-    },
-    "frp": {
-      "english_name": "Franco-Proven\u00e7al",
-      "name": "Arpetan"
-    },
-    "frr": {
-      "english_name": "North Frisian",
-      "name": "Nordfrasch"
-    },
-    "fur": {
-      "english_name": "Friulian",
-      "name": "Furlan"
-    },
-    "fy": {
-      "english_name": "West Frisian",
-      "name": "Frysk"
-    },
-    "ga": {
-      "english_name": "Irish",
-      "name": "Gaeilge"
-    },
-    "gag": {
-      "english_name": "Gagauz",
-      "name": "Gagauz"
-    },
-    "gan": {
-      "english_name": "Gan",
-      "name": "\u8d1b\u8a9e"
-    },
-    "gcr": {
-      "english_name": "Guianan Creole",
-      "name": "Kriy\u00f2l Gwiyannen"
-    },
-    "gd": {
-      "english_name": "Scottish Gaelic",
-      "name": "G\u00e0idhlig"
-    },
-    "gl": {
-      "english_name": "Galician",
-      "name": "Galego"
-    },
-    "glk": {
-      "english_name": "Gilaki",
-      "name": "\u06af\u06cc\u0644\u06a9\u06cc"
-    },
-    "gn": {
-      "english_name": "Guarani",
-      "name": "Ava\u00f1e'\u1ebd"
-    },
-    "gom": {
-      "english_name": "Goan Konkani",
-      "name": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni"
-    },
-    "gor": {
-      "english_name": "Gorontalo",
-      "name": "Hulontalo"
-    },
-    "got": {
-      "english_name": "Gothic",
-      "name": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a"
-    },
-    "gu": {
-      "english_name": "Gujarati",
-      "name": "\u0a97\u0ac1\u0a9c\u0ab0\u0abe\u0aa4\u0ac0"
-    },
-    "guw": {
-      "english_name": "Gun",
-      "name": "Gungbe"
-    },
-    "gv": {
-      "english_name": "Manx",
-      "name": "Gaelg"
-    },
-    "ha": {
-      "english_name": "Hausa",
-      "name": "Hausa / \u0647\u064e\u0648\u064f\u0633\u064e"
-    },
-    "hak": {
-      "english_name": "Hakka",
-      "name": "Hak-k\u00e2-fa / \u5ba2\u5bb6\u8a71"
-    },
-    "haw": {
-      "english_name": "Hawaiian",
-      "name": "Hawai\u02bbi"
-    },
-    "he": {
-      "english_name": "Hebrew",
-      "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
-    },
-    "hi": {
-      "english_name": "Hindi",
-      "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
-    },
-    "hif": {
-      "english_name": "Fiji Hindi",
-      "name": "Fiji Hindi"
-    },
-    "hr": {
-      "english_name": "Croatian",
-      "name": "Hrvatski"
-    },
-    "hsb": {
-      "english_name": "Upper Sorbian",
-      "name": "Hornjoserbsce"
-    },
-    "ht": {
-      "english_name": "Haitian",
-      "name": "Kr\u00e8yol ayisyen"
-    },
-    "hu": {
-      "english_name": "Hungarian",
-      "name": "Magyar"
-    },
-    "hy": {
-      "english_name": "Armenian",
-      "name": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576"
-    },
-    "hyw": {
-      "english_name": "Western Armenian",
-      "name": "\u0531\u0580\u0565\u0582\u0574\u057f\u0561\u0570\u0561\u0575\u0565\u0580\u0567\u0576"
-    },
-    "ia": {
-      "english_name": "Interlingua",
-      "name": "Interlingua"
-    },
-    "id": {
-      "english_name": "Indonesian",
-      "name": "Bahasa Indonesia"
-    },
-    "ie": {
-      "english_name": "Interlingue",
-      "name": "Interlingue"
-    },
-    "ig": {
-      "english_name": "Igbo",
-      "name": "\u00ccgb\u00f2"
-    },
-    "ik": {
-      "english_name": "Inupiak",
-      "name": "I\u00f1upiatun"
-    },
-    "ilo": {
-      "english_name": "Ilokano",
-      "name": "Ilokano"
-    },
-    "inh": {
-      "english_name": "Ingush",
-      "name": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439"
-    },
-    "io": {
-      "english_name": "Ido",
-      "name": "Ido"
-    },
-    "is": {
-      "english_name": "Icelandic",
-      "name": "\u00cdslenska"
-    },
-    "it": {
-      "english_name": "Italian",
-      "name": "Italiano"
-    },
-    "iu": {
-      "english_name": "Inuktitut",
-      "name": "\u1403\u14c4\u1483\u144e\u1450\u1466"
-    },
-    "ja": {
-      "english_name": "Japanese",
-      "name": "\u65e5\u672c\u8a9e"
-    },
-    "jam": {
-      "english_name": "Jamaican Patois",
-      "name": "Jumiekan Kryuol"
-    },
-    "jbo": {
-      "english_name": "Lojban",
-      "name": "Lojban"
-    },
-    "jv": {
-      "english_name": "Javanese",
-      "name": "Basa Jawa"
-    },
-    "ka": {
-      "english_name": "Georgian",
-      "name": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8"
-    },
-    "kaa": {
-      "english_name": "Karakalpak",
-      "name": "Qaraqalpaqsha"
-    },
-    "kab": {
-      "english_name": "Kabyle",
-      "name": "Taqbaylit"
-    },
-    "kbd": {
-      "english_name": "Kabardian Circassian",
-      "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d (Adighabze)"
-    },
-    "kbp": {
-      "english_name": "Kabiye",
-      "name": "Kab\u0269y\u025b"
-    },
-    "kcg": {
-      "english_name": "Tyap",
-      "name": "Tyap"
-    },
-    "kg": {
-      "english_name": "Kongo",
-      "name": "Kik\u00f4ngo"
-    },
-    "ki": {
-      "english_name": "Kikuyu",
-      "name": "G\u0129k\u0169y\u0169"
-    },
-    "kk": {
-      "english_name": "Kazakh",
-      "name": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430"
-    },
-    "kl": {
-      "english_name": "Greenlandic",
-      "name": "Kalaallisut"
-    },
-    "km": {
-      "english_name": "Khmer",
-      "name": "\u1797\u17b6\u179f\u17b6\u1781\u17d2\u1798\u17c2\u179a"
-    },
-    "kn": {
-      "english_name": "Kannada",
-      "name": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1"
-    },
-    "ko": {
-      "english_name": "Korean",
-      "name": "\ud55c\uad6d\uc5b4"
-    },
-    "koi": {
-      "english_name": "Komi-Permyak",
-      "name": "\u041f\u0435\u0440\u0435\u043c \u041a\u043e\u043c\u0438 (Perem Komi)"
-    },
-    "krc": {
-      "english_name": "Karachay-Balkar",
-      "name": "\u041a\u044a\u0430\u0440\u0430\u0447\u0430\u0439-\u041c\u0430\u043b\u043a\u044a\u0430\u0440 (Qarachay-Malqar)"
-    },
-    "ks": {
-      "english_name": "Kashmiri",
-      "name": "\u0915\u0936\u094d\u092e\u0940\u0930\u0940 / \u0643\u0634\u0645\u064a\u0631\u064a"
-    },
-    "ksh": {
-      "english_name": "Ripuarian",
-      "name": "Ripoarisch"
-    },
-    "ku": {
-      "english_name": "Kurdish",
-      "name": "Kurd\u00ee / \u0643\u0648\u0631\u062f\u06cc"
-    },
-    "kv": {
-      "english_name": "Komi",
-      "name": "\u041a\u043e\u043c\u0438"
-    },
-    "kw": {
-      "english_name": "Cornish",
-      "name": "Kernowek/Karnuack"
-    },
-    "ky": {
-      "english_name": "Kyrgyz",
-      "name": "\u041a\u044b\u0440\u0433\u044b\u0437\u0447\u0430"
-    },
-    "la": {
-      "english_name": "Latin",
-      "name": "Latina"
-    },
-    "lad": {
-      "english_name": "Ladino",
-      "name": "Dzhudezmo"
-    },
-    "lb": {
-      "english_name": "Luxembourgish",
-      "name": "L\u00ebtzebuergesch"
-    },
-    "lbe": {
-      "english_name": "Lak",
-      "name": "\u041b\u0430\u043a\u043a\u0443"
-    },
-    "lez": {
-      "english_name": "Lezgian",
-      "name": "\u041b\u0435\u0437\u0433\u0438 \u0447\u0406\u0430\u043b (Lezgi \u010d\u2019al)"
-    },
-    "lfn": {
-      "english_name": "Lingua Franca Nova",
-      "name": "Lingua franca nova"
-    },
-    "lg": {
-      "english_name": "Luganda",
-      "name": "Luganda"
-    },
-    "li": {
-      "english_name": "Limburgish",
-      "name": "Limburgs"
-    },
-    "lij": {
-      "english_name": "Ligurian",
-      "name": "L\u00ecgure"
-    },
-    "lld": {
-      "english_name": "Ladin",
-      "name": "Lingaz"
-    },
-    "lmo": {
-      "english_name": "Lombard",
-      "name": "Lumbaart"
-    },
-    "ln": {
-      "english_name": "Lingala",
-      "name": "Lingala"
-    },
-    "lo": {
-      "english_name": "Lao",
-      "name": "\u0ea5\u0eb2\u0ea7"
-    },
-    "lt": {
-      "english_name": "Lithuanian",
-      "name": "Lietuvi\u0173"
-    },
-    "ltg": {
-      "english_name": "Latgalian",
-      "name": "Latga\u013cu"
-    },
-    "lv": {
-      "english_name": "Latvian",
-      "name": "Latvie\u0161u"
-    },
-    "mad": {
-      "english_name": "Madurese",
-      "name": "Madhur\u00e2"
-    },
-    "mai": {
-      "english_name": "Maithili",
-      "name": "\u092e\u0948\u0925\u093f\u0932\u0940"
-    },
-    "map-bms": {
-      "english_name": "Banyumasan",
-      "name": "Basa Banyumasan"
-    },
-    "mdf": {
-      "english_name": "Moksha",
-      "name": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c (Mokshanj K\u00e4lj)"
-    },
-    "mg": {
-      "english_name": "Malagasy",
-      "name": "Malagasy"
-    },
-    "mhr": {
-      "english_name": "Meadow Mari",
-      "name": "\u041e\u043b\u044b\u043a \u041c\u0430\u0440\u0438\u0439 (Olyk Marij)"
-    },
-    "mi": {
-      "english_name": "Maori",
-      "name": "M\u0101ori"
-    },
-    "min": {
-      "english_name": "Minangkabau",
-      "name": "Minangkabau"
-    },
-    "mk": {
-      "english_name": "Macedonian",
-      "name": "\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438"
-    },
-    "ml": {
-      "english_name": "Malayalam",
-      "name": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02"
-    },
-    "mn": {
-      "english_name": "Mongolian",
-      "name": "\u041c\u043e\u043d\u0433\u043e\u043b"
-    },
-    "mni": {
-      "english_name": "Meitei",
-      "name": "\uabc3\uabe4\uabc7\uabe9\uabc2\uabe3\uabdf"
-    },
-    "mnw": {
-      "english_name": "Mon",
-      "name": "\u1019\u1014\u103a"
-    },
-    "mr": {
-      "english_name": "Marathi",
-      "name": "\u092e\u0930\u093e\u0920\u0940"
-    },
-    "mrj": {
-      "english_name": "Hill Mari",
-      "name": "\u041a\u044b\u0440\u044b\u043a \u041c\u0430\u0440\u044b (Kyryk Mary)"
-    },
-    "ms": {
-      "english_name": "Malay",
-      "name": "Bahasa Melayu"
-    },
-    "mt": {
-      "english_name": "Maltese",
-      "name": "Malti"
-    },
-    "mwl": {
-      "english_name": "Mirandese",
-      "name": "Mirand\u00e9s"
-    },
-    "my": {
-      "english_name": "Burmese",
-      "name": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c"
-    },
-    "myv": {
-      "english_name": "Erzya",
-      "name": "\u042d\u0440\u0437\u044f\u043d\u044c (Erzjanj Kelj)"
-    },
-    "mzn": {
-      "english_name": "Mazandarani",
-      "name": "\u0645\u064e\u0632\u0650\u0631\u0648\u0646\u064a"
-    },
-    "na": {
-      "english_name": "Nauruan",
-      "name": "dorerin Naoero"
-    },
-    "nah": {
-      "english_name": "Nahuatl",
-      "name": "N\u0101huatl"
-    },
-    "nap": {
-      "english_name": "Neapolitan",
-      "name": "Nnapulitano"
-    },
-    "nds": {
-      "english_name": "Low Saxon",
-      "name": "Plattd\u00fc\u00fctsch"
-    },
-    "nds-nl": {
-      "english_name": "Dutch Low Saxon",
-      "name": "Nedersaksisch"
-    },
-    "ne": {
-      "english_name": "Nepali",
-      "name": "\u0928\u0947\u092a\u093e\u0932\u0940"
-    },
-    "new": {
-      "english_name": "Newar",
-      "name": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e"
-    },
-    "nia": {
-      "english_name": "Nias",
-      "name": "Li Niha"
-    },
-    "nl": {
-      "english_name": "Dutch",
-      "name": "Nederlands"
-    },
-    "nn": {
-      "english_name": "Norwegian (Nynorsk)",
-      "name": "Nynorsk"
-    },
-    "no": {
-      "english_name": "Norwegian (Bokm\u00e5l)",
-      "name": "Norsk (Bokm\u00e5l)"
-    },
-    "nov": {
-      "english_name": "Novial",
-      "name": "Novial"
-    },
-    "nqo": {
-      "english_name": "N\u2019Ko",
-      "name": "\u07d2\u07de\u07cf"
-    },
-    "nrm": {
-      "english_name": "Norman",
-      "name": "Nouormand/Normaund"
-    },
-    "nso": {
-      "english_name": "Northern Sotho",
-      "name": "Sepedi"
-    },
-    "nv": {
-      "english_name": "Navajo",
-      "name": "Din\u00e9 bizaad"
-    },
-    "ny": {
-      "english_name": "Chichewa",
-      "name": "Chichewa"
-    },
-    "oc": {
-      "english_name": "Occitan",
-      "name": "Occitan"
-    },
-    "olo": {
-      "english_name": "Livvi-Karelian",
-      "name": "Karjalan"
-    },
-    "om": {
-      "english_name": "Oromo",
-      "name": "Oromoo"
-    },
-    "or": {
-      "english_name": "Oriya",
-      "name": "\u0b13\u0b21\u0b3c\u0b3f\u0b06"
-    },
-    "os": {
-      "english_name": "Ossetian",
-      "name": "\u0418\u0440\u043e\u043d\u0430\u0443"
-    },
-    "pa": {
-      "english_name": "Punjabi",
-      "name": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40"
-    },
-    "pag": {
-      "english_name": "Pangasinan",
-      "name": "Pangasinan"
-    },
-    "pam": {
-      "english_name": "Kapampangan",
-      "name": "Kapampangan"
-    },
-    "pap": {
-      "english_name": "Papiamentu",
-      "name": "Papiamentu"
-    },
-    "pcd": {
-      "english_name": "Picard",
-      "name": "Picard"
-    },
-    "pcm": {
-      "english_name": "Nigerian Pidgin",
-      "name": "Naij\u00e1"
-    },
-    "pdc": {
-      "english_name": "Pennsylvania German",
-      "name": "Deitsch"
-    },
-    "pfl": {
-      "english_name": "Palatinate German",
-      "name": "P\u00e4lzisch"
-    },
-    "pi": {
-      "english_name": "Pali",
-      "name": "\u092a\u093e\u0934\u093f"
-    },
-    "pih": {
-      "english_name": "Norfolk",
-      "name": "Norfuk"
-    },
-    "pl": {
-      "english_name": "Polish",
-      "name": "Polski"
-    },
-    "pms": {
-      "english_name": "Piedmontese",
-      "name": "Piemont\u00e8is"
-    },
-    "pnb": {
-      "english_name": "Western Punjabi",
-      "name": "\u0634\u0627\u06c1 \u0645\u06a9\u06be\u06cc \u067e\u0646\u062c\u0627\u0628\u06cc (Sh\u0101hmukh\u012b Pa\u00f1j\u0101b\u012b)"
-    },
-    "pnt": {
-      "english_name": "Pontic",
-      "name": "\u03a0\u03bf\u03bd\u03c4\u03b9\u03b1\u03ba\u03ac"
-    },
-    "ps": {
-      "english_name": "Pashto",
-      "name": "\u067e\u069a\u062a\u0648"
-    },
-    "pt": {
-      "english_name": "Portuguese",
-      "name": "Portugu\u00eas"
-    },
-    "pwn": {
-      "english_name": "Paiwan",
-      "name": "Paiwan"
-    },
-    "qu": {
-      "english_name": "Quechua",
-      "name": "Qichwa simi"
-    },
-    "rm": {
-      "english_name": "Romansh",
-      "name": "Rumantsch"
-    },
-    "rmy": {
-      "english_name": "Romani",
-      "name": "romani - \u0930\u094b\u092e\u093e\u0928\u0940"
-    },
-    "rn": {
-      "english_name": "Kirundi",
-      "name": "Ikirundi"
-    },
-    "ro": {
-      "english_name": "Romanian",
-      "name": "Rom\u00e2n\u0103"
-    },
-    "roa-rup": {
-      "english_name": "Aromanian",
-      "name": "Arm\u00e3neashce"
-    },
-    "roa-tara": {
-      "english_name": "Tarantino",
-      "name": "Tarand\u00edne"
-    },
-    "ru": {
-      "english_name": "Russian",
-      "name": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439"
-    },
-    "rue": {
-      "english_name": "Rusyn",
-      "name": "\u0420\u0443\u0441\u0438\u043d\u044c\u0441\u043a\u044b\u0439"
-    },
-    "rw": {
-      "english_name": "Kinyarwanda",
-      "name": "Ikinyarwanda"
-    },
-    "sa": {
-      "english_name": "Sanskrit",
-      "name": "\u0938\u0902\u0938\u094d\u0915\u0943\u0924\u092e\u094d"
-    },
-    "sah": {
-      "english_name": "Sakha",
-      "name": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430 (Saxa Tyla)"
-    },
-    "sat": {
-      "english_name": "Santali",
-      "name": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64"
-    },
-    "sc": {
-      "english_name": "Sardinian",
-      "name": "Sardu"
-    },
-    "scn": {
-      "english_name": "Sicilian",
-      "name": "Sicilianu"
-    },
-    "sco": {
-      "english_name": "Scots",
-      "name": "Scots"
-    },
-    "sd": {
-      "english_name": "Sindhi",
-      "name": "\u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc \u060c \u0938\u093f\u0928\u094d\u0927"
-    },
-    "se": {
-      "english_name": "Northern Sami",
-      "name": "S\u00e1megiella"
-    },
-    "sg": {
-      "english_name": "Sango",
-      "name": "S\u00e4ng\u00f6"
-    },
-    "sh": {
-      "english_name": "Serbo-Croatian",
-      "name": "Srpskohrvatski / \u0421\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438"
-    },
-    "shi": {
-      "english_name": "Tachelhit",
-      "name": "Tacl\u1e25it"
-    },
-    "shn": {
-      "english_name": "Shan",
-      "name": "\u101c\u102d\u1075\u103a\u1088\u1010\u1086\u1038"
-    },
-    "si": {
-      "english_name": "Sinhalese",
-      "name": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd"
-    },
-    "simple": {
-      "english_name": "Simple English",
-      "name": "Simple English"
-    },
-    "sk": {
-      "english_name": "Slovak",
-      "name": "Sloven\u010dina"
-    },
-    "skr": {
-      "english_name": "Saraiki",
-      "name": "\u0633\u0631\u0627\u0626\u06cc\u06a9\u06cc"
-    },
-    "sl": {
-      "english_name": "Slovenian",
-      "name": "Sloven\u0161\u010dina"
-    },
-    "sm": {
-      "english_name": "Samoan",
-      "name": "Gagana Samoa"
-    },
-    "smn": {
-      "english_name": "Inari Sami",
-      "name": "Anar\u00e2\u0161kiel\u00e2"
-    },
-    "sn": {
-      "english_name": "Shona",
-      "name": "chiShona"
-    },
-    "so": {
-      "english_name": "Somali",
-      "name": "Soomaali"
-    },
-    "sq": {
-      "english_name": "Albanian",
-      "name": "Shqip"
-    },
-    "sr": {
-      "english_name": "Serbian",
-      "name": "\u0421\u0440\u043f\u0441\u043a\u0438 / Srpski"
-    },
-    "srn": {
-      "english_name": "Sranan",
-      "name": "Sranantongo"
-    },
-    "ss": {
-      "english_name": "Swati",
-      "name": "SiSwati"
-    },
-    "st": {
-      "english_name": "Sesotho",
-      "name": "Sesotho"
-    },
-    "stq": {
-      "english_name": "Saterland Frisian",
-      "name": "Seeltersk"
-    },
-    "su": {
-      "english_name": "Sundanese",
-      "name": "Basa Sunda"
-    },
-    "sv": {
-      "english_name": "Swedish",
-      "name": "Svenska"
-    },
-    "sw": {
-      "english_name": "Swahili",
-      "name": "Kiswahili"
-    },
-    "szl": {
-      "english_name": "Silesian",
-      "name": "\u015al\u016fnski"
-    },
-    "szy": {
-      "english_name": "Sakizaya",
-      "name": "Sakizaya"
-    },
-    "ta": {
-      "english_name": "Tamil",
-      "name": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd"
-    },
-    "tay": {
-      "english_name": "Atayal",
-      "name": "Tayal"
-    },
-    "tcy": {
-      "english_name": "Tulu",
-      "name": "\u0ca4\u0cc1\u0cb3\u0cc1"
-    },
-    "te": {
-      "english_name": "Telugu",
-      "name": "\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41"
-    },
-    "tet": {
-      "english_name": "Tetum",
-      "name": "Tetun"
-    },
-    "tg": {
-      "english_name": "Tajik",
-      "name": "\u0422\u043e\u04b7\u0438\u043a\u04e3"
-    },
-    "th": {
-      "english_name": "Thai",
-      "name": "\u0e44\u0e17\u0e22"
-    },
-    "ti": {
-      "english_name": "Tigrinya",
-      "name": "\u1275\u130d\u122d\u129b"
-    },
-    "tk": {
-      "english_name": "Turkmen",
-      "name": "T\u00fcrkmen"
-    },
-    "tl": {
-      "english_name": "Tagalog",
-      "name": "Tagalog"
-    },
-    "tn": {
-      "english_name": "Tswana",
-      "name": "Setswana"
-    },
-    "to": {
-      "english_name": "Tongan",
-      "name": "faka Tonga"
-    },
-    "tpi": {
-      "english_name": "Tok Pisin",
-      "name": "Tok Pisin"
-    },
-    "tr": {
-      "english_name": "Turkish",
-      "name": "T\u00fcrk\u00e7e"
-    },
-    "trv": {
-      "english_name": "Seediq",
-      "name": "Taroko"
-    },
-    "ts": {
-      "english_name": "Tsonga",
-      "name": "Xitsonga"
-    },
-    "tt": {
-      "english_name": "Tatar",
-      "name": "Tatar\u00e7a / \u0422\u0430\u0442\u0430\u0440\u0447\u0430"
-    },
-    "tum": {
-      "english_name": "Tumbuka",
-      "name": "chiTumbuka"
-    },
-    "tw": {
-      "english_name": "Twi",
-      "name": "Twi"
-    },
-    "ty": {
-      "english_name": "Tahitian",
-      "name": "Reo M\u0101`ohi"
-    },
-    "tyv": {
-      "english_name": "Tuvan",
-      "name": "\u0422\u044b\u0432\u0430"
-    },
-    "udm": {
-      "english_name": "Udmurt",
-      "name": "\u0423\u0434\u043c\u0443\u0440\u0442 \u043a\u044b\u043b"
-    },
-    "ug": {
-      "english_name": "Uyghur",
-      "name": "\u0626\u06c7\u064a\u063a\u06c7\u0631 \u062a\u0649\u0644\u0649"
-    },
-    "uk": {
-      "english_name": "Ukrainian",
-      "name": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
-    },
-    "ur": {
-      "english_name": "Urdu",
-      "name": "\u0627\u0631\u062f\u0648"
-    },
-    "uz": {
-      "english_name": "Uzbek",
-      "name": "O\u2018zbek"
-    },
-    "ve": {
-      "english_name": "Venda",
-      "name": "Tshivenda"
-    },
-    "vec": {
-      "english_name": "Venetian",
-      "name": "V\u00e8neto"
-    },
-    "vep": {
-      "english_name": "Vepsian",
-      "name": "Veps\u00e4n"
-    },
-    "vi": {
-      "english_name": "Vietnamese",
-      "name": "Ti\u1ebfng Vi\u1ec7t"
-    },
-    "vls": {
-      "english_name": "West Flemish",
-      "name": "West-Vlams"
-    },
-    "vo": {
-      "english_name": "Volap\u00fck",
-      "name": "Volap\u00fck"
-    },
-    "wa": {
-      "english_name": "Walloon",
-      "name": "Walon"
-    },
-    "war": {
-      "english_name": "Waray-Waray",
-      "name": "Winaray"
-    },
-    "wo": {
-      "english_name": "Wolof",
-      "name": "Wolof"
-    },
-    "wuu": {
-      "english_name": "Wu",
-      "name": "\u5434\u8bed"
-    },
-    "xal": {
-      "english_name": "Kalmyk",
-      "name": "\u0425\u0430\u043b\u044c\u043c\u0433"
-    },
-    "xh": {
-      "english_name": "Xhosa",
-      "name": "isiXhosa"
-    },
-    "xmf": {
-      "english_name": "Mingrelian",
-      "name": "\u10db\u10d0\u10e0\u10d2\u10d0\u10da\u10e3\u10e0\u10d8 (Margaluri)"
-    },
-    "yi": {
-      "english_name": "Yiddish",
-      "name": "\u05d9\u05d9\u05b4\u05d3\u05d9\u05e9"
-    },
-    "yo": {
-      "english_name": "Yoruba",
-      "name": "Yor\u00f9b\u00e1"
-    },
-    "za": {
-      "english_name": "Zhuang",
-      "name": "Cuengh"
-    },
-    "zea": {
-      "english_name": "Zeelandic",
-      "name": "Ze\u00eauws"
-    },
-    "zh": {
-      "english_name": "Chinese",
-      "name": "\u4e2d\u6587"
-    },
-    "zh-classical": {
-      "english_name": "Classical Chinese",
-      "name": "\u53e4\u6587 / \u6587\u8a00\u6587"
-    },
-    "zh-min-nan": {
-      "english_name": "Min Nan",
-      "name": "B\u00e2n-l\u00e2m-g\u00fa"
-    },
-    "zh-yue": {
-      "english_name": "Cantonese",
-      "name": "\u7cb5\u8a9e"
-    },
-    "zu": {
-      "english_name": "Zulu",
-      "name": "isiZulu"
-    }
-  },
-  "wikipedia": {
-    "ab": {
-      "english_name": "Abkhazian",
-      "name": "\u0410\u0525\u0441\u0443\u0430"
-    },
-    "ace": {
-      "english_name": "Acehnese",
-      "name": "Basa Ac\u00e8h"
-    },
-    "ady": {
-      "english_name": "Adyghe",
-      "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d"
-    },
-    "af": {
-      "english_name": "Afrikaans",
-      "name": "Afrikaans"
-    },
-    "ak": {
-      "english_name": "Akan",
-      "name": "Akana"
-    },
-    "als": {
-      "english_name": "Alemannic",
-      "name": "Alemannisch"
-    },
-    "alt": {
-      "english_name": "Southern Altai",
-      "name": "\u0410\u043b\u0442\u0430\u0439"
-    },
-    "am": {
-      "english_name": "Amharic",
-      "name": "\u12a0\u121b\u122d\u129b"
-    },
-    "ami": {
-      "english_name": "Amis",
-      "name": "Pangcah"
-    },
-    "an": {
-      "english_name": "Aragonese",
-      "name": "Aragon\u00e9s"
-    },
-    "ang": {
-      "english_name": "Anglo-Saxon",
-      "name": "\u00c6nglisc"
-    },
-    "ar": {
-      "english_name": "Arabic",
-      "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629"
-    },
-    "arc": {
-      "english_name": "Aramaic",
-      "name": "\u0710\u072a\u0721\u071d\u0710"
-    },
-    "ary": {
-      "english_name": "Moroccan Arabic",
-      "name": "\u062f\u0627\u0631\u064a\u062c\u0629"
-    },
-    "arz": {
-      "english_name": "Egyptian Arabic",
-      "name": "\u0645\u0635\u0631\u0649 (Ma\u1e63ri)"
-    },
-    "as": {
-      "english_name": "Assamese",
-      "name": "\u0985\u09b8\u09ae\u09c0\u09af\u09bc\u09be"
-    },
-    "ast": {
-      "english_name": "Asturian",
-      "name": "Asturianu"
-    },
-    "atj": {
-      "english_name": "Atikamekw",
-      "name": "Atikamekw"
-    },
-    "av": {
-      "english_name": "Avar",
-      "name": "\u0410\u0432\u0430\u0440"
-    },
-    "avk": {
-      "english_name": "Kotava",
-      "name": "Kotava"
-    },
-    "awa": {
-      "english_name": "Awadhi",
-      "name": "\u0905\u0935\u0927\u0940"
-    },
-    "ay": {
-      "english_name": "Aymara",
-      "name": "Aymar"
-    },
-    "az": {
-      "english_name": "Azerbaijani",
-      "name": "Az\u0259rbaycanca"
-    },
-    "azb": {
-      "english_name": "South Azerbaijani",
-      "name": "\u062a\u06c6\u0631\u06a9\u062c\u0647"
-    },
-    "ba": {
-      "english_name": "Bashkir",
-      "name": "\u0411\u0430\u0448\u04a1\u043e\u0440\u0442"
-    },
-    "ban": {
-      "english_name": "Balinese",
-      "name": "Bali"
-    },
-    "bar": {
-      "english_name": "Bavarian",
-      "name": "Boarisch"
-    },
-    "bat-smg": {
-      "english_name": "Samogitian",
-      "name": "\u017demait\u0117\u0161ka"
-    },
-    "bcl": {
-      "english_name": "Central Bicolano",
-      "name": "Bikol"
-    },
-    "be": {
-      "english_name": "Belarusian",
-      "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f"
-    },
-    "be-tarask": {
-      "english_name": "Belarusian (Tara\u0161kievica)",
-      "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)"
-    },
-    "bg": {
-      "english_name": "Bulgarian",
-      "name": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"
-    },
-    "bh": {
-      "english_name": "Bhojpuri",
-      "name": "\u092d\u094b\u091c\u092a\u0941\u0930\u0940"
-    },
-    "bi": {
-      "english_name": "Bislama",
-      "name": "Bislama"
-    },
-    "bjn": {
-      "english_name": "Banjar",
-      "name": "Bahasa Banjar"
-    },
-    "blk": {
-      "english_name": "Pa'O",
-      "name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f"
-    },
-    "bm": {
-      "english_name": "Bambara",
-      "name": "Bamanankan"
-    },
-    "bn": {
-      "english_name": "Bengali",
-      "name": "\u09ac\u09be\u0982\u09b2\u09be"
-    },
-    "bo": {
-      "english_name": "Tibetan",
-      "name": "\u0f56\u0f7c\u0f51\u0f0b\u0f66\u0f90\u0f51"
-    },
-    "bpy": {
-      "english_name": "Bishnupriya Manipuri",
-      "name": "\u0987\u09ae\u09be\u09b0 \u09a0\u09be\u09b0/\u09ac\u09bf\u09b7\u09cd\u09a3\u09c1\u09aa\u09cd\u09b0\u09bf\u09af\u09bc\u09be \u09ae\u09a3\u09bf\u09aa\u09c1\u09b0\u09c0"
-    },
-    "br": {
-      "english_name": "Breton",
-      "name": "Brezhoneg"
-    },
-    "bs": {
-      "english_name": "Bosnian",
-      "name": "Bosanski"
-    },
-    "bug": {
-      "english_name": "Buginese",
-      "name": "Basa Ugi"
-    },
-    "bxr": {
-      "english_name": "Buryat",
-      "name": "\u0411\u0443\u0440\u044f\u0430\u0434"
-    },
-    "ca": {
-      "english_name": "Catalan",
-      "name": "Catal\u00e0"
-    },
-    "cbk-zam": {
-      "english_name": "Zamboanga Chavacano",
-      "name": "Chavacano de Zamboanga"
-    },
-    "cdo": {
-      "english_name": "Min Dong",
-      "name": "M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304"
-    },
-    "ce": {
-      "english_name": "Chechen",
-      "name": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d"
-    },
-    "ceb": {
-      "english_name": "Cebuano",
-      "name": "Sinugboanong Binisaya"
-    },
-    "ch": {
-      "english_name": "Chamorro",
-      "name": "Chamoru"
-    },
-    "chr": {
-      "english_name": "Cherokee",
-      "name": "\u13e3\u13b3\u13a9"
-    },
-    "chy": {
-      "english_name": "Cheyenne",
-      "name": "Tsets\u00eahest\u00e2hese"
-    },
-    "ckb": {
-      "english_name": "Sorani",
-      "name": "Soran\u00ee / \u06a9\u0648\u0631\u062f\u06cc"
-    },
-    "co": {
-      "english_name": "Corsican",
-      "name": "Corsu"
-    },
-    "cr": {
-      "english_name": "Cree",
-      "name": "Nehiyaw"
-    },
-    "crh": {
-      "english_name": "Crimean Tatar",
-      "name": "Q\u0131r\u0131mtatarca"
-    },
-    "cs": {
-      "english_name": "Czech",
-      "name": "\u010ce\u0161tina"
-    },
-    "csb": {
-      "english_name": "Kashubian",
-      "name": "Kasz\u00ebbsczi"
-    },
-    "cu": {
-      "english_name": "Old Church Slavonic",
-      "name": "\u0421\u043b\u043e\u0432\u0463\u043d\u044c\u0441\u043a\u044a"
-    },
-    "cv": {
-      "english_name": "Chuvash",
-      "name": "\u0427\u0103\u0432\u0430\u0448"
-    },
-    "cy": {
-      "english_name": "Welsh",
-      "name": "Cymraeg"
-    },
-    "da": {
-      "english_name": "Danish",
-      "name": "Dansk"
-    },
-    "dag": {
-      "english_name": "Dagbani",
-      "name": "Dagbanli"
-    },
-    "de": {
-      "english_name": "German",
-      "name": "Deutsch"
-    },
-    "din": {
-      "english_name": "Dinka",
-      "name": "Thu\u0254\u014bj\u00e4\u014b"
-    },
-    "diq": {
-      "english_name": "Zazaki",
-      "name": "Zazaki"
-    },
-    "dsb": {
-      "english_name": "Lower Sorbian",
-      "name": "Dolnoserbski"
-    },
-    "dty": {
-      "english_name": "Doteli",
-      "name": "\u0921\u094b\u091f\u0947\u0932\u0940"
-    },
-    "dv": {
-      "english_name": "Divehi",
-      "name": "\u078b\u07a8\u0788\u07ac\u0780\u07a8\u0784\u07a6\u0790\u07b0"
-    },
-    "dz": {
-      "english_name": "Dzongkha",
-      "name": "\u0f47\u0f7c\u0f44\u0f0b\u0f41"
-    },
-    "ee": {
-      "english_name": "Ewe",
-      "name": "E\u028begbe"
-    },
-    "el": {
-      "english_name": "Greek",
-      "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"
-    },
-    "eml": {
-      "english_name": "Emilian-Romagnol",
-      "name": "Emili\u00e0n e rumagn\u00f2l"
-    },
-    "en": {
-      "english_name": "English",
-      "name": "English"
-    },
-    "eo": {
-      "english_name": "Esperanto",
-      "name": "Esperanto"
-    },
-    "es": {
-      "english_name": "Spanish",
-      "name": "Espa\u00f1ol"
-    },
-    "et": {
-      "english_name": "Estonian",
-      "name": "Eesti"
-    },
-    "eu": {
-      "english_name": "Basque",
-      "name": "Euskara"
-    },
-    "ext": {
-      "english_name": "Extremaduran",
-      "name": "Estreme\u00f1u"
-    },
-    "fa": {
-      "english_name": "Persian",
-      "name": "\u0641\u0627\u0631\u0633\u06cc"
-    },
-    "ff": {
-      "english_name": "Fula",
-      "name": "Fulfulde"
-    },
-    "fi": {
-      "english_name": "Finnish",
-      "name": "Suomi"
-    },
-    "fiu-vro": {
-      "english_name": "V\u00f5ro",
-      "name": "V\u00f5ro"
-    },
-    "fj": {
-      "english_name": "Fijian",
-      "name": "Na Vosa Vakaviti"
-    },
-    "fo": {
-      "english_name": "Faroese",
-      "name": "F\u00f8royskt"
-    },
-    "fr": {
-      "english_name": "French",
-      "name": "Fran\u00e7ais"
-    },
-    "frp": {
-      "english_name": "Franco-Proven\u00e7al",
-      "name": "Arpetan"
-    },
-    "frr": {
-      "english_name": "North Frisian",
-      "name": "Nordfrasch"
-    },
-    "fur": {
-      "english_name": "Friulian",
-      "name": "Furlan"
-    },
-    "fy": {
-      "english_name": "West Frisian",
-      "name": "Frysk"
-    },
-    "ga": {
-      "english_name": "Irish",
-      "name": "Gaeilge"
-    },
-    "gag": {
-      "english_name": "Gagauz",
-      "name": "Gagauz"
-    },
-    "gan": {
-      "english_name": "Gan",
-      "name": "\u8d1b\u8a9e"
-    },
-    "gcr": {
-      "english_name": "Guianan Creole",
-      "name": "Kriy\u00f2l Gwiyannen"
-    },
-    "gd": {
-      "english_name": "Scottish Gaelic",
-      "name": "G\u00e0idhlig"
-    },
-    "gl": {
-      "english_name": "Galician",
-      "name": "Galego"
-    },
-    "glk": {
-      "english_name": "Gilaki",
-      "name": "\u06af\u06cc\u0644\u06a9\u06cc"
-    },
-    "gn": {
-      "english_name": "Guarani",
-      "name": "Ava\u00f1e'\u1ebd"
-    },
-    "gom": {
-      "english_name": "Goan Konkani",
-      "name": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni"
-    },
-    "gor": {
-      "english_name": "Gorontalo",
-      "name": "Hulontalo"
-    },
-    "got": {
-      "english_name": "Gothic",
-      "name": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a"
-    },
-    "gu": {
-      "english_name": "Gujarati",
-      "name": "\u0a97\u0ac1\u0a9c\u0ab0\u0abe\u0aa4\u0ac0"
-    },
-    "guw": {
-      "english_name": "Gun",
-      "name": "Gungbe"
-    },
-    "gv": {
-      "english_name": "Manx",
-      "name": "Gaelg"
-    },
-    "ha": {
-      "english_name": "Hausa",
-      "name": "Hausa / \u0647\u064e\u0648\u064f\u0633\u064e"
-    },
-    "hak": {
-      "english_name": "Hakka",
-      "name": "Hak-k\u00e2-fa / \u5ba2\u5bb6\u8a71"
-    },
-    "haw": {
-      "english_name": "Hawaiian",
-      "name": "Hawai\u02bbi"
-    },
-    "he": {
-      "english_name": "Hebrew",
-      "name": "\u05e2\u05d1\u05e8\u05d9\u05ea"
-    },
-    "hi": {
-      "english_name": "Hindi",
-      "name": "\u0939\u093f\u0928\u094d\u0926\u0940"
-    },
-    "hif": {
-      "english_name": "Fiji Hindi",
-      "name": "Fiji Hindi"
-    },
-    "hr": {
-      "english_name": "Croatian",
-      "name": "Hrvatski"
-    },
-    "hsb": {
-      "english_name": "Upper Sorbian",
-      "name": "Hornjoserbsce"
-    },
-    "ht": {
-      "english_name": "Haitian",
-      "name": "Kr\u00e8yol ayisyen"
-    },
-    "hu": {
-      "english_name": "Hungarian",
-      "name": "Magyar"
-    },
-    "hy": {
-      "english_name": "Armenian",
-      "name": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576"
-    },
-    "hyw": {
-      "english_name": "Western Armenian",
-      "name": "\u0531\u0580\u0565\u0582\u0574\u057f\u0561\u0570\u0561\u0575\u0565\u0580\u0567\u0576"
-    },
-    "ia": {
-      "english_name": "Interlingua",
-      "name": "Interlingua"
-    },
-    "id": {
-      "english_name": "Indonesian",
-      "name": "Bahasa Indonesia"
-    },
-    "ie": {
-      "english_name": "Interlingue",
-      "name": "Interlingue"
-    },
-    "ig": {
-      "english_name": "Igbo",
-      "name": "\u00ccgb\u00f2"
-    },
-    "ik": {
-      "english_name": "Inupiak",
-      "name": "I\u00f1upiatun"
-    },
-    "ilo": {
-      "english_name": "Ilokano",
-      "name": "Ilokano"
-    },
-    "inh": {
-      "english_name": "Ingush",
-      "name": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439"
-    },
-    "io": {
-      "english_name": "Ido",
-      "name": "Ido"
-    },
-    "is": {
-      "english_name": "Icelandic",
-      "name": "\u00cdslenska"
-    },
-    "it": {
-      "english_name": "Italian",
-      "name": "Italiano"
-    },
-    "iu": {
-      "english_name": "Inuktitut",
-      "name": "\u1403\u14c4\u1483\u144e\u1450\u1466"
-    },
-    "ja": {
-      "english_name": "Japanese",
-      "name": "\u65e5\u672c\u8a9e"
-    },
-    "jam": {
-      "english_name": "Jamaican Patois",
-      "name": "Jumiekan Kryuol"
-    },
-    "jbo": {
-      "english_name": "Lojban",
-      "name": "Lojban"
-    },
-    "jv": {
-      "english_name": "Javanese",
-      "name": "Basa Jawa"
-    },
-    "ka": {
-      "english_name": "Georgian",
-      "name": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8"
-    },
-    "kaa": {
-      "english_name": "Karakalpak",
-      "name": "Qaraqalpaqsha"
-    },
-    "kab": {
-      "english_name": "Kabyle",
-      "name": "Taqbaylit"
-    },
-    "kbd": {
-      "english_name": "Kabardian Circassian",
-      "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d (Adighabze)"
-    },
-    "kbp": {
-      "english_name": "Kabiye",
-      "name": "Kab\u0269y\u025b"
-    },
-    "kcg": {
-      "english_name": "Tyap",
-      "name": "Tyap"
-    },
-    "kg": {
-      "english_name": "Kongo",
-      "name": "Kik\u00f4ngo"
-    },
-    "ki": {
-      "english_name": "Kikuyu",
-      "name": "G\u0129k\u0169y\u0169"
-    },
-    "kk": {
-      "english_name": "Kazakh",
-      "name": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430"
-    },
-    "kl": {
-      "english_name": "Greenlandic",
-      "name": "Kalaallisut"
-    },
-    "km": {
-      "english_name": "Khmer",
-      "name": "\u1797\u17b6\u179f\u17b6\u1781\u17d2\u1798\u17c2\u179a"
-    },
-    "kn": {
-      "english_name": "Kannada",
-      "name": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1"
-    },
-    "ko": {
-      "english_name": "Korean",
-      "name": "\ud55c\uad6d\uc5b4"
-    },
-    "koi": {
-      "english_name": "Komi-Permyak",
-      "name": "\u041f\u0435\u0440\u0435\u043c \u041a\u043e\u043c\u0438 (Perem Komi)"
-    },
-    "krc": {
-      "english_name": "Karachay-Balkar",
-      "name": "\u041a\u044a\u0430\u0440\u0430\u0447\u0430\u0439-\u041c\u0430\u043b\u043a\u044a\u0430\u0440 (Qarachay-Malqar)"
-    },
-    "ks": {
-      "english_name": "Kashmiri",
-      "name": "\u0915\u0936\u094d\u092e\u0940\u0930\u0940 / \u0643\u0634\u0645\u064a\u0631\u064a"
-    },
-    "ksh": {
-      "english_name": "Ripuarian",
-      "name": "Ripoarisch"
-    },
-    "ku": {
-      "english_name": "Kurdish",
-      "name": "Kurd\u00ee / \u0643\u0648\u0631\u062f\u06cc"
-    },
-    "kv": {
-      "english_name": "Komi",
-      "name": "\u041a\u043e\u043c\u0438"
-    },
-    "kw": {
-      "english_name": "Cornish",
-      "name": "Kernowek/Karnuack"
-    },
-    "ky": {
-      "english_name": "Kyrgyz",
-      "name": "\u041a\u044b\u0440\u0433\u044b\u0437\u0447\u0430"
-    },
-    "la": {
-      "english_name": "Latin",
-      "name": "Latina"
-    },
-    "lad": {
-      "english_name": "Ladino",
-      "name": "Dzhudezmo"
-    },
-    "lb": {
-      "english_name": "Luxembourgish",
-      "name": "L\u00ebtzebuergesch"
-    },
-    "lbe": {
-      "english_name": "Lak",
-      "name": "\u041b\u0430\u043a\u043a\u0443"
-    },
-    "lez": {
-      "english_name": "Lezgian",
-      "name": "\u041b\u0435\u0437\u0433\u0438 \u0447\u0406\u0430\u043b (Lezgi \u010d\u2019al)"
-    },
-    "lfn": {
-      "english_name": "Lingua Franca Nova",
-      "name": "Lingua franca nova"
-    },
-    "lg": {
-      "english_name": "Luganda",
-      "name": "Luganda"
-    },
-    "li": {
-      "english_name": "Limburgish",
-      "name": "Limburgs"
-    },
-    "lij": {
-      "english_name": "Ligurian",
-      "name": "L\u00ecgure"
-    },
-    "lld": {
-      "english_name": "Ladin",
-      "name": "Lingaz"
-    },
-    "lmo": {
-      "english_name": "Lombard",
-      "name": "Lumbaart"
-    },
-    "ln": {
-      "english_name": "Lingala",
-      "name": "Lingala"
-    },
-    "lo": {
-      "english_name": "Lao",
-      "name": "\u0ea5\u0eb2\u0ea7"
-    },
-    "lt": {
-      "english_name": "Lithuanian",
-      "name": "Lietuvi\u0173"
-    },
-    "ltg": {
-      "english_name": "Latgalian",
-      "name": "Latga\u013cu"
-    },
-    "lv": {
-      "english_name": "Latvian",
-      "name": "Latvie\u0161u"
-    },
-    "mad": {
-      "english_name": "Madurese",
-      "name": "Madhur\u00e2"
-    },
-    "mai": {
-      "english_name": "Maithili",
-      "name": "\u092e\u0948\u0925\u093f\u0932\u0940"
-    },
-    "map-bms": {
-      "english_name": "Banyumasan",
-      "name": "Basa Banyumasan"
-    },
-    "mdf": {
-      "english_name": "Moksha",
-      "name": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c (Mokshanj K\u00e4lj)"
-    },
-    "mg": {
-      "english_name": "Malagasy",
-      "name": "Malagasy"
-    },
-    "mhr": {
-      "english_name": "Meadow Mari",
-      "name": "\u041e\u043b\u044b\u043a \u041c\u0430\u0440\u0438\u0439 (Olyk Marij)"
-    },
-    "mi": {
-      "english_name": "Maori",
-      "name": "M\u0101ori"
-    },
-    "min": {
-      "english_name": "Minangkabau",
-      "name": "Minangkabau"
-    },
-    "mk": {
-      "english_name": "Macedonian",
-      "name": "\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438"
-    },
-    "ml": {
-      "english_name": "Malayalam",
-      "name": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02"
-    },
-    "mn": {
-      "english_name": "Mongolian",
-      "name": "\u041c\u043e\u043d\u0433\u043e\u043b"
-    },
-    "mni": {
-      "english_name": "Meitei",
-      "name": "\uabc3\uabe4\uabc7\uabe9\uabc2\uabe3\uabdf"
-    },
-    "mnw": {
-      "english_name": "Mon",
-      "name": "\u1019\u1014\u103a"
-    },
-    "mr": {
-      "english_name": "Marathi",
-      "name": "\u092e\u0930\u093e\u0920\u0940"
-    },
-    "mrj": {
-      "english_name": "Hill Mari",
-      "name": "\u041a\u044b\u0440\u044b\u043a \u041c\u0430\u0440\u044b (Kyryk Mary)"
-    },
-    "ms": {
-      "english_name": "Malay",
-      "name": "Bahasa Melayu"
-    },
-    "mt": {
-      "english_name": "Maltese",
-      "name": "Malti"
-    },
-    "mwl": {
-      "english_name": "Mirandese",
-      "name": "Mirand\u00e9s"
-    },
-    "my": {
-      "english_name": "Burmese",
-      "name": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c"
-    },
-    "myv": {
-      "english_name": "Erzya",
-      "name": "\u042d\u0440\u0437\u044f\u043d\u044c (Erzjanj Kelj)"
-    },
-    "mzn": {
-      "english_name": "Mazandarani",
-      "name": "\u0645\u064e\u0632\u0650\u0631\u0648\u0646\u064a"
-    },
-    "na": {
-      "english_name": "Nauruan",
-      "name": "dorerin Naoero"
-    },
-    "nah": {
-      "english_name": "Nahuatl",
-      "name": "N\u0101huatl"
-    },
-    "nap": {
-      "english_name": "Neapolitan",
-      "name": "Nnapulitano"
-    },
-    "nds": {
-      "english_name": "Low Saxon",
-      "name": "Plattd\u00fc\u00fctsch"
-    },
-    "nds-nl": {
-      "english_name": "Dutch Low Saxon",
-      "name": "Nedersaksisch"
-    },
-    "ne": {
-      "english_name": "Nepali",
-      "name": "\u0928\u0947\u092a\u093e\u0932\u0940"
-    },
-    "new": {
-      "english_name": "Newar",
-      "name": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e"
-    },
-    "nia": {
-      "english_name": "Nias",
-      "name": "Li Niha"
-    },
-    "nl": {
-      "english_name": "Dutch",
-      "name": "Nederlands"
-    },
-    "nn": {
-      "english_name": "Norwegian (Nynorsk)",
-      "name": "Nynorsk"
-    },
-    "no": {
-      "english_name": "Norwegian (Bokm\u00e5l)",
-      "name": "Norsk (Bokm\u00e5l)"
-    },
-    "nov": {
-      "english_name": "Novial",
-      "name": "Novial"
-    },
-    "nqo": {
-      "english_name": "N\u2019Ko",
-      "name": "\u07d2\u07de\u07cf"
-    },
-    "nrm": {
-      "english_name": "Norman",
-      "name": "Nouormand/Normaund"
-    },
-    "nso": {
-      "english_name": "Northern Sotho",
-      "name": "Sepedi"
-    },
-    "nv": {
-      "english_name": "Navajo",
-      "name": "Din\u00e9 bizaad"
-    },
-    "ny": {
-      "english_name": "Chichewa",
-      "name": "Chichewa"
-    },
-    "oc": {
-      "english_name": "Occitan",
-      "name": "Occitan"
-    },
-    "olo": {
-      "english_name": "Livvi-Karelian",
-      "name": "Karjalan"
-    },
-    "om": {
-      "english_name": "Oromo",
-      "name": "Oromoo"
-    },
-    "or": {
-      "english_name": "Oriya",
-      "name": "\u0b13\u0b21\u0b3c\u0b3f\u0b06"
-    },
-    "os": {
-      "english_name": "Ossetian",
-      "name": "\u0418\u0440\u043e\u043d\u0430\u0443"
-    },
-    "pa": {
-      "english_name": "Punjabi",
-      "name": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40"
-    },
-    "pag": {
-      "english_name": "Pangasinan",
-      "name": "Pangasinan"
-    },
-    "pam": {
-      "english_name": "Kapampangan",
-      "name": "Kapampangan"
-    },
-    "pap": {
-      "english_name": "Papiamentu",
-      "name": "Papiamentu"
-    },
-    "pcd": {
-      "english_name": "Picard",
-      "name": "Picard"
-    },
-    "pcm": {
-      "english_name": "Nigerian Pidgin",
-      "name": "Naij\u00e1"
-    },
-    "pdc": {
-      "english_name": "Pennsylvania German",
-      "name": "Deitsch"
-    },
-    "pfl": {
-      "english_name": "Palatinate German",
-      "name": "P\u00e4lzisch"
-    },
-    "pi": {
-      "english_name": "Pali",
-      "name": "\u092a\u093e\u0934\u093f"
-    },
-    "pih": {
-      "english_name": "Norfolk",
-      "name": "Norfuk"
-    },
-    "pl": {
-      "english_name": "Polish",
-      "name": "Polski"
-    },
-    "pms": {
-      "english_name": "Piedmontese",
-      "name": "Piemont\u00e8is"
-    },
-    "pnb": {
-      "english_name": "Western Punjabi",
-      "name": "\u0634\u0627\u06c1 \u0645\u06a9\u06be\u06cc \u067e\u0646\u062c\u0627\u0628\u06cc (Sh\u0101hmukh\u012b Pa\u00f1j\u0101b\u012b)"
-    },
-    "pnt": {
-      "english_name": "Pontic",
-      "name": "\u03a0\u03bf\u03bd\u03c4\u03b9\u03b1\u03ba\u03ac"
-    },
-    "ps": {
-      "english_name": "Pashto",
-      "name": "\u067e\u069a\u062a\u0648"
-    },
-    "pt": {
-      "english_name": "Portuguese",
-      "name": "Portugu\u00eas"
-    },
-    "pwn": {
-      "english_name": "Paiwan",
-      "name": "Paiwan"
-    },
-    "qu": {
-      "english_name": "Quechua",
-      "name": "Qichwa simi"
-    },
-    "rm": {
-      "english_name": "Romansh",
-      "name": "Rumantsch"
-    },
-    "rmy": {
-      "english_name": "Romani",
-      "name": "romani - \u0930\u094b\u092e\u093e\u0928\u0940"
-    },
-    "rn": {
-      "english_name": "Kirundi",
-      "name": "Ikirundi"
-    },
-    "ro": {
-      "english_name": "Romanian",
-      "name": "Rom\u00e2n\u0103"
-    },
-    "roa-rup": {
-      "english_name": "Aromanian",
-      "name": "Arm\u00e3neashce"
-    },
-    "roa-tara": {
-      "english_name": "Tarantino",
-      "name": "Tarand\u00edne"
-    },
-    "ru": {
-      "english_name": "Russian",
-      "name": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439"
-    },
-    "rue": {
-      "english_name": "Rusyn",
-      "name": "\u0420\u0443\u0441\u0438\u043d\u044c\u0441\u043a\u044b\u0439"
-    },
-    "rw": {
-      "english_name": "Kinyarwanda",
-      "name": "Ikinyarwanda"
-    },
-    "sa": {
-      "english_name": "Sanskrit",
-      "name": "\u0938\u0902\u0938\u094d\u0915\u0943\u0924\u092e\u094d"
-    },
-    "sah": {
-      "english_name": "Sakha",
-      "name": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430 (Saxa Tyla)"
-    },
-    "sat": {
-      "english_name": "Santali",
-      "name": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64"
-    },
-    "sc": {
-      "english_name": "Sardinian",
-      "name": "Sardu"
-    },
-    "scn": {
-      "english_name": "Sicilian",
-      "name": "Sicilianu"
-    },
-    "sco": {
-      "english_name": "Scots",
-      "name": "Scots"
-    },
-    "sd": {
-      "english_name": "Sindhi",
-      "name": "\u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc \u060c \u0938\u093f\u0928\u094d\u0927"
-    },
-    "se": {
-      "english_name": "Northern Sami",
-      "name": "S\u00e1megiella"
-    },
-    "sg": {
-      "english_name": "Sango",
-      "name": "S\u00e4ng\u00f6"
-    },
-    "sh": {
-      "english_name": "Serbo-Croatian",
-      "name": "Srpskohrvatski / \u0421\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438"
-    },
-    "shi": {
-      "english_name": "Tachelhit",
-      "name": "Tacl\u1e25it"
-    },
-    "shn": {
-      "english_name": "Shan",
-      "name": "\u101c\u102d\u1075\u103a\u1088\u1010\u1086\u1038"
-    },
-    "si": {
-      "english_name": "Sinhalese",
-      "name": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd"
-    },
-    "simple": {
-      "english_name": "Simple English",
-      "name": "Simple English"
-    },
-    "sk": {
-      "english_name": "Slovak",
-      "name": "Sloven\u010dina"
-    },
-    "skr": {
-      "english_name": "Saraiki",
-      "name": "\u0633\u0631\u0627\u0626\u06cc\u06a9\u06cc"
-    },
-    "sl": {
-      "english_name": "Slovenian",
-      "name": "Sloven\u0161\u010dina"
-    },
-    "sm": {
-      "english_name": "Samoan",
-      "name": "Gagana Samoa"
-    },
-    "smn": {
-      "english_name": "Inari Sami",
-      "name": "Anar\u00e2\u0161kiel\u00e2"
-    },
-    "sn": {
-      "english_name": "Shona",
-      "name": "chiShona"
-    },
-    "so": {
-      "english_name": "Somali",
-      "name": "Soomaali"
-    },
-    "sq": {
-      "english_name": "Albanian",
-      "name": "Shqip"
-    },
-    "sr": {
-      "english_name": "Serbian",
-      "name": "\u0421\u0440\u043f\u0441\u043a\u0438 / Srpski"
-    },
-    "srn": {
-      "english_name": "Sranan",
-      "name": "Sranantongo"
-    },
-    "ss": {
-      "english_name": "Swati",
-      "name": "SiSwati"
-    },
-    "st": {
-      "english_name": "Sesotho",
-      "name": "Sesotho"
-    },
-    "stq": {
-      "english_name": "Saterland Frisian",
-      "name": "Seeltersk"
-    },
-    "su": {
-      "english_name": "Sundanese",
-      "name": "Basa Sunda"
-    },
-    "sv": {
-      "english_name": "Swedish",
-      "name": "Svenska"
-    },
-    "sw": {
-      "english_name": "Swahili",
-      "name": "Kiswahili"
-    },
-    "szl": {
-      "english_name": "Silesian",
-      "name": "\u015al\u016fnski"
-    },
-    "szy": {
-      "english_name": "Sakizaya",
-      "name": "Sakizaya"
-    },
-    "ta": {
-      "english_name": "Tamil",
-      "name": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd"
-    },
-    "tay": {
-      "english_name": "Atayal",
-      "name": "Tayal"
-    },
-    "tcy": {
-      "english_name": "Tulu",
-      "name": "\u0ca4\u0cc1\u0cb3\u0cc1"
-    },
-    "te": {
-      "english_name": "Telugu",
-      "name": "\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41"
-    },
-    "tet": {
-      "english_name": "Tetum",
-      "name": "Tetun"
-    },
-    "tg": {
-      "english_name": "Tajik",
-      "name": "\u0422\u043e\u04b7\u0438\u043a\u04e3"
-    },
-    "th": {
-      "english_name": "Thai",
-      "name": "\u0e44\u0e17\u0e22"
-    },
-    "ti": {
-      "english_name": "Tigrinya",
-      "name": "\u1275\u130d\u122d\u129b"
-    },
-    "tk": {
-      "english_name": "Turkmen",
-      "name": "T\u00fcrkmen"
-    },
-    "tl": {
-      "english_name": "Tagalog",
-      "name": "Tagalog"
-    },
-    "tn": {
-      "english_name": "Tswana",
-      "name": "Setswana"
-    },
-    "to": {
-      "english_name": "Tongan",
-      "name": "faka Tonga"
-    },
-    "tpi": {
-      "english_name": "Tok Pisin",
-      "name": "Tok Pisin"
-    },
-    "tr": {
-      "english_name": "Turkish",
-      "name": "T\u00fcrk\u00e7e"
-    },
-    "trv": {
-      "english_name": "Seediq",
-      "name": "Taroko"
-    },
-    "ts": {
-      "english_name": "Tsonga",
-      "name": "Xitsonga"
-    },
-    "tt": {
-      "english_name": "Tatar",
-      "name": "Tatar\u00e7a / \u0422\u0430\u0442\u0430\u0440\u0447\u0430"
-    },
-    "tum": {
-      "english_name": "Tumbuka",
-      "name": "chiTumbuka"
-    },
-    "tw": {
-      "english_name": "Twi",
-      "name": "Twi"
-    },
-    "ty": {
-      "english_name": "Tahitian",
-      "name": "Reo M\u0101`ohi"
-    },
-    "tyv": {
-      "english_name": "Tuvan",
-      "name": "\u0422\u044b\u0432\u0430"
-    },
-    "udm": {
-      "english_name": "Udmurt",
-      "name": "\u0423\u0434\u043c\u0443\u0440\u0442 \u043a\u044b\u043b"
-    },
-    "ug": {
-      "english_name": "Uyghur",
-      "name": "\u0626\u06c7\u064a\u063a\u06c7\u0631 \u062a\u0649\u0644\u0649"
-    },
-    "uk": {
-      "english_name": "Ukrainian",
-      "name": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"
-    },
-    "ur": {
-      "english_name": "Urdu",
-      "name": "\u0627\u0631\u062f\u0648"
-    },
-    "uz": {
-      "english_name": "Uzbek",
-      "name": "O\u2018zbek"
-    },
-    "ve": {
-      "english_name": "Venda",
-      "name": "Tshivenda"
-    },
-    "vec": {
-      "english_name": "Venetian",
-      "name": "V\u00e8neto"
-    },
-    "vep": {
-      "english_name": "Vepsian",
-      "name": "Veps\u00e4n"
-    },
-    "vi": {
-      "english_name": "Vietnamese",
-      "name": "Ti\u1ebfng Vi\u1ec7t"
-    },
-    "vls": {
-      "english_name": "West Flemish",
-      "name": "West-Vlams"
-    },
-    "vo": {
-      "english_name": "Volap\u00fck",
-      "name": "Volap\u00fck"
-    },
-    "wa": {
-      "english_name": "Walloon",
-      "name": "Walon"
-    },
-    "war": {
-      "english_name": "Waray-Waray",
-      "name": "Winaray"
-    },
-    "wo": {
-      "english_name": "Wolof",
-      "name": "Wolof"
-    },
-    "wuu": {
-      "english_name": "Wu",
-      "name": "\u5434\u8bed"
-    },
-    "xal": {
-      "english_name": "Kalmyk",
-      "name": "\u0425\u0430\u043b\u044c\u043c\u0433"
-    },
-    "xh": {
-      "english_name": "Xhosa",
-      "name": "isiXhosa"
-    },
-    "xmf": {
-      "english_name": "Mingrelian",
-      "name": "\u10db\u10d0\u10e0\u10d2\u10d0\u10da\u10e3\u10e0\u10d8 (Margaluri)"
-    },
-    "yi": {
-      "english_name": "Yiddish",
-      "name": "\u05d9\u05d9\u05b4\u05d3\u05d9\u05e9"
-    },
-    "yo": {
-      "english_name": "Yoruba",
-      "name": "Yor\u00f9b\u00e1"
-    },
-    "za": {
-      "english_name": "Zhuang",
-      "name": "Cuengh"
-    },
-    "zea": {
-      "english_name": "Zeelandic",
-      "name": "Ze\u00eauws"
-    },
-    "zh": {
-      "english_name": "Chinese",
-      "name": "\u4e2d\u6587"
-    },
-    "zh-classical": {
-      "english_name": "Classical Chinese",
-      "name": "\u53e4\u6587 / \u6587\u8a00\u6587"
-    },
-    "zh-min-nan": {
-      "english_name": "Min Nan",
-      "name": "B\u00e2n-l\u00e2m-g\u00fa"
-    },
-    "zh-yue": {
-      "english_name": "Cantonese",
-      "name": "\u7cb5\u8a9e"
-    },
-    "zu": {
-      "english_name": "Zulu",
-      "name": "isiZulu"
-    }
-  },
-  "yahoo": [
-    "ar",
-    "bg",
-    "cs",
-    "da",
-    "de",
-    "el",
-    "en",
-    "es",
-    "et",
-    "fi",
-    "fr",
-    "he",
-    "hr",
-    "hu",
-    "it",
-    "ja",
-    "ko",
-    "lt",
-    "lv",
-    "nl",
-    "no",
-    "pl",
-    "pt",
-    "ro",
-    "ru",
-    "sk",
-    "sl",
-    "sv",
-    "th",
-    "tr",
-    "zh_chs",
-    "zh_cht"
-  ]
-}

+ 136 - 0
searx/enginelib/__init__.py

@@ -0,0 +1,136 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Engine related implementations
+
+.. note::
+
+   The long term goal is to modularize all relevant implementations to the
+   engines here in this Python package.  In addition to improved modularization,
+   this will also be necessary in part because the probability of circular
+   imports will increase due to the increased typification of implementations in
+   the future.
+
+   ToDo:
+
+   - move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`.
+"""
+
+
+from __future__ import annotations
+from typing import Union, Dict, List, Callable, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from searx.enginelib import traits
+
+
+class Engine:  # pylint: disable=too-few-public-methods
+    """Class of engine instances built from YAML settings.
+
+    Further documentation see :ref:`general engine configuration`.
+
+    .. hint::
+
+       This class is currently never initialized and only used for type hinting.
+    """
+
+    # Common options in the engine module
+
+    engine_type: str
+    """Type of the engine (:origin:`searx/search/processors`)"""
+
+    paging: bool
+    """Engine supports multiple pages."""
+
+    time_range_support: bool
+    """Engine supports search time range."""
+
+    safesearch: bool
+    """Engine supports SafeSearch"""
+
+    language_support: bool
+    """Engine supports languages (locales) search."""
+
+    language: str
+    """For an engine, when there is ``language: ...`` in the YAML settings the engine
+    does support only this one language:
+
+    .. code:: yaml
+
+      - name: google french
+        engine: google
+        language: fr
+    """
+
+    region: str
+    """For an engine, when there is ``region: ...`` in the YAML settings the engine
+    does support only this one region:
+
+    .. code:: yaml
+
+      - name: google belgium
+        engine: google
+        region: fr-BE
+    """
+
+    fetch_traits: Callable
+    """Function to fetch engine's traits from origin."""
+
+    traits: traits.EngineTraits
+    """Traits of the engine."""
+
+    # settings.yml
+
+    categories: List[str]
+    """Tabs, in which the engine is working."""
+
+    name: str
+    """Name that will be used across SearXNG to define this engine.  In settings, on
+    the result page .."""
+
+    engine: str
+    """Name of the python file used to handle requests and responses to and from
+    this search engine (file name from :origin:`searx/engines` without
+    ``.py``)."""
+
+    enable_http: bool
+    """Enable HTTP (by default only HTTPS is enabled)."""
+
+    shortcut: str
+    """Code used to execute bang requests (``!foo``)"""
+
+    timeout: float
+    """Specific timeout for search-engine."""
+
+    display_error_messages: bool
+    """Display error messages on the web UI."""
+
+    proxies: dict
+    """Set proxies for a specific engine (YAML):
+
+    .. code:: yaml
+
+       proxies :
+         http:  socks5://proxy:port
+         https: socks5://proxy:port
+    """
+
+    disabled: bool
+    """Disable the engine by default without deleting it.  The user can still
+    activate it manually in the settings."""
+
+    inactive: bool
+    """Remove the engine from the settings (*disabled & removed*)."""
+
+    about: dict
+    """Additional fields describing the engine.
+
+    .. code:: yaml
+
+       about:
+          website: https://example.com
+          wikidata_id: Q306656
+          official_api_documentation: https://example.com/api-doc
+          use_official_api: true
+          require_api_key: true
+          results: HTML
+    """

+ 250 - 0
searx/enginelib/traits.py

@@ -0,0 +1,250 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Engine's traits are fetched from the origin engines and stored in a JSON file
+in the *data folder*.  Most often traits are languages and region codes and
+their mapping from SearXNG's representation to the representation in the origin
+search engine.  For new traits new properties can be added to the class
+:py:class:`EngineTraits`.
+
+To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
+used.
+"""
+
+from __future__ import annotations
+import json
+import dataclasses
+from typing import Dict, Union, Callable, Optional, TYPE_CHECKING
+from typing_extensions import Literal, Self
+
+from searx import locales
+from searx.data import data_dir, ENGINE_TRAITS
+
+if TYPE_CHECKING:
+    from . import Engine
+
+
+class EngineTraitsEncoder(json.JSONEncoder):
+    """Encodes :class:`EngineTraits` to a serializable object, see
+    :class:`json.JSONEncoder`."""
+
+    def default(self, o):
+        """Return dictionary of a :class:`EngineTraits` object."""
+        if isinstance(o, EngineTraits):
+            return o.__dict__
+        return super().default(o)
+
+
+@dataclasses.dataclass
+class EngineTraits:
+    """The class is intended to be instantiated for each engine."""
+
+    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
+    """Maps SearXNG's internal representation of a region to the one of the engine.
+
+    SearXNG's internal representation can be parsed by babel and the value is
+    sent to the engine:
+
+    .. code:: python
+
+       regions ={
+           'fr-BE' : <engine's region name>,
+       }
+
+       for key, engine_region in regions.items():
+          searxng_region = babel.Locale.parse(key, sep='-')
+          ...
+    """
+
+    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
+    """Maps SearXNG's internal representation of a language to the one of the engine.
+
+    SearXNG's internal representation can be parsed by babel and the value is
+    sent to the engine:
+
+    .. code:: python
+
+       languages = {
+           'ca' : <engine's language name>,
+       }
+
+       for key, engine_lang in languages.items():
+          searxng_lang = babel.Locale.parse(key)
+          ...
+    """
+
+    all_locale: Optional[str] = None
+    """To which locale value SearXNG's ``all`` language is mapped (shown as "Default
+    language").
+    """
+
+    data_type: Literal['traits_v1'] = 'traits_v1'
+    """Data type, default is 'traits_v1'.
+    """
+
+    custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
+    """A place to store engine's custom traits, not related to the SearXNG core
+
+    """
+
+    def get_language(self, searxng_locale: str, default=None):
+        """Return engine's language string that *best fits* to SearXNG's locale.
+
+        :param searxng_locale: SearXNG's internal representation of locale
+          selected by the user.
+
+        :param default: engine's default language
+
+        The *best fits* rules are implemented in
+        :py:obj:`locales.get_engine_locale`.  Except for the special value ``all``
+        which is determined from :py:obj:`EngineTraits.all_locale`.
+        """
+        if searxng_locale == 'all' and self.all_locale is not None:
+            return self.all_locale
+        return locales.get_engine_locale(searxng_locale, self.languages, default=default)
+
+    def get_region(self, searxng_locale: str, default=None):
+        """Return engine's region string that best fits to SearXNG's locale.
+
+        :param searxng_locale: SearXNG's internal representation of locale
+          selected by the user.
+
+        :param default: engine's default region
+
+        The *best fits* rules are implemented in
+        :py:obj:`locales.get_engine_locale`.  Except for the special value ``all``
+        which is determined from :py:obj:`EngineTraits.all_locale`.
+        """
+        if searxng_locale == 'all' and self.all_locale is not None:
+            return self.all_locale
+        return locales.get_engine_locale(searxng_locale, self.regions, default=default)
+
+    def is_locale_supported(self, searxng_locale: str) -> bool:
+        """A *locale* (SearXNG's internal representation) is considered to be supported
+        by the engine if the *region* or the *language* is supported by the
+        engine.  For verification the functions :py:func:`self.get_region` and
+        :py:func:`self.get_language` are used.
+        """
+        if self.data_type == 'traits_v1':
+            return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
+
+        raise TypeError('engine traits of type %s is unknown' % self.data_type)
+
+    def copy(self):
+        """Create a copy of the dataclass object."""
+        return EngineTraits(**dataclasses.asdict(self))
+
+    @classmethod
+    def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
+        """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
+        and set properties from the origin engine in the object ``engine_traits``.  If
+        the function does not exist, ``None`` is returned.
+        """
+
+        fetch_traits = getattr(engine, 'fetch_traits', None)
+        engine_traits = None
+
+        if fetch_traits:
+            engine_traits = cls()
+            fetch_traits(engine_traits)
+        return engine_traits
+
+    def set_traits(self, engine: Engine):
+        """Set traits from self object in a :py:obj:`.Engine` namespace.
+
+        :param engine: engine instance built by :py:func:`searx.engines.load_engine`
+        """
+
+        if self.data_type == 'traits_v1':
+            self._set_traits_v1(engine)
+        else:
+            raise TypeError('engine traits of type %s is unknown' % self.data_type)
+
+    def _set_traits_v1(self, engine: Engine):
+        # For an engine, when there is `language: ...` in the YAML settings the engine
+        # does support only this one language (region)::
+        #
+        #   - name: google italian
+        #     engine: google
+        #     language: it
+        #     region: it-IT
+
+        traits = self.copy()
+
+        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
+
+        languages = traits.languages
+        if hasattr(engine, 'language'):
+            if engine.language not in languages:
+                raise ValueError(_msg % (engine.name, 'language', engine.language))
+            traits.languages = {engine.language: languages[engine.language]}
+
+        regions = traits.regions
+        if hasattr(engine, 'region'):
+            if engine.region not in regions:
+                raise ValueError(_msg % (engine.name, 'region', engine.region))
+            traits.regions = {engine.region: regions[engine.region]}
+
+        engine.language_support = bool(traits.languages or traits.regions)
+
+        # set the copied & modified traits in engine's namespace
+        engine.traits = traits
+
+
+class EngineTraitsMap(Dict[str, EngineTraits]):
+    """A python dictionary to map :class:`EngineTraits` by engine name."""
+
+    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
+    """File with persistence of the :py:obj:`EngineTraitsMap`."""
+
+    def save_data(self):
+        """Store EngineTraitsMap in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
+        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f:
+            json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder)
+
+    @classmethod
+    def from_data(cls) -> Self:
+        """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
+        obj = cls()
+        for k, v in ENGINE_TRAITS.items():
+            obj[k] = EngineTraits(**v)
+        return obj
+
+    @classmethod
+    def fetch_traits(cls, log: Callable) -> Self:
+        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel
+
+        names = list(engines.engines)
+        names.sort()
+        obj = cls()
+
+        for engine_name in names:
+            engine = engines.engines[engine_name]
+
+            traits = EngineTraits.fetch_traits(engine)
+            if traits is not None:
+                log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
+                log("%-20s: SearXNG regions   --> %s" % (engine_name, len(traits.regions)))
+                obj[engine_name] = traits
+
+        return obj
+
+    def set_traits(self, engine: Engine):
+        """Set traits in a :py:obj:`Engine` namespace.
+
+        :param engine: engine instance built by :py:func:`searx.engines.load_engine`
+        """
+
+        engine_traits = EngineTraits(data_type='traits_v1')
+        if engine.name in self.keys():
+            engine_traits = self[engine.name]
+
+        elif engine.engine in self.keys():
+            # The key of the dictionary traits_map is the *engine name*
+            # configured in settings.yml.  When multiple engines are configured
+            # in settings.yml to use the same origin engine (python module)
+            # these additional engines can use the languages from the origin
+            # engine.  For this use the configured ``engine: ...`` from
+            # settings.yml
+            engine_traits = self[engine.engine]
+
+        engine_traits.set_traits(engine)

+ 15 - 85
searx/engines/__init__.py

@@ -11,24 +11,22 @@ usage::
 
 
 """
 """
 
 
+from __future__ import annotations
+
 import sys
 import sys
 import copy
 import copy
-from typing import Dict, List, Optional
-
 from os.path import realpath, dirname
 from os.path import realpath, dirname
-from babel.localedata import locale_identifiers
+
+from typing import TYPE_CHECKING, Dict, Optional
+
 from searx import logger, settings
 from searx import logger, settings
-from searx.data import ENGINES_LANGUAGES
-from searx.network import get
-from searx.utils import load_module, match_language, gen_useragent
+from searx.utils import load_module
 
 
+if TYPE_CHECKING:
+    from searx.enginelib import Engine
 
 
 logger = logger.getChild('engines')
 logger = logger.getChild('engines')
 ENGINE_DIR = dirname(realpath(__file__))
 ENGINE_DIR = dirname(realpath(__file__))
-BABEL_LANGS = [
-    lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
-    for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
-]
 ENGINE_DEFAULT_ARGS = {
 ENGINE_DEFAULT_ARGS = {
     "engine_type": "online",
     "engine_type": "online",
     "inactive": False,
     "inactive": False,
@@ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = {
     "timeout": settings["outgoing"]["request_timeout"],
     "timeout": settings["outgoing"]["request_timeout"],
     "shortcut": "-",
     "shortcut": "-",
     "categories": ["general"],
     "categories": ["general"],
-    "supported_languages": [],
-    "language_aliases": {},
     "paging": False,
     "paging": False,
     "safesearch": False,
     "safesearch": False,
     "time_range_support": False,
     "time_range_support": False,
@@ -52,24 +48,6 @@ ENGINE_DEFAULT_ARGS = {
 OTHER_CATEGORY = 'other'
 OTHER_CATEGORY = 'other'
 
 
 
 
-class Engine:  # pylint: disable=too-few-public-methods
-    """This class is currently never initialized and only used for type hinting."""
-
-    name: str
-    engine: str
-    shortcut: str
-    categories: List[str]
-    supported_languages: List[str]
-    about: dict
-    inactive: bool
-    disabled: bool
-    language_support: bool
-    paging: bool
-    safesearch: bool
-    time_range_support: bool
-    timeout: float
-
-
 # Defaults for the namespace of an engine module, see :py:func:`load_engine`
 # Defaults for the namespace of an engine module, see :py:func:`load_engine`
 
 
 categories = {'general': []}
 categories = {'general': []}
@@ -136,9 +114,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
         return None
         return None
 
 
     update_engine_attributes(engine, engine_data)
     update_engine_attributes(engine, engine_data)
-    set_language_attributes(engine)
     update_attributes_for_tor(engine)
     update_attributes_for_tor(engine)
 
 
+    # avoid cyclic imports
+    # pylint: disable=import-outside-toplevel
+    from searx.enginelib.traits import EngineTraitsMap
+
+    trait_map = EngineTraitsMap.from_data()
+    trait_map.set_traits(engine)
+
     if not is_engine_active(engine):
     if not is_engine_active(engine):
         return None
         return None
 
 
@@ -190,60 +174,6 @@ def update_engine_attributes(engine: Engine, engine_data):
             setattr(engine, arg_name, copy.deepcopy(arg_value))
             setattr(engine, arg_name, copy.deepcopy(arg_value))
 
 
 
 
-def set_language_attributes(engine: Engine):
-    # assign supported languages from json file
-    if engine.name in ENGINES_LANGUAGES:
-        engine.supported_languages = ENGINES_LANGUAGES[engine.name]
-
-    elif engine.engine in ENGINES_LANGUAGES:
-        # The key of the dictionary ENGINES_LANGUAGES is the *engine name*
-        # configured in settings.xml.  When multiple engines are configured in
-        # settings.yml to use the same origin engine (python module) these
-        # additional engines can use the languages from the origin engine.
-        # For this use the configured ``engine: ...`` from settings.yml
-        engine.supported_languages = ENGINES_LANGUAGES[engine.engine]
-
-    if hasattr(engine, 'language'):
-        # For an engine, when there is `language: ...` in the YAML settings, the
-        # engine supports only one language, in this case
-        # engine.supported_languages should contains this value defined in
-        # settings.yml
-        if engine.language not in engine.supported_languages:
-            raise ValueError(
-                "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language)
-            )
-
-        if isinstance(engine.supported_languages, dict):
-            engine.supported_languages = {engine.language: engine.supported_languages[engine.language]}
-        else:
-            engine.supported_languages = [engine.language]
-
-    # find custom aliases for non standard language codes
-    for engine_lang in engine.supported_languages:
-        iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None)
-        if (
-            iso_lang
-            and iso_lang != engine_lang
-            and not engine_lang.startswith(iso_lang)
-            and iso_lang not in engine.supported_languages
-        ):
-            engine.language_aliases[iso_lang] = engine_lang
-
-    # language_support
-    engine.language_support = len(engine.supported_languages) > 0
-
-    # assign language fetching method if auxiliary method exists
-    if hasattr(engine, '_fetch_supported_languages'):
-        headers = {
-            'User-Agent': gen_useragent(),
-            'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
-        }
-        engine.fetch_supported_languages = (
-            # pylint: disable=protected-access
-            lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
-        )
-
-
 def update_attributes_for_tor(engine: Engine) -> bool:
 def update_attributes_for_tor(engine: Engine) -> bool:
     if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
     if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
         engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
         engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')

+ 115 - 110
searx/engines/archlinux.py

@@ -1,15 +1,32 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
 """
 """
- Arch Linux Wiki
+Arch Linux Wiki
+~~~~~~~~~~~~~~~
+
+This implementation does not use an official API: Mediawiki provides an API, but
+Arch Wiki blocks access to it.
 
 
- API: Mediawiki provides API, but Arch Wiki blocks access to it
 """
 """
 
 
-from urllib.parse import urlencode, urljoin
-from lxml import html
+from typing import TYPE_CHECKING
+from urllib.parse import urlencode, urljoin, urlparse
+import lxml
+import babel
+
+from searx import network
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
+from searx.enginelib.traits import EngineTraits
+from searx.locales import language_tag
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
+
 
 
-# about
 about = {
 about = {
     "website": 'https://wiki.archlinux.org/',
     "website": 'https://wiki.archlinux.org/',
     "wikidata_id": 'Q101445877',
     "wikidata_id": 'Q101445877',
@@ -22,125 +39,113 @@ about = {
 # engine dependent config
 # engine dependent config
 categories = ['it', 'software wikis']
 categories = ['it', 'software wikis']
 paging = True
 paging = True
-base_url = 'https://wiki.archlinux.org'
-
-# xpath queries
-xpath_results = '//ul[@class="mw-search-results"]/li'
-xpath_link = './/div[@class="mw-search-result-heading"]/a'
-
-
-# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
-def locale_to_lang_code(locale):
-    if locale.find('-') >= 0:
-        locale = locale.split('-')[0]
-    return locale
-
-
-# wikis for some languages were moved off from the main site, we need to make
-# requests to correct URLs to be able to get results in those languages
-lang_urls = {
-    # fmt: off
-    'all': {
-        'base': 'https://wiki.archlinux.org',
-        'search': '/index.php?title=Special:Search&offset={offset}&{query}'
-    },
-    'de': {
-        'base': 'https://wiki.archlinux.de',
-        'search': '/index.php?title=Spezial:Suche&offset={offset}&{query}'
-    },
-    'fr': {
-        'base': 'https://wiki.archlinux.fr',
-        'search': '/index.php?title=Spécial:Recherche&offset={offset}&{query}'
-    },
-    'ja': {
-        'base': 'https://wiki.archlinuxjp.org',
-        'search': '/index.php?title=特別:検索&offset={offset}&{query}'
-    },
-    'ro': {
-        'base': 'http://wiki.archlinux.ro',
-        'search': '/index.php?title=Special:Căutare&offset={offset}&{query}'
-    },
-    'tr': {
-        'base': 'http://archtr.org/wiki',
-        'search': '/index.php?title=Özel:Ara&offset={offset}&{query}'
-    }
-    # fmt: on
-}
-
-
-# get base & search URLs for selected language
-def get_lang_urls(language):
-    if language in lang_urls:
-        return lang_urls[language]
-    return lang_urls['all']
-
-
-# Language names to build search requests for
-# those languages which are hosted on the main site.
-main_langs = {
-    'ar': 'العربية',
-    'bg': 'Български',
-    'cs': 'Česky',
-    'da': 'Dansk',
-    'el': 'Ελληνικά',
-    'es': 'Español',
-    'he': 'עברית',
-    'hr': 'Hrvatski',
-    'hu': 'Magyar',
-    'it': 'Italiano',
-    'ko': '한국어',
-    'lt': 'Lietuviškai',
-    'nl': 'Nederlands',
-    'pl': 'Polski',
-    'pt': 'Português',
-    'ru': 'Русский',
-    'sl': 'Slovenský',
-    'th': 'ไทย',
-    'uk': 'Українська',
-    'zh': '简体中文',
-}
-supported_languages = dict(lang_urls, **main_langs)
+main_wiki = 'wiki.archlinux.org'
 
 
 
 
-# do search-request
 def request(query, params):
 def request(query, params):
-    # translate the locale (e.g. 'en-US') to language code ('en')
-    language = locale_to_lang_code(params['language'])
-
-    # if our language is hosted on the main site, we need to add its name
-    # to the query in order to narrow the results to that language
-    if language in main_langs:
-        query += ' (' + main_langs[language] + ')'
 
 
-    # prepare the request parameters
-    query = urlencode({'search': query})
+    sxng_lang = params['searxng_locale'].split('-')[0]
+    netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
+    title = traits.custom['title'].get(sxng_lang, 'Special:Search')
+    base_url = 'https://' + netloc + '/index.php?'
     offset = (params['pageno'] - 1) * 20
     offset = (params['pageno'] - 1) * 20
 
 
-    # get request URLs for our language of choice
-    urls = get_lang_urls(language)
-    search_url = urls['base'] + urls['search']
-
-    params['url'] = search_url.format(query=query, offset=offset)
+    if netloc == main_wiki:
+        eng_lang: str = traits.get_language(sxng_lang, 'English')
+        query += ' (' + eng_lang + ')'
+    elif netloc == 'wiki.archlinuxcn.org':
+        base_url = 'https://' + netloc + '/wzh/index.php?'
+
+    args = {
+        'search': query,
+        'title': title,
+        'limit': 20,
+        'offset': offset,
+        'profile': 'default',
+    }
 
 
+    params['url'] = base_url + urlencode(args)
     return params
     return params
 
 
 
 
-# get response from search-request
 def response(resp):
 def response(resp):
-    # get the base URL for the language in which request was made
-    language = locale_to_lang_code(resp.search_params['language'])
-    base_url = get_lang_urls(language)['base']
 
 
     results = []
     results = []
+    dom = lxml.html.fromstring(resp.text)
 
 
-    dom = html.fromstring(resp.text)
+    # get the base URL for the language in which request was made
+    sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
+    netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
+    base_url = 'https://' + netloc + '/index.php?'
+
+    for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
+        link = eval_xpath_getindex(result, './/div[@class="mw-search-result-heading"]/a', 0)
+        content = extract_text(result.xpath('.//div[@class="searchresult"]'))
+        results.append(
+            {
+                'url': urljoin(base_url, link.get('href')),
+                'title': extract_text(link),
+                'content': content,
+            }
+        )
 
 
-    # parse results
-    for result in eval_xpath_list(dom, xpath_results):
-        link = eval_xpath_getindex(result, xpath_link, 0)
-        href = urljoin(base_url, link.attrib.get('href'))
-        title = extract_text(link)
+    return results
 
 
-        results.append({'url': href, 'title': title})
 
 
-    return results
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages from Archlinux-Wiki.  The location of the Wiki address of a
+    language is mapped in a :py:obj:`custom field
+    <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``).  Depending
+    on the location, the ``title`` argument in the request is translated.
+
+    .. code:: python
+
+       "custom": {
+         "wiki_netloc": {
+           "de": "wiki.archlinux.de",
+            # ...
+           "zh": "wiki.archlinuxcn.org"
+         },
+         "title": {
+           "de": "Spezial:Suche",
+            # ...
+           "zh": "Special:\u641c\u7d22"
+         },
+       },
+
+    """
+
+    engine_traits.custom['wiki_netloc'] = {}
+    engine_traits.custom['title'] = {}
+
+    title_map = {
+        'de': 'Spezial:Suche',
+        'fa': 'ویژه:جستجو',
+        'ja': '特別:検索',
+        'zh': 'Special:搜索',
+    }
+
+    resp = network.get('https://wiki.archlinux.org/')
+    if not resp.ok:
+        print("ERROR: response from wiki.archlinix.org is not OK.")
+
+    dom = lxml.html.fromstring(resp.text)
+    for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):
+
+        sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-'))
+        # zh_Hans --> zh
+        sxng_tag = sxng_tag.split('_')[0]
+
+        netloc = urlparse(a.get('href')).netloc
+        if netloc != 'wiki.archlinux.org':
+            title = title_map.get(sxng_tag)
+            if not title:
+                print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
+                continue
+            engine_traits.custom['wiki_netloc'][sxng_tag] = netloc
+            engine_traits.custom['title'][sxng_tag] = title
+
+        eng_tag = extract_text(eval_xpath_list(a, ".//span"))
+        engine_traits.languages[sxng_tag] = eng_tag
+
+    engine_traits.languages['en'] = 'English'

+ 204 - 54
searx/engines/bing.py

@@ -1,16 +1,53 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Bing (Web)
+"""This is the implementation of the Bing-WEB engine. Some of these
+implementations are shared by other engines:
+
+- :ref:`bing images engine`
+- :ref:`bing news engine`
+- :ref:`bing videos engine`
+
+On the `preference page`_ Bing offers a lot of languages and regions (see section
+'Search results languages' and 'Country/region').  However, the abundant choice
+does not correspond to reality, where Bing has a full-text indexer only for a
+limited number of languages.  By example: you can select a language like Māori
+but you never get a result in this language.
+
+What comes a bit closer to the truth are the `search-APIs`_ but they don't seem
+to be completely correct either (if you take a closer look you will find some
+inaccuracies there too):
+
+- :py:obj:`searx.engines.bing.bing_traits_url`
+- :py:obj:`searx.engines.bing_videos.bing_traits_url`
+- :py:obj:`searx.engines.bing_images.bing_traits_url`
+- :py:obj:`searx.engines.bing_news.bing_traits_url`
+
+.. _preference page: https://www.bing.com/account/general
+.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/
 
 
-- https://github.com/searx/searx/issues/2019#issuecomment-648227442
 """
 """
-# pylint: disable=too-many-branches
+# pylint: disable=too-many-branches, invalid-name
 
 
+from typing import TYPE_CHECKING
+import datetime
 import re
 import re
-from urllib.parse import urlencode, urlparse, parse_qs
+import uuid
+from urllib.parse import urlencode
 from lxml import html
 from lxml import html
-from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language, eval_xpath_getindex
-from searx.network import multi_requests, Request
+import babel
+import babel.languages
+
+from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
+from searx import network
+from searx.locales import language_tag, region_tag
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 about = {
 about = {
     "website": 'https://www.bing.com',
     "website": 'https://www.bing.com',
@@ -21,56 +58,124 @@ about = {
     "results": 'HTML',
     "results": 'HTML',
 }
 }
 
 
+send_accept_language_header = True
+"""Bing tries to guess user's language and territory from the HTTP
+Accept-Language.  Optionally, the user can select a search-language (can be
+different to the UI language) and a region (market code)."""
+
 # engine dependent config
 # engine dependent config
 categories = ['general', 'web']
 categories = ['general', 'web']
 paging = True
 paging = True
-time_range_support = False
-safesearch = False
-send_accept_language_header = True
-supported_languages_url = 'https://www.bing.com/account/general'
-language_aliases = {}
-
-# search-url
-base_url = 'https://www.bing.com/'
+time_range_support = True
+safesearch = True
+safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}  # cookie: ADLT=STRICT
 
 
-# initial query:     https://www.bing.com/search?q=foo&search=&form=QBLH
-inital_query = 'search?{query}&search=&form=QBLH'
+base_url = 'https://www.bing.com/search'
+"""Bing (Web) search URL"""
 
 
-# following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE
-page_query = 'search?{query}&search=&first={offset}&FORM=PERE'
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
+"""Bing (Web) search API description"""
 
 
 
 
 def _get_offset_from_pageno(pageno):
 def _get_offset_from_pageno(pageno):
     return (pageno - 1) * 10 + 1
     return (pageno - 1) * 10 + 1
 
 
 
 
-def request(query, params):
+def set_bing_cookies(params, engine_language, engine_region, SID):
+
+    # set cookies
+    # -----------
+
+    params['cookies']['_EDGE_V'] = '1'
+
+    # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
+    _EDGE_S = [
+        'F=1',
+        'SID=%s' % SID,
+        'mkt=%s' % engine_region.lower(),
+        'ui=%s' % engine_language.lower(),
+    ]
+    params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S)
+    logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S'])
+
+    # "_EDGE_CD": "m=zh-tw",
+
+    _EDGE_CD = [  # pylint: disable=invalid-name
+        'm=%s' % engine_region.lower(),  # search region: zh-cn
+        'u=%s' % engine_language.lower(),  # UI: en-us
+    ]
+
+    params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';'
+    logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD'])
 
 
-    offset = _get_offset_from_pageno(params.get('pageno', 1))
+    SRCHHPGUSR = [  # pylint: disable=invalid-name
+        'SRCHLANG=%s' % engine_language,
+        # Trying to set ADLT cookie here seems not to have any effect, I assume
+        # there is some age verification by a cookie (and/or session ID) needed,
+        # to disable the SafeSearch.
+        'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'),
+    ]
+    params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR)
+    logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR'])
+
+
+def request(query, params):
+    """Assemble a Bing-Web request."""
 
 
-    # logger.debug("params['pageno'] --> %s", params.get('pageno'))
-    # logger.debug("          offset --> %s", offset)
+    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
+    engine_language = traits.get_language(params['searxng_locale'], 'en')
 
 
-    search_string = page_query
-    if offset == 1:
-        search_string = inital_query
+    SID = uuid.uuid1().hex.upper()
+    CVID = uuid.uuid1().hex.upper()
 
 
-    if params['language'] == 'all':
-        lang = 'EN'
-    else:
-        lang = match_language(params['language'], supported_languages, language_aliases)
+    set_bing_cookies(params, engine_language, engine_region, SID)
 
 
-    query = 'language:{} {}'.format(lang.split('-')[0].upper(), query)
+    # build URL query
+    # ---------------
 
 
-    search_path = search_string.format(query=urlencode({'q': query}), offset=offset)
+    # query term
+    page = int(params.get('pageno', 1))
+    query_params = {
+        # fmt: off
+        'q': query,
+        'pq': query,
+        'cvid': CVID,
+        'qs': 'n',
+        'sp': '-1'
+        # fmt: on
+    }
 
 
-    if offset > 1:
-        referer = base_url + inital_query.format(query=urlencode({'q': query}))
+    # page
+    if page > 1:
+        referer = base_url + '?' + urlencode(query_params)
         params['headers']['Referer'] = referer
         params['headers']['Referer'] = referer
         logger.debug("headers.Referer --> %s", referer)
         logger.debug("headers.Referer --> %s", referer)
 
 
-    params['url'] = base_url + search_path
-    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+    query_params['first'] = _get_offset_from_pageno(page)
+
+    if page == 2:
+        query_params['FORM'] = 'PERE'
+    elif page > 2:
+        query_params['FORM'] = 'PERE%s' % (page - 2)
+
+    filters = ''
+    if params['time_range']:
+        query_params['filt'] = 'custom'
+
+        if params['time_range'] == 'day':
+            filters = 'ex1:"ez1"'
+        elif params['time_range'] == 'week':
+            filters = 'ex1:"ez2"'
+        elif params['time_range'] == 'month':
+            filters = 'ex1:"ez3"'
+        elif params['time_range'] == 'year':
+            epoch_1970 = datetime.date(1970, 1, 1)
+            today_no = (datetime.date.today() - epoch_1970).days
+            filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
+
+    params['url'] = base_url + '?' + urlencode(query_params)
+    if filters:
+        params['url'] = params['url'] + '&filters=' + filters
     return params
     return params
 
 
 
 
@@ -107,7 +212,8 @@ def response(resp):
             url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
             url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
             # Bing can shorten the URL either at the end or in the middle of the string
             # Bing can shorten the URL either at the end or in the middle of the string
             if (
             if (
-                url_cite.startswith('https://')
+                url_cite
+                and url_cite.startswith('https://')
                 and '…' not in url_cite
                 and '…' not in url_cite
                 and '...' not in url_cite
                 and '...' not in url_cite
                 and '›' not in url_cite
                 and '›' not in url_cite
@@ -127,9 +233,9 @@ def response(resp):
 
 
     # resolve all Bing redirections in parallel
     # resolve all Bing redirections in parallel
     request_list = [
     request_list = [
-        Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
+        network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
     ]
     ]
-    response_list = multi_requests(request_list)
+    response_list = network.multi_requests(request_list)
     for i, redirect_response in enumerate(response_list):
     for i, redirect_response in enumerate(response_list):
         if not isinstance(redirect_response, Exception):
         if not isinstance(redirect_response, Exception):
             results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']
             results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']
@@ -157,27 +263,71 @@ def response(resp):
     return results
     return results
 
 
 
 
-# get supported languages from their site
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages and regions from Bing-Web."""
+
+    xpath_market_codes = '//table[1]/tbody/tr/td[3]'
+    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
+    xpath_language_codes = '//table[3]/tbody/tr/td[2]'
+
+    _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
+
+
+def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
+
+    # insert alias to map from a language (zh) to a language + script (zh_Hans)
+    engine_traits.languages['zh'] = 'zh-hans'
 
 
-    lang_tags = set()
+    resp = network.get(url)
+
+    if not resp.ok:
+        print("ERROR: response from peertube is not OK.")
 
 
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
-    lang_links = eval_xpath(dom, '//div[@id="language-section"]//li')
 
 
-    for _li in lang_links:
+    map_lang = {'jp': 'ja'}
+    for td in eval_xpath(dom, xpath_language_codes):
+        eng_lang = td.text
 
 
-        href = eval_xpath(_li, './/@href')[0]
-        (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href)
-        query = parse_qs(query, keep_blank_values=True)
+        if eng_lang in ('en-gb', 'pt-br'):
+            # language 'en' is already in the list and a language 'en-gb' can't
+            # be handled in SearXNG, same with pt-br which is covered by pt-pt.
+            continue
 
 
-        # fmt: off
-        setlang = query.get('setlang', [None, ])[0]
-        # example: 'mn-Cyrl-MN' --> '['mn', 'Cyrl-MN']
-        lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2]  # fmt: skip
-        # fmt: on
+        babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
+        try:
+            sxng_tag = language_tag(babel.Locale.parse(babel_lang))
+        except babel.UnknownLocaleError:
+            print("ERROR: language (%s) is unknown by babel" % (eng_lang))
+            continue
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            if conflict != eng_lang:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
+            continue
+        engine_traits.languages[sxng_tag] = eng_lang
 
 
-        tag = lang + '-' + nation if nation else lang
-        lang_tags.add(tag)
+    map_region = {
+        'en-ID': 'id_ID',
+        'no-NO': 'nb_NO',
+    }
 
 
-    return list(lang_tags)
+    for td in eval_xpath(dom, xpath_market_codes):
+        eng_region = td.text
+        babel_region = map_region.get(eng_region, eng_region).replace('-', '_')
+
+        if eng_region == 'en-WW':
+            engine_traits.all_locale = eng_region
+            continue
+
+        try:
+            sxng_tag = region_tag(babel.Locale.parse(babel_region))
+        except babel.UnknownLocaleError:
+            print("ERROR: region (%s) is unknown by babel" % (eng_region))
+            continue
+        conflict = engine_traits.regions.get(sxng_tag)
+        if conflict:
+            if conflict != eng_region:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
+            continue
+        engine_traits.regions[sxng_tag] = eng_region

+ 77 - 52
searx/engines/bing_images.py

@@ -1,20 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Bing (Images)
-
+"""Bing-Images: description see :py:obj:`searx.engines.bing`.
 """
 """
+# pylint: disable=invalid-name
+
 
 
-from json import loads
+from typing import TYPE_CHECKING
+import uuid
+import json
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
 from lxml import html
 from lxml import html
 
 
-from searx.utils import match_language
-from searx.engines.bing import language_aliases
-from searx.engines.bing import (  # pylint: disable=unused-import
-    _fetch_supported_languages,
-    supported_languages_url,
+from searx.enginelib.traits import EngineTraits
+from searx.engines.bing import (
+    set_bing_cookies,
+    _fetch_traits,
 )
 )
+from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -31,77 +41,92 @@ categories = ['images', 'web']
 paging = True
 paging = True
 safesearch = True
 safesearch = True
 time_range_support = True
 time_range_support = True
-send_accept_language_header = True
-supported_languages_url = 'https://www.bing.com/account/general'
-number_of_results = 28
 
 
-# search-url
-base_url = 'https://www.bing.com/'
-search_string = (
+base_url = 'https://www.bing.com/images/async'
+"""Bing (Images) search URL"""
+
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes'
+"""Bing (Images) search API description"""
+
+time_map = {
     # fmt: off
     # fmt: off
-    'images/search'
-    '?{query}'
-    '&count={count}'
-    '&first={first}'
-    '&tsc=ImageHoverTitle'
+    'day': 60 * 24,
+    'week': 60 * 24 * 7,
+    'month': 60 * 24 * 31,
+    'year': 60 * 24 * 365,
     # fmt: on
     # fmt: on
-)
-time_range_string = '&qft=+filterui:age-lt{interval}'
-time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'}
-
-# safesearch definitions
-safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}
+}
 
 
 
 
-# do search-request
 def request(query, params):
 def request(query, params):
-    offset = ((params['pageno'] - 1) * number_of_results) + 1
+    """Assemble a Bing-Image request."""
 
 
-    search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset)
+    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
+    engine_language = traits.get_language(params['searxng_locale'], 'en')
 
 
-    language = match_language(params['language'], supported_languages, language_aliases).lower()
+    SID = uuid.uuid1().hex.upper()
+    set_bing_cookies(params, engine_language, engine_region, SID)
 
 
-    params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+    # build URL query
+    # - example: https://www.bing.com/images/async?q=foo&first=155&count=35
 
 
-    params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1'
+    query_params = {
+        # fmt: off
+        'q': query,
+        'async' : 'content',
+        # to simplify the page count let's use the default of 35 images per page
+        'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1,
+        'count' : 35,
+        # fmt: on
+    }
 
 
-    params['url'] = base_url + search_path
-    if params['time_range'] in time_range_dict:
-        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
+    # time range
+    # - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'
+
+    if params['time_range']:
+        query_params['qft'] = 'filterui:age-lt%s' % time_map[params['time_range']]
+
+    params['url'] = base_url + '?' + urlencode(query_params)
 
 
     return params
     return params
 
 
 
 
-# get response from search-request
 def response(resp):
 def response(resp):
-    results = []
+    """Get response from Bing-Images"""
 
 
+    results = []
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
 
 
-    # parse results
-    for result in dom.xpath('//div[@class="imgpt"]'):
-        img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0]
-        # Microsoft seems to experiment with this code so don't make the path too specific,
-        # just catch the text section for the first anchor in img_info assuming this to be
-        # the originating site.
-        source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
+    for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'):
 
 
-        m = loads(result.xpath('./a/@m')[0])
+        metadata = result.xpath('.//a[@class="iusc"]/@m')
+        if not metadata:
+            continue
 
 
-        # strip 'Unicode private use area' highlighting, they render to Tux
-        # the Linux penguin and a standing diamond on my machine...
-        title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
+        metadata = json.loads(result.xpath('.//a[@class="iusc"]/@m')[0])
+        title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip()
+        img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip()
+        source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip()
         results.append(
         results.append(
             {
             {
                 'template': 'images.html',
                 'template': 'images.html',
-                'url': m['purl'],
-                'thumbnail_src': m['turl'],
-                'img_src': m['murl'],
-                'content': '',
+                'url': metadata['purl'],
+                'thumbnail_src': metadata['turl'],
+                'img_src': metadata['murl'],
+                'content': metadata['desc'],
                 'title': title,
                 'title': title,
                 'source': source,
                 'source': source,
                 'img_format': img_format,
                 'img_format': img_format,
             }
             }
         )
         )
-
     return results
     return results
+
+
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages and regions from Bing-Images."""
+
+    xpath_market_codes = '//table[1]/tbody/tr/td[3]'
+    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
+    xpath_language_codes = '//table[3]/tbody/tr/td[2]'
+
+    _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)

+ 117 - 108
searx/engines/bing_news.py

@@ -1,24 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Bing (News)
+"""Bing-News: description see :py:obj:`searx.engines.bing`.
 """
 """
 
 
-from urllib.parse import (
-    urlencode,
-    urlparse,
-    parse_qsl,
-    quote,
-)
-from datetime import datetime
-from dateutil import parser
-from lxml import etree
-from lxml.etree import XPath
-from searx.utils import match_language, eval_xpath_getindex
-from searx.engines.bing import (  # pylint: disable=unused-import
-    language_aliases,
-    _fetch_supported_languages,
-    supported_languages_url,
+# pylint: disable=invalid-name
+
+from typing import TYPE_CHECKING
+import uuid
+from urllib.parse import urlencode
+
+from lxml import html
+
+from searx.enginelib.traits import EngineTraits
+from searx.engines.bing import (
+    set_bing_cookies,
+    _fetch_traits,
 )
 )
+from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
+
 
 
 # about
 # about
 about = {
 about = {
@@ -34,108 +40,111 @@ about = {
 categories = ['news']
 categories = ['news']
 paging = True
 paging = True
 time_range_support = True
 time_range_support = True
-send_accept_language_header = True
-
-# search-url
-base_url = 'https://www.bing.com/'
-search_string = 'news/search?{query}&first={offset}&format=RSS'
-search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS'
-time_range_dict = {'day': '7', 'week': '8', 'month': '9'}
-
-
-def url_cleanup(url_string):
-    """remove click"""
-
-    parsed_url = urlparse(url_string)
-    if parsed_url.netloc == 'www.bing.com' and parsed_url.path == '/news/apiclick.aspx':
-        query = dict(parse_qsl(parsed_url.query))
-        url_string = query.get('url', None)
-    return url_string
-
-
-def image_url_cleanup(url_string):
-    """replace the http://*bing.com/th?id=... by https://www.bing.com/th?id=..."""
-
-    parsed_url = urlparse(url_string)
-    if parsed_url.netloc.endswith('bing.com') and parsed_url.path == '/th':
-        query = dict(parse_qsl(parsed_url.query))
-        url_string = "https://www.bing.com/th?id=" + quote(query.get('id'))
-    return url_string
-
-
-def _get_url(query, language, offset, time_range):
-    if time_range in time_range_dict:
-        search_path = search_string_with_time.format(
-            # fmt: off
-            query = urlencode({
-                'q': query,
-                'setmkt': language
-            }),
-            offset = offset,
-            interval = time_range_dict[time_range]
-            # fmt: on
-        )
-    else:
-        # e.g. setmkt=de-de&setlang=de
-        search_path = search_string.format(
-            # fmt: off
-            query = urlencode({
-                'q': query,
-                'setmkt': language
-            }),
-            offset = offset
-            # fmt: on
-        )
-    return base_url + search_path
+time_map = {
+    'day': '4',
+    'week': '8',
+    'month': '9',
+}
+"""A string '4' means *last hour*. We use *last hour* for ``day`` here since the
+difference of *last day* and *last week* in the result list is just marginal.
+"""
+
+base_url = 'https://www.bing.com/news/infinitescrollajax'
+"""Bing (News) search URL"""
+
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes'
+"""Bing (News) search API description"""
+
+mkt_alias = {
+    'zh': 'en-WW',
+    'zh-CN': 'en-WW',
+}
+"""Bing News has an official market code 'zh-CN' but we won't get a result with
+this market code.  For 'zh' and 'zh-CN' we better use the *Worldwide aggregate*
+market code (en-WW).
+"""
 
 
 
 
 def request(query, params):
 def request(query, params):
+    """Assemble a Bing-News request."""
+
+    sxng_locale = params['searxng_locale']
+    engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
+    engine_language = traits.get_language(sxng_locale, 'en')
+
+    SID = uuid.uuid1().hex.upper()
+    set_bing_cookies(params, engine_language, engine_region, SID)
 
 
-    if params['time_range'] and params['time_range'] not in time_range_dict:
-        return params
+    # build URL query
+    #
+    # example: https://www.bing.com/news/infinitescrollajax?q=london&first=1
 
 
-    offset = (params['pageno'] - 1) * 10 + 1
-    if params['language'] == 'all':
-        language = 'en-US'
-    else:
-        language = match_language(params['language'], supported_languages, language_aliases)
-    params['url'] = _get_url(query, language, offset, params['time_range'])
+    query_params = {
+        # fmt: off
+        'q': query,
+        'InfiniteScroll': 1,
+        # to simplify the page count let's use the default of 10 news items per page
+        'first' : (int(params.get('pageno', 1)) - 1) * 10 + 1,
+        # fmt: on
+    }
+
+    if params['time_range']:
+        # qft=interval:"7"
+        query_params['qft'] = 'qft=interval="%s"' % time_map.get(params['time_range'], '9')
+
+    params['url'] = base_url + '?' + urlencode(query_params)
 
 
     return params
     return params
 
 
 
 
 def response(resp):
 def response(resp):
-
+    """Get response from Bing-News"""
     results = []
     results = []
-    rss = etree.fromstring(resp.content)
-    namespaces = rss.nsmap
-
-    for item in rss.xpath('./channel/item'):
-        # url / title / content
-        url = url_cleanup(eval_xpath_getindex(item, './link/text()', 0, default=None))
-        title = eval_xpath_getindex(item, './title/text()', 0, default=url)
-        content = eval_xpath_getindex(item, './description/text()', 0, default='')
-
-        # publishedDate
-        publishedDate = eval_xpath_getindex(item, './pubDate/text()', 0, default=None)
-        try:
-            publishedDate = parser.parse(publishedDate, dayfirst=False)
-        except TypeError:
-            publishedDate = datetime.now()
-        except ValueError:
-            publishedDate = datetime.now()
-
-        # thumbnail
-        thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None)
-        if thumbnail is not None:
-            thumbnail = image_url_cleanup(thumbnail)
-
-        # append result
-        if thumbnail is not None:
-            results.append(
-                {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail}
-            )
-        else:
-            results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content})
+
+    if not resp.ok or not resp.text:
+        return results
+
+    dom = html.fromstring(resp.text)
+
+    for newsitem in dom.xpath('//div[contains(@class, "newsitem")]'):
+
+        url = newsitem.xpath('./@url')[0]
+        title = ' '.join(newsitem.xpath('.//div[@class="caption"]//a[@class="title"]/text()')).strip()
+        content = ' '.join(newsitem.xpath('.//div[@class="snippet"]/text()')).strip()
+        thumbnail = None
+        author = newsitem.xpath('./@data-author')[0]
+        metadata = ' '.join(newsitem.xpath('.//div[@class="source"]/span/text()')).strip()
+
+        img_src = newsitem.xpath('.//a[@class="imagelink"]//img/@src')
+        if img_src:
+            thumbnail = 'https://www.bing.com/' + img_src[0]
+
+        results.append(
+            {
+                'url': url,
+                'title': title,
+                'content': content,
+                'img_src': thumbnail,
+                'author': author,
+                'metadata': metadata,
+            }
+        )
 
 
     return results
     return results
+
+
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages and regions from Bing-News.
+
+    The :py:obj:`description <searx.engines.bing_news.bing_traits_url>` of the
+    first table says *"query parameter when calling the Video Search API."*
+    That's why I use the 4th table "News Category API markets" for the
+    ``xpath_market_codes``.
+
+    """
+
+    xpath_market_codes = '//table[4]/tbody/tr/td[3]'
+    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
+    xpath_language_codes = '//table[3]/tbody/tr/td[2]'
+
+    _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)

+ 71 - 41
searx/engines/bing_videos.py

@@ -1,21 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Bing (Videos)
-
+"""Bing-Videos: description see :py:obj:`searx.engines.bing`.
 """
 """
+# pylint: disable=invalid-name
 
 
-from json import loads
+from typing import TYPE_CHECKING
+import uuid
+import json
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
 from lxml import html
 from lxml import html
 
 
-from searx.utils import match_language
-from searx.engines.bing import language_aliases
-
-from searx.engines.bing import (  # pylint: disable=unused-import
-    _fetch_supported_languages,
-    supported_languages_url,
+from searx.enginelib.traits import EngineTraits
+from searx.engines.bing import (
+    set_bing_cookies,
+    _fetch_traits,
 )
 )
+from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
+
 
 
 about = {
 about = {
     "website": 'https://www.bing.com/videos',
     "website": 'https://www.bing.com/videos',
@@ -26,65 +35,76 @@ about = {
     "results": 'HTML',
     "results": 'HTML',
 }
 }
 
 
+# engine dependent config
 categories = ['videos', 'web']
 categories = ['videos', 'web']
 paging = True
 paging = True
 safesearch = True
 safesearch = True
 time_range_support = True
 time_range_support = True
-send_accept_language_header = True
-number_of_results = 28
 
 
-base_url = 'https://www.bing.com/'
-search_string = (
+base_url = 'https://www.bing.com/videos/asyncv2'
+"""Bing (Videos) async search URL."""
+
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes'
+"""Bing (Video) search API description"""
+
+time_map = {
     # fmt: off
     # fmt: off
-    'videos/search'
-    '?{query}'
-    '&count={count}'
-    '&first={first}'
-    '&scope=video'
-    '&FORM=QBLH'
+    'day': 60 * 24,
+    'week': 60 * 24 * 7,
+    'month': 60 * 24 * 31,
+    'year': 60 * 24 * 365,
     # fmt: on
     # fmt: on
-)
-time_range_string = '&qft=+filterui:videoage-lt{interval}'
-time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'}
-
-# safesearch definitions
-safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}
+}
 
 
 
 
-# do search-request
 def request(query, params):
 def request(query, params):
-    offset = ((params['pageno'] - 1) * number_of_results) + 1
+    """Assemble a Bing-Video request."""
 
 
-    search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset)
+    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
+    engine_language = traits.get_language(params['searxng_locale'], 'en')
 
 
-    # safesearch cookie
-    params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+    SID = uuid.uuid1().hex.upper()
+    set_bing_cookies(params, engine_language, engine_region, SID)
 
 
-    # language cookie
-    language = match_language(params['language'], supported_languages, language_aliases).lower()
-    params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
+    # build URL query
+    #
+    # example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35
 
 
-    # query and paging
-    params['url'] = base_url + search_path
+    query_params = {
+        # fmt: off
+        'q': query,
+        'async' : 'content',
+        # to simplify the page count let's use the default of 35 videos per page
+        'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1,
+        'count' : 35,
+        # fmt: on
+    }
 
 
     # time range
     # time range
-    if params['time_range'] in time_range_dict:
-        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
+    #
+    # example: one week (10080 minutes) '&qft= filterui:videoage-lt10080'  '&form=VRFLTR'
+
+    if params['time_range']:
+        query_params['form'] = 'VRFLTR'
+        query_params['qft'] = ' filterui:videoage-lt%s' % time_map[params['time_range']]
+
+    params['url'] = base_url + '?' + urlencode(query_params)
 
 
     return params
     return params
 
 
 
 
-# get response from search-request
 def response(resp):
 def response(resp):
+    """Get response from Bing-Video"""
     results = []
     results = []
 
 
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
 
 
-    for result in dom.xpath('//div[@class="dg_u"]/div[contains(@class, "mc_vtvc")]'):
-        metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
+    for result in dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'):
+        metadata = json.loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
         info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
         info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
         content = '{0} - {1}'.format(metadata['du'], info)
         content = '{0} - {1}'.format(metadata['du'], info)
-        thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid'])
+        thumbnail = result.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0]
+
         results.append(
         results.append(
             {
             {
                 'url': metadata['murl'],
                 'url': metadata['murl'],
@@ -96,3 +116,13 @@ def response(resp):
         )
         )
 
 
     return results
     return results
+
+
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages and regions from Bing-Videos."""
+
+    xpath_market_codes = '//table[1]/tbody/tr/td[3]'
+    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
+    xpath_language_codes = '//table[3]/tbody/tr/td[2]'
+
+    _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)

+ 127 - 48
searx/engines/dailymotion.py

@@ -1,17 +1,35 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Dailymotion (Videos)
+# lint: pylint
+"""
+Dailymotion (Videos)
+~~~~~~~~~~~~~~~~~~~~
+
+.. _REST GET: https://developers.dailymotion.com/tools/
+.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
+.. _Video filters API: https://developers.dailymotion.com/api/#video-filters
+.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
 
 
 """
 """
 
 
-from typing import Set
+from typing import TYPE_CHECKING
+
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 import time
 import time
 import babel
 import babel
 
 
 from searx.exceptions import SearxEngineAPIException
 from searx.exceptions import SearxEngineAPIException
-from searx.network import raise_for_httperror
+from searx import network
 from searx.utils import html_to_text
 from searx.utils import html_to_text
+from searx.locales import region_tag, language_tag
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -37,11 +55,24 @@ time_delta_dict = {
 }
 }
 
 
 safesearch = True
 safesearch = True
-safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''}
+safesearch_params = {
+    2: {'is_created_for_kids': 'true'},
+    1: {'is_created_for_kids': 'true'},
+    0: {},
+}
+"""True if this video is "Created for Kids" / intends to target an audience
+under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
+"""
 
 
-# search-url
-# - https://developers.dailymotion.com/tools/
-# - https://www.dailymotion.com/doc/api/obj-video.html
+family_filter_map = {
+    2: 'true',
+    1: 'true',
+    0: 'false',
+}
+"""By default, the family filter is turned on. Setting this parameter to
+``false`` will stop filtering-out explicit content from searches and global
+contexts (``family_filter`` in `Global API Parameters`_ ).
+"""
 
 
 result_fields = [
 result_fields = [
     'allow_embed',
     'allow_embed',
@@ -53,27 +84,21 @@ result_fields = [
     'thumbnail_360_url',
     'thumbnail_360_url',
     'id',
     'id',
 ]
 ]
-search_url = (
-    'https://api.dailymotion.com/videos?'
-    'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}'
-).format(
-    fields=','.join(result_fields),
-    password_protected='false',
-    private='false',
-    sort='relevance',
-    limit=number_of_results,
-)
-iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
+"""`Fields selection`_, by default, a few fields are returned. To request more
+specific fields, the ``fields`` parameter is used with the list of fields
+SearXNG needs in the response to build a video result list.
+"""
 
 
-# The request query filters by 'languages' & 'country', therefore instead of
-# fetching only languages we need to fetch locales.
-supported_languages_url = 'https://api.dailymotion.com/locales'
-supported_languages_iso639: Set[str] = set()
+search_url = 'https://api.dailymotion.com/videos?'
+"""URL to retrieve a list of videos.
 
 
+- `REST GET`_
+- `Global API Parameters`_
+- `Video filters API`_
+"""
 
 
-def init(_engine_settings):
-    global supported_languages_iso639
-    supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages])
+iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
+"""URL template to embed video in SearXNG's result list."""
 
 
 
 
 def request(query, params):
 def request(query, params):
@@ -81,34 +106,42 @@ def request(query, params):
     if not query:
     if not query:
         return False
         return False
 
 
-    language = params['language']
-    if language == 'all':
-        language = 'en-US'
-    locale = babel.Locale.parse(language, sep='-')
+    eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+    eng_lang = traits.get_language(params['searxng_locale'], 'en')
 
 
-    language_iso639 = locale.language
-    if locale.language not in supported_languages_iso639:
-        language_iso639 = 'en'
-
-    query_args = {
+    args = {
         'search': query,
         'search': query,
-        'languages': language_iso639,
+        'family_filter': family_filter_map.get(params['safesearch'], 'false'),
+        'thumbnail_ratio': 'original',  # original|widescreen|square
+        # https://developers.dailymotion.com/api/#video-filters
+        'languages': eng_lang,
         'page': params['pageno'],
         'page': params['pageno'],
+        'password_protected': 'false',
+        'private': 'false',
+        'sort': 'relevance',
+        'limit': number_of_results,
+        'fields': ','.join(result_fields),
     }
     }
 
 
-    if locale.territory:
-        localization = locale.language + '_' + locale.territory
-        if localization in supported_languages:
-            query_args['country'] = locale.territory
+    args.update(safesearch_params.get(params['safesearch'], {}))
+
+    # Don't add localization and country arguments if the user selects only a
+    # language without a region (:de, :en, ..)
+
+    if len(params['searxng_locale'].split('-')) > 1:
+        # https://developers.dailymotion.com/api/#global-parameters
+        args['localization'] = eng_region
+        args['country'] = eng_region.split('_')[1]
+        # Insufficient rights for the `ams_country' parameter of route `GET /videos'
+        # 'ams_country': eng_region.split('_')[1],
 
 
     time_delta = time_delta_dict.get(params["time_range"])
     time_delta = time_delta_dict.get(params["time_range"])
     if time_delta:
     if time_delta:
         created_after = datetime.now() - time_delta
         created_after = datetime.now() - time_delta
-        query_args['created_after'] = datetime.timestamp(created_after)
+        args['created_after'] = datetime.timestamp(created_after)
 
 
-    query_str = urlencode(query_args)
-    params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '')
-    params['raise_for_httperror'] = False
+    query_str = urlencode(args)
+    params['url'] = search_url + query_str
 
 
     return params
     return params
 
 
@@ -123,7 +156,7 @@ def response(resp):
     if 'error' in search_res:
     if 'error' in search_res:
         raise SearxEngineAPIException(search_res['error'].get('message'))
         raise SearxEngineAPIException(search_res['error'].get('message'))
 
 
-    raise_for_httperror(resp)
+    network.raise_for_httperror(resp)
 
 
     # parse results
     # parse results
     for res in search_res.get('list', []):
     for res in search_res.get('list', []):
@@ -167,7 +200,53 @@ def response(resp):
     return results
     return results
 
 
 
 
-# get supported languages from their site
-def _fetch_supported_languages(resp):
-    response_json = resp.json()
-    return [item['locale'] for item in response_json['list']]
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch locales & languages from dailymotion.
+
+    Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.
+    There are duplications in the locale codes returned from Dailymotion which
+    can be ignored::
+
+      en_EN --> en_GB, en_US
+      ar_AA --> ar_EG, ar_AE, ar_SA
+
+    The language list `api/languages <https://api.dailymotion.com/languages>`_
+    contains over 7000 *languages* codes (see PR1071_).  We use only those
+    language codes that are used in the locales.
+
+    .. _PR1071: https://github.com/searxng/searxng/pull/1071
+
+    """
+
+    resp = network.get('https://api.dailymotion.com/locales')
+    if not resp.ok:
+        print("ERROR: response from dailymotion/locales is not OK.")
+
+    for item in resp.json()['list']:
+        eng_tag = item['locale']
+        if eng_tag in ('en_EN', 'ar_AA'):
+            continue
+        try:
+            sxng_tag = region_tag(babel.Locale.parse(eng_tag))
+        except babel.UnknownLocaleError:
+            print("ERROR: item unknown --> %s" % item)
+            continue
+
+        conflict = engine_traits.regions.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.regions[sxng_tag] = eng_tag
+
+    locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
+
+    resp = network.get('https://api.dailymotion.com/languages')
+    if not resp.ok:
+        print("ERROR: response from dailymotion/languages is not OK.")
+
+    for item in resp.json()['list']:
+        eng_tag = item['code']
+        if eng_tag in locale_lang_list:
+            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
+            engine_traits.languages[sxng_tag] = eng_tag

+ 1 - 1
searx/engines/demo_offline.py

@@ -63,7 +63,7 @@ def search(query, request_params):
     for row in result_list:
     for row in result_list:
         entry = {
         entry = {
             'query': query,
             'query': query,
-            'language': request_params['language'],
+            'language': request_params['searxng_locale'],
             'value': row.get("value"),
             'value': row.get("value"),
             # choose a result template or comment out to use the *default*
             # choose a result template or comment out to use the *default*
             'template': 'key-value.html',
             'template': 'key-value.html',

+ 298 - 62
searx/engines/duckduckgo.py

@@ -1,71 +1,207 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""DuckDuckGo Lite
+"""
+DuckDuckGo Lite
+~~~~~~~~~~~~~~~
 """
 """
 
 
-from json import loads
-
-from lxml.html import fromstring
+from typing import TYPE_CHECKING
+from urllib.parse import urlencode
+import json
+import babel
+import lxml.html
 
 
+from searx import (
+    network,
+    locales,
+    redislib,
+)
+from searx import redisdb
 from searx.utils import (
 from searx.utils import (
-    dict_subset,
     eval_xpath,
     eval_xpath,
     eval_xpath_getindex,
     eval_xpath_getindex,
     extract_text,
     extract_text,
-    match_language,
 )
 )
-from searx.network import get
+from searx.enginelib.traits import EngineTraits
+from searx.exceptions import SearxEngineAPIException
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
-# about
 about = {
 about = {
     "website": 'https://lite.duckduckgo.com/lite/',
     "website": 'https://lite.duckduckgo.com/lite/',
     "wikidata_id": 'Q12805',
     "wikidata_id": 'Q12805',
-    "official_api_documentation": 'https://duckduckgo.com/api',
     "use_official_api": False,
     "use_official_api": False,
     "require_api_key": False,
     "require_api_key": False,
     "results": 'HTML',
     "results": 'HTML',
 }
 }
 
 
+send_accept_language_header = True
+"""DuckDuckGo-Lite tries to guess the user's preferred language from the HTTP
+``Accept-Language`` header.  Optionally the user can select a region filter (but
+not a language).
+"""
+
 # engine dependent config
 # engine dependent config
 categories = ['general', 'web']
 categories = ['general', 'web']
 paging = True
 paging = True
-supported_languages_url = 'https://duckduckgo.com/util/u588.js'
 time_range_support = True
 time_range_support = True
-send_accept_language_header = True
+safesearch = True  # user can't select but the results are filtered
 
 
-language_aliases = {
-    'ar-SA': 'ar-XA',
-    'es-419': 'es-XL',
-    'ja': 'jp-JP',
-    'ko': 'kr-KR',
-    'sl-SI': 'sl-SL',
-    'zh-TW': 'tzh-TW',
-    'zh-HK': 'tzh-HK',
-}
+url = 'https://lite.duckduckgo.com/lite/'
+# url_ping = 'https://duckduckgo.com/t/sl_l'
 
 
 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
+form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
 
 
-# search-url
-url = 'https://lite.duckduckgo.com/lite/'
-url_ping = 'https://duckduckgo.com/t/sl_l'
 
 
-# match query's language to a region code that duckduckgo will accept
-def get_region_code(lang, lang_list=None):
-    if lang == 'all':
-        return None
+def cache_vqd(query, value):
+    """Caches a ``vqd`` value from a query.
+
+    The vqd value depends on the query string and is needed for the follow up
+    pages or the images loaded by a XMLHttpRequest:
+
+    - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
+    - DuckDuckGo Images: `https://duckduckgo.com/i.js?q=...&vqd=...`
+
+    """
+    c = redisdb.client()
+    if c:
+        logger.debug("cache vqd value: %s", value)
+        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
+        c.set(key, value, ex=600)
+
+
+def get_vqd(query, headers):
+    """Returns the ``vqd`` that fits to the *query*.  If there is no ``vqd`` cached
+    (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
+    response.
+
+    """
+    value = None
+    c = redisdb.client()
+    if c:
+        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
+        value = c.get(key)
+        if value:
+            value = value.decode('utf-8')
+            logger.debug("re-use cached vqd value: %s", value)
+            return value
 
 
-    lang_code = match_language(lang, lang_list or [], language_aliases, 'wt-WT')
-    lang_parts = lang_code.split('-')
+    query_url = 'https://duckduckgo.com/?{query}&iar=images'.format(query=urlencode({'q': query}))
+    res = network.get(query_url, headers=headers)
+    content = res.text
+    if content.find('vqd=\'') == -1:
+        raise SearxEngineAPIException('Request failed')
+    value = content[content.find('vqd=\'') + 5 :]
+    value = value[: value.find('\'')]
+    logger.debug("new vqd value: %s", value)
+    cache_vqd(query, value)
+    return value
 
 
-    # country code goes first
-    return lang_parts[1].lower() + '-' + lang_parts[0].lower()
+
+def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
+    """Get DuckDuckGo's language identifier from SearXNG's locale.
+
+    DuckDuckGo defines its languages by region codes (see
+    :py:obj:`fetch_traits`).
+
+    To get region and language of a DDG service use:
+
+    .. code:: python
+
+       eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+       eng_lang = get_ddg_lang(traits, params['searxng_locale'])
+
+    It might be confusing, but the ``l`` value of the cookie is what SearXNG calls
+    the *region*:
+
+    .. code:: python
+
+        # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
+        params['cookies']['ad'] = eng_lang
+        params['cookies']['ah'] = eng_region
+        params['cookies']['l'] = eng_region
+
+    .. hint::
+
+       `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
+       selection to the user, only a region can be selected by the user
+       (``eng_region`` from the example above).  DDG-lite stores the selected
+       region in a cookie::
+
+         params['cookies']['kl'] = eng_region  # 'ar-es'
+
+    """
+    return eng_traits.custom['lang_region'].get(sxng_locale, eng_traits.get_language(sxng_locale, default))
+
+
+ddg_reg_map = {
+    'tw-tzh': 'zh_TW',
+    'hk-tzh': 'zh_HK',
+    'ct-ca': 'skip',  # ct-ca and es-ca both map to ca_ES
+    'es-ca': 'ca_ES',
+    'id-en': 'id_ID',
+    'no-no': 'nb_NO',
+    'jp-jp': 'ja_JP',
+    'kr-kr': 'ko_KR',
+    'xa-ar': 'ar_SA',
+    'sl-sl': 'sl_SI',
+    'th-en': 'th_TH',
+    'vn-en': 'vi_VN',
+}
+
+ddg_lang_map = {
+    # use ar --> ar_EG (Egypt's arabic)
+    "ar_DZ": 'lang_region',
+    "ar_JO": 'lang_region',
+    "ar_SA": 'lang_region',
+    # use bn --> bn_BD
+    'bn_IN': 'lang_region',
+    # use de --> de_DE
+    'de_CH': 'lang_region',
+    # use en --> en_US,
+    'en_AU': 'lang_region',
+    'en_CA': 'lang_region',
+    'en_GB': 'lang_region',
+    # Esperanto
+    'eo_XX': 'eo',
+    # use es --> es_ES,
+    'es_AR': 'lang_region',
+    'es_CL': 'lang_region',
+    'es_CO': 'lang_region',
+    'es_CR': 'lang_region',
+    'es_EC': 'lang_region',
+    'es_MX': 'lang_region',
+    'es_PE': 'lang_region',
+    'es_UY': 'lang_region',
+    'es_VE': 'lang_region',
+    # use fr --> fr_FR
+    'fr_CA': 'lang_region',
+    'fr_CH': 'lang_region',
+    'fr_BE': 'lang_region',
+    # use nl --> nl_NL
+    'nl_BE': 'lang_region',
+    # use pt --> pt_PT
+    'pt_BR': 'lang_region',
+    # skip these languages
+    'od_IN': 'skip',
+    'io_XX': 'skip',
+    'tokipona_XX': 'skip',
+}
 
 
 
 
 def request(query, params):
 def request(query, params):
 
 
+    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+    # eng_lang = get_ddg_lang(traits, params['searxng_locale'])
+
     params['url'] = url
     params['url'] = url
     params['method'] = 'POST'
     params['method'] = 'POST'
-
     params['data']['q'] = query
     params['data']['q'] = query
 
 
     # The API is not documented, so we do some reverse engineering and emulate
     # The API is not documented, so we do some reverse engineering and emulate
@@ -88,23 +224,19 @@ def request(query, params):
         params['data']['s'] = offset
         params['data']['s'] = offset
         params['data']['dc'] = offset + 1
         params['data']['dc'] = offset + 1
 
 
+    # request needs a vqd argument
+    params['data']['vqd'] = get_vqd(query, params["headers"])
+
     # initial page does not have additional data in the input form
     # initial page does not have additional data in the input form
     if params['pageno'] > 1:
     if params['pageno'] > 1:
-        # request the second page (and more pages) needs 'o' and 'api' arguments
-        params['data']['o'] = 'json'
-        params['data']['api'] = 'd.js'
 
 
-    # initial page does not have additional data in the input form
-    if params['pageno'] > 2:
-        # request the third page (and more pages) some more arguments
-        params['data']['nextParams'] = ''
-        params['data']['v'] = ''
-        params['data']['vqd'] = ''
+        params['data']['o'] = form_data.get('o', 'json')
+        params['data']['api'] = form_data.get('api', 'd.js')
+        params['data']['nextParams'] = form_data.get('nextParams', '')
+        params['data']['v'] = form_data.get('v', 'l')
 
 
-    region_code = get_region_code(params['language'], supported_languages)
-    if region_code:
-        params['data']['kl'] = region_code
-        params['cookies']['kl'] = region_code
+    params['data']['kl'] = eng_region
+    params['cookies']['kl'] = eng_region
 
 
     params['data']['df'] = ''
     params['data']['df'] = ''
     if params['time_range'] in time_range_dict:
     if params['time_range'] in time_range_dict:
@@ -116,26 +248,40 @@ def request(query, params):
     return params
     return params
 
 
 
 
-# get response from search-request
 def response(resp):
 def response(resp):
 
 
-    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
-    get(url_ping, headers=headers_ping)
-
     if resp.status_code == 303:
     if resp.status_code == 303:
         return []
         return []
 
 
     results = []
     results = []
-    doc = fromstring(resp.text)
+    doc = lxml.html.fromstring(resp.text)
 
 
     result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
     result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
-    if not len(result_table) >= 3:
+
+    if len(result_table) == 2:
+        # some locales (at least China) do not have a "next page" button and
+        # the layout of the HTML tables is different.
+        result_table = result_table[1]
+    elif not len(result_table) >= 3:
         # no more results
         # no more results
         return []
         return []
-    result_table = result_table[2]
+    else:
+        result_table = result_table[2]
+        # update form data from response
+        form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
+        if len(form):
+
+            form = form[0]
+            form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
+            form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
+            form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
+            logger.debug('form_data: %s', form_data)
+
+            value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
+            query = resp.search_params['data']['q']
+            cache_vqd(query, value)
 
 
     tr_rows = eval_xpath(result_table, './/tr')
     tr_rows = eval_xpath(result_table, './/tr')
-
     # In the last <tr> is the form of the 'previous/next page' links
     # In the last <tr> is the form of the 'previous/next page' links
     tr_rows = tr_rows[:-1]
     tr_rows = tr_rows[:-1]
 
 
@@ -172,15 +318,105 @@ def response(resp):
     return results
     return results
 
 
 
 
-# get supported languages from their site
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages & regions from DuckDuckGo.
+
+    SearXNG's ``all`` locale maps to DuckDuckGo's "All regions" (``wt-wt``).
+    DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no
+    sense in a SearXNG request since SearXNG's ``all`` will not add an
+    ``Accept-Language`` HTTP header.  The value in ``engine_traits.all_locale``
+    is ``wt-wt`` (the region).
+
+    Besides regions, DuckDuckGo also defines its languages by region codes.  For
+    example, these are the English languages in DuckDuckGo:
+
+    - en_US
+    - en_AU
+    - en_CA
+    - en_GB
+
+    The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from
+    SearXNG's locale.
 
 
-    # response is a js file with regions as an embedded object
-    response_page = resp.text
-    response_page = response_page[response_page.find('regions:{') + 8 :]
-    response_page = response_page[: response_page.find('}') + 1]
+    """
+    # pylint: disable=too-many-branches, too-many-statements
+    # fetch regions
+
+    engine_traits.all_locale = 'wt-wt'
+
+    # updated from u588 to u661 / should be updated automatically?
+    resp = network.get('https://duckduckgo.com/util/u661.js')
+
+    if not resp.ok:
+        print("ERROR: response from DuckDuckGo is not OK.")
+
+    pos = resp.text.find('regions:{') + 8
+    js_code = resp.text[pos:]
+    pos = js_code.find('}') + 1
+    regions = json.loads(js_code[:pos])
+
+    for eng_tag, name in regions.items():
+
+        if eng_tag == 'wt-wt':
+            engine_traits.all_locale = 'wt-wt'
+            continue
+
+        region = ddg_reg_map.get(eng_tag)
+        if region == 'skip':
+            continue
 
 
-    regions_json = loads(response_page)
-    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+        if not region:
+            eng_territory, eng_lang = eng_tag.split('-')
+            region = eng_lang + '_' + eng_territory.upper()
 
 
-    return list(supported_languages)
+        try:
+            sxng_tag = locales.region_tag(babel.Locale.parse(region))
+        except babel.UnknownLocaleError:
+            print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
+            continue
+
+        conflict = engine_traits.regions.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.regions[sxng_tag] = eng_tag
+
+    # fetch languages
+
+    engine_traits.custom['lang_region'] = {}
+
+    pos = resp.text.find('languages:{') + 10
+    js_code = resp.text[pos:]
+    pos = js_code.find('}') + 1
+    js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"')
+    languages = json.loads(js_code)
+
+    for eng_lang, name in languages.items():
+
+        if eng_lang == 'wt_WT':
+            continue
+
+        babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
+        if babel_tag == 'skip':
+            continue
+
+        try:
+
+            if babel_tag == 'lang_region':
+                sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
+                engine_traits.custom['lang_region'][sxng_tag] = eng_lang
+                continue
+
+            sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))
+
+        except babel.UnknownLocaleError:
+            print("ERROR: language %s (%s) is unknown by babel" % (name, eng_lang))
+            continue
+
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            if conflict != eng_lang:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
+            continue
+        engine_traits.languages[sxng_tag] = eng_lang

+ 22 - 13
searx/engines/duckduckgo_definitions.py

@@ -1,22 +1,33 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""DuckDuckGo (Instant Answer API)
+"""
+DuckDuckGo Instant Answer API
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented but from
+reverse engineering we can see that some services (e.g. instant answers) are
+still in use by the DDG search engine.
+
+As far as we can tell, the *instant answers* API does not support languages, or
+at least we could not find out how language support should work.  It seems that
+most of the features are based on English terms.
 
 
 """
 """
 
 
-import json
+from typing import TYPE_CHECKING
+
 from urllib.parse import urlencode, urlparse, urljoin
 from urllib.parse import urlencode, urlparse, urljoin
 from lxml import html
 from lxml import html
 
 
 from searx.data import WIKIDATA_UNITS
 from searx.data import WIKIDATA_UNITS
-from searx.engines.duckduckgo import language_aliases
-from searx.engines.duckduckgo import (  # pylint: disable=unused-import
-    _fetch_supported_languages,
-    supported_languages_url,
-)
-from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function
+from searx.utils import extract_text, html_to_text, get_string_replaces_function
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
 
 
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
 # about
 # about
 about = {
 about = {
     "website": 'https://duckduckgo.com/',
     "website": 'https://duckduckgo.com/',
@@ -37,7 +48,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
 
 
 
 
 def is_broken_text(text):
 def is_broken_text(text):
-    """duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>"
+    """duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``
 
 
     The href URL is broken, the "Related website" may contain some HTML.
     The href URL is broken, the "Related website" may contain some HTML.
 
 
@@ -62,8 +73,6 @@ def result_to_text(text, htmlResult):
 
 
 def request(query, params):
 def request(query, params):
     params['url'] = URL.format(query=urlencode({'q': query}))
     params['url'] = URL.format(query=urlencode({'q': query}))
-    language = match_language(params['language'], supported_languages, language_aliases)
-    language = language.split('-')[0]
     return params
     return params
 
 
 
 
@@ -71,7 +80,7 @@ def response(resp):
     # pylint: disable=too-many-locals, too-many-branches, too-many-statements
     # pylint: disable=too-many-locals, too-many-branches, too-many-statements
     results = []
     results = []
 
 
-    search_res = json.loads(resp.text)
+    search_res = resp.json()
 
 
     # search_res.get('Entity') possible values (not exhaustive) :
     # search_res.get('Entity') possible values (not exhaustive) :
     # * continent / country / department / location / waterfall
     # * continent / country / department / location / waterfall
@@ -235,7 +244,7 @@ def unit_to_str(unit):
 
 
 
 
 def area_to_str(area):
 def area_to_str(area):
-    """parse {'unit': 'http://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}"""
+    """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``"""
     unit = unit_to_str(area.get('unit'))
     unit = unit_to_str(area.get('unit'))
     if unit is not None:
     if unit is not None:
         try:
         try:

+ 55 - 57
searx/engines/duckduckgo_images.py

@@ -1,26 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
- DuckDuckGo (Images)
+DuckDuckGo Images
+~~~~~~~~~~~~~~~~~
 """
 """
 
 
-from json import loads
+from typing import TYPE_CHECKING
 from urllib.parse import urlencode
 from urllib.parse import urlencode
-from searx.exceptions import SearxEngineAPIException
-from searx.engines.duckduckgo import get_region_code
-from searx.engines.duckduckgo import (  # pylint: disable=unused-import
-    _fetch_supported_languages,
-    supported_languages_url,
+
+from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
+from searx.engines.duckduckgo import (
+    get_ddg_lang,
+    get_vqd,
 )
 )
-from searx.network import get
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
     "website": 'https://duckduckgo.com/',
     "website": 'https://duckduckgo.com/',
     "wikidata_id": 'Q12805',
     "wikidata_id": 'Q12805',
-    "official_api_documentation": {
-        'url': 'https://duckduckgo.com/api',
-        'comment': 'but images are not supported',
-    },
     "use_official_api": False,
     "use_official_api": False,
     "require_api_key": False,
     "require_api_key": False,
     "results": 'JSON (site requires js to get images)',
     "results": 'JSON (site requires js to get images)',
@@ -32,70 +36,64 @@ paging = True
 safesearch = True
 safesearch = True
 send_accept_language_header = True
 send_accept_language_header = True
 
 
-# search-url
-images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
-site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images'
+safesearch_cookies = {0: '-2', 1: None, 2: '1'}
+safesearch_args = {0: '1', 1: None, 2: '1'}
 
 
 
 
-# run query in site to get vqd number needed for requesting images
-# TODO: find a way to get this number without an extra request (is it a hash of the query?)
-def get_vqd(query, headers):
-    query_url = site_url.format(query=urlencode({'q': query}))
-    res = get(query_url, headers=headers)
-    content = res.text
-    if content.find('vqd=\'') == -1:
-        raise SearxEngineAPIException('Request failed')
-    vqd = content[content.find('vqd=\'') + 5 :]
-    vqd = vqd[: vqd.find('\'')]
-    return vqd
+def request(query, params):
 
 
+    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+    eng_lang = get_ddg_lang(traits, params['searxng_locale'])
 
 
-# do search-request
-def request(query, params):
-    # to avoid running actual external requests when testing
-    if 'is_test' not in params:
-        vqd = get_vqd(query, params['headers'])
-    else:
-        vqd = '12345'
+    args = {
+        'q': query,
+        'o': 'json',
+        # 'u': 'bing',
+        'l': eng_region,
+        'vqd': get_vqd(query, params["headers"]),
+    }
 
 
-    offset = (params['pageno'] - 1) * 50
+    if params['pageno'] > 1:
+        args['s'] = (params['pageno'] - 1) * 100
 
 
-    safesearch = params['safesearch'] - 1
+    params['cookies']['ad'] = eng_lang  # zh_CN
+    params['cookies']['ah'] = eng_region  # "us-en,de-de"
+    params['cookies']['l'] = eng_region  # "hk-tzh"
+    logger.debug("cookies: %s", params['cookies'])
 
 
-    region_code = get_region_code(params['language'], lang_list=supported_languages)
-    if region_code:
-        params['url'] = images_url.format(
-            query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd
-        )
-    else:
-        params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
+    safe_search = safesearch_cookies.get(params['safesearch'])
+    if safe_search is not None:
+        params['cookies']['p'] = safe_search  # "-2", "1"
+    safe_search = safesearch_args.get(params['safesearch'])
+    if safe_search is not None:
+        args['p'] = safe_search  # "-1", "1"
+
+    args = urlencode(args)
+    params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=args, f=',,,,,')
+
+    params['headers']['Accept'] = 'application/json, text/javascript, */*; q=0.01'
+    params['headers']['Referer'] = 'https://duckduckgo.com/'
+    params['headers']['X-Requested-With'] = 'XMLHttpRequest'
+    logger.debug("headers: %s", params['headers'])
 
 
     return params
     return params
 
 
 
 
-# get response from search-request
 def response(resp):
 def response(resp):
     results = []
     results = []
+    res_json = resp.json()
 
 
-    content = resp.text
-    res_json = loads(content)
-
-    # parse results
     for result in res_json['results']:
     for result in res_json['results']:
-        title = result['title']
-        url = result['url']
-        thumbnail = result['thumbnail']
-        image = result['image']
-
-        # append result
         results.append(
         results.append(
             {
             {
                 'template': 'images.html',
                 'template': 'images.html',
-                'title': title,
+                'title': result['title'],
                 'content': '',
                 'content': '',
-                'thumbnail_src': thumbnail,
-                'img_src': image,
-                'url': url,
+                'thumbnail_src': result['thumbnail'],
+                'img_src': result['image'],
+                'url': result['url'],
+                'img_format': '%s x %s' % (result['width'], result['height']),
+                'source': result['source'],
             }
             }
         )
         )
 
 

+ 31 - 4
searx/engines/duckduckgo_weather.py

@@ -1,13 +1,29 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""DuckDuckGo Weather"""
+"""
+DuckDuckGo Weather
+~~~~~~~~~~~~~~~~~~
+"""
 
 
+from typing import TYPE_CHECKING
 from json import loads
 from json import loads
 from urllib.parse import quote
 from urllib.parse import quote
 
 
 from datetime import datetime
 from datetime import datetime
 from flask_babel import gettext
 from flask_babel import gettext
 
 
+from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
+from searx.engines.duckduckgo import get_ddg_lang
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
+
+
 about = {
 about = {
     "website": 'https://duckduckgo.com/',
     "website": 'https://duckduckgo.com/',
     "wikidata_id": 'Q12805',
     "wikidata_id": 'Q12805',
@@ -17,9 +33,11 @@ about = {
     "results": "JSON",
     "results": "JSON",
 }
 }
 
 
-categories = ["others"]
+send_accept_language_header = True
 
 
-url = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
+# engine dependent config
+categories = ["others"]
+URL = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
 
 
 
 
 def generate_condition_table(condition):
 def generate_condition_table(condition):
@@ -72,8 +90,17 @@ def generate_day_table(day):
 
 
 
 
 def request(query, params):
 def request(query, params):
-    params["url"] = url.format(query=quote(query), lang=params['language'].split('-')[0])
 
 
+    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
+    eng_lang = get_ddg_lang(traits, params['searxng_locale'])
+
+    # !ddw paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
+    params['cookies']['ad'] = eng_lang
+    params['cookies']['ah'] = eng_region
+    params['cookies']['l'] = eng_region
+    logger.debug("cookies: %s", params['cookies'])
+
+    params["url"] = URL.format(query=quote(query), lang=eng_lang.split('_')[0])
     return params
     return params
 
 
 
 

+ 3 - 3
searx/engines/gentoo.py

@@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org'
 # xpath queries
 # xpath queries
 xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
+xpath_content = './/div[@class="searchresult"]'
 
 
 
 
 # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
 # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
@@ -77,8 +78,6 @@ main_langs = {
     'uk': 'Українська',
     'uk': 'Українська',
     'zh': '简体中文',
     'zh': '简体中文',
 }
 }
-supported_languages = dict(lang_urls, **main_langs)
-
 
 
 # do search-request
 # do search-request
 def request(query, params):
 def request(query, params):
@@ -118,7 +117,8 @@ def response(resp):
         link = result.xpath(xpath_link)[0]
         link = result.xpath(xpath_link)[0]
         href = urljoin(base_url, link.attrib.get('href'))
         href = urljoin(base_url, link.attrib.get('href'))
         title = extract_text(link)
         title = extract_text(link)
+        content = extract_text(result.xpath(xpath_content))
 
 
-        results.append({'url': href, 'title': title})
+        results.append({'url': href, 'title': title, 'content': content})
 
 
     return results
     return results

+ 305 - 189
searx/engines/google.py

@@ -1,34 +1,39 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""This is the implementation of the google WEB engine.  Some of this
-implementations are shared by other engines:
+"""This is the implementation of the Google WEB engine.  Some of this
+implementations (mainly the :py:obj:`get_google_info`) are shared by other
+engines:
 
 
 - :ref:`google images engine`
 - :ref:`google images engine`
 - :ref:`google news engine`
 - :ref:`google news engine`
 - :ref:`google videos engine`
 - :ref:`google videos engine`
-
-The google WEB engine itself has a special setup option:
-
-.. code:: yaml
-
-  - name: google
-    ...
-    use_mobile_ui: false
-
-``use_mobile_ui``: (default: ``false``)
-  Enables to use *mobile endpoint* to bypass the google blocking (see
-  :issue:`159`).  On the mobile UI of Google Search, the button :guilabel:`More
-  results` is not affected by Google rate limiting and we can still do requests
-  while actively blocked by the original Google search.  By activate
-  ``use_mobile_ui`` this behavior is simulated by adding the parameter
-  ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`.
+- :ref:`google scholar engine`
+- :ref:`google autocomplete`
 
 
 """
 """
 
 
+from typing import TYPE_CHECKING
+
+import re
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from lxml import html
 from lxml import html
-from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
+import babel
+import babel.core
+import babel.languages
+
+from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
+from searx.locales import language_tag, region_tag, get_offical_locales
+from searx import network
 from searx.exceptions import SearxEngineCaptchaException
 from searx.exceptions import SearxEngineCaptchaException
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
+
 
 
 # about
 # about
 about = {
 about = {
@@ -45,64 +50,6 @@ categories = ['general', 'web']
 paging = True
 paging = True
 time_range_support = True
 time_range_support = True
 safesearch = True
 safesearch = True
-send_accept_language_header = True
-use_mobile_ui = False
-supported_languages_url = 'https://www.google.com/preferences?#languages'
-
-# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
-google_domains = {
-    'BG': 'google.bg',  # Bulgaria
-    'CZ': 'google.cz',  # Czech Republic
-    'DE': 'google.de',  # Germany
-    'DK': 'google.dk',  # Denmark
-    'AT': 'google.at',  # Austria
-    'CH': 'google.ch',  # Switzerland
-    'GR': 'google.gr',  # Greece
-    'AU': 'google.com.au',  # Australia
-    'CA': 'google.ca',  # Canada
-    'GB': 'google.co.uk',  # United Kingdom
-    'ID': 'google.co.id',  # Indonesia
-    'IE': 'google.ie',  # Ireland
-    'IN': 'google.co.in',  # India
-    'MY': 'google.com.my',  # Malaysia
-    'NZ': 'google.co.nz',  # New Zealand
-    'PH': 'google.com.ph',  # Philippines
-    'SG': 'google.com.sg',  # Singapore
-    'US': 'google.com',  # United States (google.us) redirects to .com
-    'ZA': 'google.co.za',  # South Africa
-    'AR': 'google.com.ar',  # Argentina
-    'CL': 'google.cl',  # Chile
-    'ES': 'google.es',  # Spain
-    'MX': 'google.com.mx',  # Mexico
-    'EE': 'google.ee',  # Estonia
-    'FI': 'google.fi',  # Finland
-    'BE': 'google.be',  # Belgium
-    'FR': 'google.fr',  # France
-    'IL': 'google.co.il',  # Israel
-    'HR': 'google.hr',  # Croatia
-    'HU': 'google.hu',  # Hungary
-    'IT': 'google.it',  # Italy
-    'JP': 'google.co.jp',  # Japan
-    'KR': 'google.co.kr',  # South Korea
-    'LT': 'google.lt',  # Lithuania
-    'LV': 'google.lv',  # Latvia
-    'NO': 'google.no',  # Norway
-    'NL': 'google.nl',  # Netherlands
-    'PL': 'google.pl',  # Poland
-    'BR': 'google.com.br',  # Brazil
-    'PT': 'google.pt',  # Portugal
-    'RO': 'google.ro',  # Romania
-    'RU': 'google.ru',  # Russia
-    'SK': 'google.sk',  # Slovakia
-    'SI': 'google.si',  # Slovenia
-    'SE': 'google.se',  # Sweden
-    'TH': 'google.co.th',  # Thailand
-    'TR': 'google.com.tr',  # Turkey
-    'UA': 'google.com.ua',  # Ukraine
-    'CN': 'google.com.hk',  # There is no google.cn, we use .com.hk for zh-CN
-    'HK': 'google.com.hk',  # Hong Kong
-    'TW': 'google.com.tw',  # Taiwan
-}
 
 
 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 
 
@@ -112,50 +59,50 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
 # specific xpath variables
 # specific xpath variables
 # ------------------------
 # ------------------------
 
 
-results_xpath = './/div[@data-sokoban-container]'
+results_xpath = './/div[contains(@jscontroller, "SC7lYd")]'
 title_xpath = './/a/h3[1]'
 title_xpath = './/a/h3[1]'
 href_xpath = './/a[h3]/@href'
 href_xpath = './/a[h3]/@href'
-content_xpath = './/div[@data-content-feature=1]'
-
-# google *sections* are no usual *results*, we ignore them
-g_section_with_header = './g-section-with-header'
-
+content_xpath = './/div[@data-sncf]'
 
 
 # Suggestions are links placed in a *card-section*, we extract only the text
 # Suggestions are links placed in a *card-section*, we extract only the text
 # from the links not the links itself.
 # from the links not the links itself.
 suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a'
 suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a'
 
 
+# UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for
+#                                    # celebrities like '!google natasha allegri'
+#                                    # or '!google chris evans'
+UI_ASYNC = 'use_ac:true,_fmt:prog'
+"""Format of the response from UI's async request."""
+
 
 
-def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
-    """Composing various language properties for the google engines.
+def get_google_info(params, eng_traits):
+    """Composing various (language) properties for the google engines (:ref:`google
+    API`).
 
 
     This function is called by the various google engines (:ref:`google web
     This function is called by the various google engines (:ref:`google web
     engine`, :ref:`google images engine`, :ref:`google news engine` and
     engine`, :ref:`google images engine`, :ref:`google news engine` and
     :ref:`google videos engine`).
     :ref:`google videos engine`).
 
 
-    :param dict param: request parameters of the engine
-
-    :param list lang_list: list of supported languages of the engine
-        :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
-
-    :param dict lang_list: custom aliases for non standard language codes
-        (used when calling :py:func:`searx.utils.match_language`)
+    :param dict param: Request parameters of the engine.  At least
+        a ``searxng_locale`` key should be in the dictionary.
 
 
-    :param bool supported_any_language: When a language is not specified, the
-        language interpretation is left up to Google to decide how the search
-        results should be delivered.  This argument is ``True`` for the google
-        engine and ``False`` for the other engines (google-images, -news,
-        -scholar, -videos).
+    :param eng_traits: Engine's traits fetched from google preferences
+        (:py:obj:`searx.enginelib.traits.EngineTraits`)
 
 
     :rtype: dict
     :rtype: dict
     :returns:
     :returns:
         Py-Dictionary with the key/value pairs:
         Py-Dictionary with the key/value pairs:
 
 
         language:
         language:
-            Return value from :py:func:`searx.utils.match_language`
+            The language code that is used by google (e.g. ``lang_en`` or
+            ``lang_zh-TW``)
 
 
         country:
         country:
-            The country code (e.g. US, AT, CA, FR, DE ..)
+            The country code that is used by google (e.g. ``US`` or ``TW``)
+
+        locale:
+            An instance of :py:obj:`babel.core.Locale` built from the
+            ``searxng_locale`` value.
 
 
         subdomain:
         subdomain:
             Google subdomain :py:obj:`google_domains` that fits to the country
             Google subdomain :py:obj:`google_domains` that fits to the country
@@ -165,52 +112,67 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
             Py-Dictionary with additional request arguments (can be passed to
             Py-Dictionary with additional request arguments (can be passed to
             :py:func:`urllib.parse.urlencode`).
             :py:func:`urllib.parse.urlencode`).
 
 
+            - ``hl`` parameter: specifies the interface language of user interface.
+            - ``lr`` parameter: restricts search results to documents written in
+              a particular language.
+            - ``cr`` parameter: restricts search results to documents
+              originating in a particular country.
+            - ``ie`` parameter: sets the character encoding scheme that should
+              be used to interpret the query string ('utf8').
+            - ``oe`` parameter: sets the character encoding scheme that should
+              be used to decode the XML result ('utf8').
+
         headers:
         headers:
             Py-Dictionary with additional HTTP headers (can be passed to
             Py-Dictionary with additional HTTP headers (can be passed to
             request's headers)
             request's headers)
+
+            - ``Accept: '*/*'``
+
     """
     """
+
     ret_val = {
     ret_val = {
         'language': None,
         'language': None,
         'country': None,
         'country': None,
         'subdomain': None,
         'subdomain': None,
         'params': {},
         'params': {},
         'headers': {},
         'headers': {},
+        'cookies': {},
+        'locale': None,
     }
     }
 
 
-    # language ...
+    sxng_locale = params.get('searxng_locale', 'all')
+    try:
+        locale = babel.Locale.parse(sxng_locale, sep='-')
+    except babel.core.UnknownLocaleError:
+        locale = None
 
 
-    _lang = params['language']
-    _any_language = _lang.lower() == 'all'
-    if _any_language:
-        _lang = 'en-US'
-    language = match_language(_lang, lang_list, custom_aliases)
-    ret_val['language'] = language
+    eng_lang = eng_traits.get_language(sxng_locale, 'lang_en')
+    lang_code = eng_lang.split('_')[-1]  # lang_zh-TW --> zh-TW / lang_en --> en
+    country = eng_traits.get_region(sxng_locale, eng_traits.all_locale)
 
 
-    # country ...
+    # Test zh_hans & zh_hant --> among the topmost links in the result lists of
+    # TW and HK you should find a wiktionary.org zh_hant link.  In the result
+    # list of zh-CN there should be no hant link; instead you should find
+    # zh.m.wikipedia.org/zh somewhere near the top.
 
 
-    _l = _lang.split('-')
-    if len(_l) == 2:
-        country = _l[1]
-    else:
-        country = _l[0].upper()
-        if country == 'EN':
-            country = 'US'
-    ret_val['country'] = country
-
-    # subdomain ...
-
-    ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com')
-
-    # params & headers
+    # '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5
+    # '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5
 
 
-    lang_country = '%s-%s' % (language, country)  # (en-US, en-EN, de-DE, de-AU, fr-FR ..)
+    ret_val['language'] = eng_lang
+    ret_val['country'] = country
+    ret_val['locale'] = locale
+    ret_val['subdomain'] = eng_traits.custom['supported_domains'].get(country.upper(), 'www.google.com')
 
 
     # hl parameter:
     # hl parameter:
-    #   https://developers.google.com/custom-search/docs/xml_results#hlsp The
-    # Interface Language:
+    #   The hl parameter specifies the interface language (host language) of
+    #   your user interface. To improve the performance and the quality of your
+    #   search results, you are strongly encouraged to set this parameter
+    #   explicitly.
+    #   https://developers.google.com/custom-search/docs/xml_results#hlsp
+    # The Interface Language:
     #   https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages
     #   https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages
 
 
-    ret_val['params']['hl'] = lang_list.get(lang_country, language)
+    ret_val['params']['hl'] = lang_code
 
 
     # lr parameter:
     # lr parameter:
     #   The lr (language restrict) parameter restricts search results to
     #   The lr (language restrict) parameter restricts search results to
@@ -218,22 +180,72 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
     #   https://developers.google.com/custom-search/docs/xml_results#lrsp
     #   https://developers.google.com/custom-search/docs/xml_results#lrsp
     #   Language Collection Values:
     #   Language Collection Values:
     #   https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections
     #   https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections
+    #
+    # To select 'all' languages an empty 'lr' value is used.
+    #
+    # Unlike other Google services, Google Scholar supports selecting more
+    # than one language. The languages are separated by a pipe '|' (logical OR).
+    # For example: &lr=lang_zh-TW%7Clang_de selects articles written in
+    # traditional Chinese OR German.
 
 
-    if _any_language and supported_any_language:
+    ret_val['params']['lr'] = eng_lang
+    if sxng_locale == 'all':
+        ret_val['params']['lr'] = ''
 
 
-        # interpretation is left up to Google (based on whoogle)
-        #
-        # - add parameter ``source=lnt``
-        # - don't use parameter ``lr``
-        # - don't add a ``Accept-Language`` HTTP header.
+    # cr parameter:
+    #   The cr parameter restricts search results to documents originating in a
+    #   particular country.
+    #   https://developers.google.com/custom-search/docs/xml_results#crsp
 
 
-        ret_val['params']['source'] = 'lnt'
+    ret_val['params']['cr'] = 'country' + country
+    if sxng_locale == 'all':
+        ret_val['params']['cr'] = ''
 
 
-    else:
+    # gl parameter: (mandatory for Google News)
+    #   The gl parameter value is a two-letter country code. For WebSearch
+    #   results, the gl parameter boosts search results whose country of origin
+    #   matches the parameter value. See the Country Codes section for a list of
+    #   valid values.
+    #   Specifying a gl parameter value in WebSearch requests should improve the
+    #   relevance of results. This is particularly true for international
+    #   customers and, even more specifically, for customers in English-speaking
+    #   countries other than the United States.
+    #   https://developers.google.com/custom-search/docs/xml_results#glsp
+
+    ret_val['params']['gl'] = country
+
+    # ie parameter:
+    #   The ie parameter sets the character encoding scheme that should be used
+    #   to interpret the query string. The default ie value is latin1.
+    #   https://developers.google.com/custom-search/docs/xml_results#iesp
+
+    ret_val['params']['ie'] = 'utf8'
 
 
-        # restricts search results to documents written in a particular
-        # language.
-        ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
+    # oe parameter:
+    #   The oe parameter sets the character encoding scheme that should be used
+    #   to decode the XML result. The default oe value is latin1.
+    #   https://developers.google.com/custom-search/docs/xml_results#oesp
+
+    ret_val['params']['oe'] = 'utf8'
+
+    # num parameter:
+    #   The num parameter identifies the number of search results to return.
+    #   The default num value is 10, and the maximum value is 20. If you request
+    #   more than 20 results, only 20 results will be returned.
+    #   https://developers.google.com/custom-search/docs/xml_results#numsp
+
+    # HINT: seems to have no effect (tested in google WEB & Images)
+    # ret_val['params']['num'] = 20
+
+    # HTTP headers
+
+    ret_val['headers']['Accept'] = '*/*'
+
+    # Cookies
+
+    # - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
+    # - https://github.com/searxng/searxng/issues/1555
+    ret_val['cookies']['CONSENT'] = "YES+"
 
 
     return ret_val
     return ret_val
 
 
@@ -245,33 +257,34 @@ def detect_google_sorry(resp):
 
 
 def request(query, params):
 def request(query, params):
     """Google search request"""
     """Google search request"""
-
+    # pylint: disable=line-too-long
     offset = (params['pageno'] - 1) * 10
     offset = (params['pageno'] - 1) * 10
-
-    lang_info = get_lang_info(params, supported_languages, language_aliases, True)
-
-    additional_parameters = {}
-    if use_mobile_ui:
-        additional_parameters = {
-            'asearch': 'arc',
-            'async': 'use_ac:true,_fmt:prog',
-        }
+    google_info = get_google_info(params, traits)
 
 
     # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
     # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
     query_url = (
     query_url = (
         'https://'
         'https://'
-        + lang_info['subdomain']
+        + google_info['subdomain']
         + '/search'
         + '/search'
         + "?"
         + "?"
         + urlencode(
         + urlencode(
             {
             {
                 'q': query,
                 'q': query,
-                **lang_info['params'],
-                'ie': "utf8",
-                'oe': "utf8",
-                'start': offset,
+                **google_info['params'],
                 'filter': '0',
                 'filter': '0',
-                **additional_parameters,
+                'start': offset,
+                # 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i',
+                # 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG',
+                # 'cs' : 1,
+                # 'sa': 'N',
+                # 'yv': 3,
+                # 'prmd': 'vin',
+                # 'ei': 'GASaY6TxOcy_xc8PtYeY6AE',
+                # 'sa': 'N',
+                # 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg'
+                # formerly known as use_mobile_ui
+                'asearch': 'arc',
+                'async': UI_ASYNC,
             }
             }
         )
         )
     )
     )
@@ -282,25 +295,38 @@ def request(query, params):
         query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
         query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
     params['url'] = query_url
     params['url'] = query_url
 
 
-    params['cookies']['CONSENT'] = "YES+"
-    params['headers'].update(lang_info['headers'])
-    if use_mobile_ui:
-        params['headers']['Accept'] = '*/*'
-    else:
-        params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
-
+    params['cookies'] = google_info['cookies']
+    params['headers'].update(google_info['headers'])
     return params
     return params
 
 
 
 
+# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;
+# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
+RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);')
+
+
+def _parse_data_images(dom):
+    data_image_map = {}
+    for img_id, data_image in RE_DATA_IMAGE.findall(dom.text_content()):
+        end_pos = data_image.rfind('=')
+        if end_pos > 0:
+            data_image = data_image[: end_pos + 1]
+        data_image_map[img_id] = data_image
+    logger.debug('data:image objects --> %s', list(data_image_map.keys()))
+    return data_image_map
+
+
 def response(resp):
 def response(resp):
     """Get response from google's search request"""
     """Get response from google's search request"""
-
+    # pylint: disable=too-many-branches, too-many-statements
     detect_google_sorry(resp)
     detect_google_sorry(resp)
 
 
     results = []
     results = []
 
 
     # convert the text to dom
     # convert the text to dom
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
+    data_image_map = _parse_data_images(dom)
+
     # results --> answer
     # results --> answer
     answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
     answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
     if answer_list:
     if answer_list:
@@ -309,25 +335,9 @@ def response(resp):
     else:
     else:
         logger.debug("did not find 'answer'")
         logger.debug("did not find 'answer'")
 
 
-        # results --> number_of_results
-        if not use_mobile_ui:
-            try:
-                _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0)
-                _digit = ''.join([n for n in _txt if n.isdigit()])
-                number_of_results = int(_digit)
-                results.append({'number_of_results': number_of_results})
-            except Exception as e:  # pylint: disable=broad-except
-                logger.debug("did not 'number_of_results'")
-                logger.error(e, exc_info=True)
-
     # parse results
     # parse results
 
 
-    for result in eval_xpath_list(dom, results_xpath):
-
-        # google *sections*
-        if extract_text(eval_xpath(result, g_section_with_header)):
-            logger.debug("ignoring <g-section-with-header>")
-            continue
+    for result in eval_xpath_list(dom, results_xpath):  # pylint: disable=too-many-nested-blocks
 
 
         try:
         try:
             title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
             title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
@@ -336,16 +346,30 @@ def response(resp):
                 logger.debug('ignoring item from the result_xpath list: missing title')
                 logger.debug('ignoring item from the result_xpath list: missing title')
                 continue
                 continue
             title = extract_text(title_tag)
             title = extract_text(title_tag)
+
             url = eval_xpath_getindex(result, href_xpath, 0, None)
             url = eval_xpath_getindex(result, href_xpath, 0, None)
             if url is None:
             if url is None:
+                logger.debug('ignoring item from the result_xpath list: missing url of title "%s"', title)
                 continue
                 continue
-            content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
-            if content is None:
+
+            content_nodes = eval_xpath(result, content_xpath)
+            content = extract_text(content_nodes)
+
+            if not content:
                 logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
                 logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
                 continue
                 continue
 
 
-            logger.debug('add link to results: %s', title)
-            results.append({'url': url, 'title': title, 'content': content})
+            img_src = content_nodes[0].xpath('.//img/@src')
+            if img_src:
+                img_src = img_src[0]
+                if img_src.startswith('data:image'):
+                    img_id = content_nodes[0].xpath('.//img/@id')
+                    if img_id:
+                        img_src = data_image_map.get(img_id[0])
+            else:
+                img_src = None
+
+            results.append({'url': url, 'title': title, 'content': content, 'img_src': img_src})
 
 
         except Exception as e:  # pylint: disable=broad-except
         except Exception as e:  # pylint: disable=broad-except
             logger.error(e, exc_info=True)
             logger.error(e, exc_info=True)
@@ -361,15 +385,107 @@ def response(resp):
 
 
 
 
 # get supported languages from their site
 # get supported languages from their site
-def _fetch_supported_languages(resp):
-    ret_val = {}
+
+
+skip_countries = [
+    # official language of google-country not in google-languages
+    'AL',  # Albanien (sq)
+    'AZ',  # Aserbaidschan  (az)
+    'BD',  # Bangladesch (bn)
+    'BN',  # Brunei Darussalam (ms)
+    'BT',  # Bhutan (dz)
+    'ET',  # Äthiopien (am)
+    'GE',  # Georgien (ka, os)
+    'GL',  # Grönland (kl)
+    'KH',  # Kambodscha (km)
+    'LA',  # Laos (lo)
+    'LK',  # Sri Lanka (si, ta)
+    'ME',  # Montenegro (sr)
+    'MK',  # Nordmazedonien (mk, sq)
+    'MM',  # Myanmar (my)
+    'MN',  # Mongolei (mn)
+    'MV',  # Malediven (dv) // dv_MV is unknown by babel
+    'MY',  # Malaysia (ms)
+    'NP',  # Nepal (ne)
+    'TJ',  # Tadschikistan (tg)
+    'TM',  # Turkmenistan (tk)
+    'UZ',  # Usbekistan (uz)
+]
+
+
+def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
+    """Fetch languages from Google."""
+    # pylint: disable=import-outside-toplevel, too-many-branches
+
+    engine_traits.custom['supported_domains'] = {}
+
+    resp = network.get('https://www.google.com/preferences')
+    if not resp.ok:
+        raise RuntimeError("Response from Google's preferences is not OK.")
+
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
 
 
-    radio_buttons = eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]')
+    # supported language codes
 
 
-    for x in radio_buttons:
-        name = x.get("data-name")
-        code = x.get("value").split('_')[-1]
-        ret_val[code] = {"name": name}
+    lang_map = {'no': 'nb'}
+    for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'):
 
 
-    return ret_val
+        eng_lang = x.get("value").split('_')[-1]
+        try:
+            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
+        except babel.UnknownLocaleError:
+            print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
+            continue
+        sxng_lang = language_tag(locale)
+
+        conflict = engine_traits.languages.get(sxng_lang)
+        if conflict:
+            if conflict != eng_lang:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
+            continue
+        engine_traits.languages[sxng_lang] = 'lang_' + eng_lang
+
+    # alias languages
+    engine_traits.languages['zh'] = 'lang_zh-CN'
+
+    # supported region codes
+
+    for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'):
+        eng_country = x.get("value")
+
+        if eng_country in skip_countries:
+            continue
+        if eng_country == 'ZZ':
+            engine_traits.all_locale = 'ZZ'
+            continue
+
+        sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True)
+
+        if not sxng_locales:
+            print("ERROR: can't map from google country %s (%s) to a babel region." % (x.get('data-name'), eng_country))
+            continue
+
+        for sxng_locale in sxng_locales:
+            engine_traits.regions[region_tag(sxng_locale)] = eng_country
+
+    # alias regions
+    engine_traits.regions['zh-CN'] = 'HK'
+
+    # supported domains
+
+    if add_domains:
+        resp = network.get('https://www.google.com/supported_domains')
+        if not resp.ok:
+            raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.")
+
+        for domain in resp.text.split():
+            domain = domain.strip()
+            if not domain or domain in [
+                '.google.com',
+            ]:
+                continue
+            region = domain.split('.')[-1].upper()
+            engine_traits.custom['supported_domains'][region] = 'www' + domain
+            if region == 'HK':
+                # There is no google.cn, we use .com.hk for zh-CN
+                engine_traits.custom['supported_domains']['CN'] = 'www' + domain

+ 28 - 21
searx/engines/google_images.py

@@ -1,31 +1,38 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""This is the implementation of the google images engine using the google
-internal API used the Google Go Android app.
+"""This is the implementation of the Google Images engine using the internal
+Google API used by the Google Go Android app.
 
 
 This internal API offer results in
 This internal API offer results in
 
 
-- JSON (_fmt:json)
-- Protobuf (_fmt:pb)
-- Protobuf compressed? (_fmt:pc)
-- HTML (_fmt:html)
-- Protobuf encoded in JSON (_fmt:jspb).
+- JSON (``_fmt:json``)
+- Protobuf_ (``_fmt:pb``)
+- Protobuf_ compressed? (``_fmt:pc``)
+- HTML (``_fmt:html``)
+- Protobuf_ encoded in JSON (``_fmt:jspb``).
 
 
+.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
 """
 """
 
 
+from typing import TYPE_CHECKING
+
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from json import loads
 from json import loads
 
 
+from searx.engines.google import fetch_traits  # pylint: disable=unused-import
 from searx.engines.google import (
 from searx.engines.google import (
-    get_lang_info,
+    get_google_info,
     time_range_dict,
     time_range_dict,
     detect_google_sorry,
     detect_google_sorry,
 )
 )
 
 
-# pylint: disable=unused-import
-from searx.engines.google import supported_languages_url, _fetch_supported_languages
+if TYPE_CHECKING:
+    import logging
+    from searx.enginelib.traits import EngineTraits
+
+    logger: logging.Logger
+    traits: EngineTraits
 
 
-# pylint: enable=unused-import
 
 
 # about
 # about
 about = {
 about = {
@@ -40,7 +47,6 @@ about = {
 # engine dependent config
 # engine dependent config
 categories = ['images', 'web']
 categories = ['images', 'web']
 paging = True
 paging = True
-use_locale_domain = True
 time_range_support = True
 time_range_support = True
 safesearch = True
 safesearch = True
 send_accept_language_header = True
 send_accept_language_header = True
@@ -51,20 +57,18 @@ filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
 def request(query, params):
 def request(query, params):
     """Google-Image search request"""
     """Google-Image search request"""
 
 
-    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+    google_info = get_google_info(params, traits)
 
 
     query_url = (
     query_url = (
         'https://'
         'https://'
-        + lang_info['subdomain']
+        + google_info['subdomain']
         + '/search'
         + '/search'
         + "?"
         + "?"
         + urlencode(
         + urlencode(
             {
             {
                 'q': query,
                 'q': query,
                 'tbm': "isch",
                 'tbm': "isch",
-                **lang_info['params'],
-                'ie': "utf8",
-                'oe': "utf8",
+                **google_info['params'],
                 'asearch': 'isch',
                 'asearch': 'isch',
                 'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
                 'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
             }
             }
@@ -77,9 +81,8 @@ def request(query, params):
         query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
         query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
     params['url'] = query_url
     params['url'] = query_url
 
 
-    params['headers'].update(lang_info['headers'])
-    params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip'
-    params['headers']['Accept'] = '*/*'
+    params['cookies'] = google_info['cookies']
+    params['headers'].update(google_info['headers'])
     return params
     return params
 
 
 
 
@@ -111,7 +114,11 @@ def response(resp):
 
 
         copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
         copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
         if copyright_notice:
         if copyright_notice:
-            result_item['source'] += ' / ' + copyright_notice
+            result_item['source'] += ' | ' + copyright_notice
+
+        freshness_date = item["result"].get("freshness_date")
+        if freshness_date:
+            result_item['source'] += ' | ' + freshness_date
 
 
         file_size = item.get('gsa', {}).get('file_size')
         file_size = item.get('gsa', {}).get('file_size')
         if file_size:
         if file_size:

+ 199 - 51
searx/engines/google_news.py

@@ -1,24 +1,40 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""This is the implementation of the google news engine.  The google news API
-ignores some parameters from the common :ref:`google API`:
+"""This is the implementation of the Google News engine.
 
 
-- num_ : the number of search results is ignored
+Google News has a different region handling compared to Google WEB.
+
+- the ``ceid`` argument has to be set (:py:obj:`ceid_list`)
+- the hl_ argument has to be set correctly (and different to Google WEB)
+- the gl_ argument is mandatory
+
+If one of these arguments is not set correctly, the request is redirected to a
+CONSENT dialog::
+
+  https://consent.google.com/m?continue=
+
+The google news API ignores some parameters from the common :ref:`google API`:
+
+- num_ : the number of search results is ignored / there is no paging; all
+  results for a query term are in the first response.
 - save_ : is ignored / Google-News results are always *SafeSearch*
 - save_ : is ignored / Google-News results are always *SafeSearch*
 
 
+.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp
+.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp
 .. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
 .. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
 .. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
 .. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
-
 """
 """
 
 
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
 
 
 import binascii
 import binascii
 import re
 import re
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from base64 import b64decode
 from base64 import b64decode
 from lxml import html
 from lxml import html
+import babel
 
 
+from searx import locales
 from searx.utils import (
 from searx.utils import (
     eval_xpath,
     eval_xpath,
     eval_xpath_list,
     eval_xpath_list,
@@ -26,18 +42,19 @@ from searx.utils import (
     extract_text,
     extract_text,
 )
 )
 
 
-# pylint: disable=unused-import
+from searx.engines.google import fetch_traits as _fetch_traits  # pylint: disable=unused-import
 from searx.engines.google import (
 from searx.engines.google import (
-    supported_languages_url,
-    _fetch_supported_languages,
+    get_google_info,
+    detect_google_sorry,
 )
 )
+from searx.enginelib.traits import EngineTraits
 
 
-# pylint: enable=unused-import
+if TYPE_CHECKING:
+    import logging
 
 
-from searx.engines.google import (
-    get_lang_info,
-    detect_google_sorry,
-)
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -49,70 +66,77 @@ about = {
     "results": 'HTML',
     "results": 'HTML',
 }
 }
 
 
-# compared to other google engines google-news has a different time range
-# support.  The time range is included in the search term.
-time_range_dict = {
-    'day': 'when:1d',
-    'week': 'when:7d',
-    'month': 'when:1m',
-    'year': 'when:1y',
-}
-
 # engine dependent config
 # engine dependent config
-
 categories = ['news']
 categories = ['news']
 paging = False
 paging = False
-use_locale_domain = True
-time_range_support = True
+time_range_support = False
 
 
 # Google-News results are always *SafeSearch*. Option 'safesearch' is set to
 # Google-News results are always *SafeSearch*. Option 'safesearch' is set to
 # False here, otherwise checker will report safesearch-errors::
 # False here, otherwise checker will report safesearch-errors::
 #
 #
 #  safesearch : results are identitical for safesearch=0 and safesearch=2
 #  safesearch : results are identitical for safesearch=0 and safesearch=2
-safesearch = False
-send_accept_language_header = True
+safesearch = True
+# send_accept_language_header = True
 
 
 
 
 def request(query, params):
 def request(query, params):
     """Google-News search request"""
     """Google-News search request"""
 
 
-    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+    sxng_locale = params.get('searxng_locale', 'en-US')
+    ceid = locales.get_engine_locale(sxng_locale, traits.custom['ceid'], default='US:en')
+    google_info = get_google_info(params, traits)
+    google_info['subdomain'] = 'news.google.com'  # google news has only one domain
 
 
-    # google news has only one domain
-    lang_info['subdomain'] = 'news.google.com'
+    ceid_region, ceid_lang = ceid.split(':')
+    ceid_lang, ceid_suffix = (
+        ceid_lang.split('-')
+        + [
+            None,
+        ]
+    )[:2]
 
 
-    ceid = "%s:%s" % (lang_info['country'], lang_info['language'])
+    google_info['params']['hl'] = ceid_lang
 
 
-    # google news redirects en to en-US
-    if lang_info['params']['hl'] == 'en':
-        lang_info['params']['hl'] = 'en-US'
+    if ceid_suffix and ceid_suffix not in ['Hans', 'Hant']:
 
 
-    # Very special to google-news compared to other google engines, the time
-    # range is included in the search term.
-    if params['time_range']:
-        query += ' ' + time_range_dict[params['time_range']]
+        if ceid_region.lower() == ceid_lang:
+            google_info['params']['hl'] = ceid_lang + '-' + ceid_region
+        else:
+            google_info['params']['hl'] = ceid_lang + '-' + ceid_suffix
+
+    elif ceid_region.lower() != ceid_lang:
+
+        if ceid_region in ['AT', 'BE', 'CH', 'IL', 'SA', 'IN', 'BD', 'PT']:
+            google_info['params']['hl'] = ceid_lang
+        else:
+            google_info['params']['hl'] = ceid_lang + '-' + ceid_region
+
+    google_info['params']['lr'] = 'lang_' + ceid_lang.split('-')[0]
+    google_info['params']['gl'] = ceid_region
 
 
     query_url = (
     query_url = (
         'https://'
         'https://'
-        + lang_info['subdomain']
-        + '/search'
-        + "?"
-        + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'gl': lang_info['country']})
+        + google_info['subdomain']
+        + "/search?"
+        + urlencode(
+            {
+                'q': query,
+                **google_info['params'],
+            }
+        )
+        # ceid includes a ':' character which must not be urlencoded
         + ('&ceid=%s' % ceid)
         + ('&ceid=%s' % ceid)
-    )  # ceid includes a ':' character which must not be urlencoded
-    params['url'] = query_url
-
-    params['cookies']['CONSENT'] = "YES+"
-    params['headers'].update(lang_info['headers'])
-    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+    )
 
 
+    params['url'] = query_url
+    params['cookies'] = google_info['cookies']
+    params['headers'].update(google_info['headers'])
     return params
     return params
 
 
 
 
 def response(resp):
 def response(resp):
     """Get response from google's search request"""
     """Get response from google's search request"""
     results = []
     results = []
-
     detect_google_sorry(resp)
     detect_google_sorry(resp)
 
 
     # convert the text to dom
     # convert the text to dom
@@ -152,8 +176,8 @@ def response(resp):
 
 
         # The pub_date is mostly a string like 'yesterday', not a real
         # The pub_date is mostly a string like 'yesterday', not a real
         # timezone date or time.  Therefore we can't use publishedDate.
         # timezone date or time.  Therefore we can't use publishedDate.
-        pub_date = extract_text(eval_xpath(result, './article/div[1]/div[1]/time'))
-        pub_origin = extract_text(eval_xpath(result, './article/div[1]/div[1]/a'))
+        pub_date = extract_text(eval_xpath(result, './article//time'))
+        pub_origin = extract_text(eval_xpath(result, './article//a[@data-n-tid]'))
 
 
         content = ' / '.join([x for x in [pub_origin, pub_date] if x])
         content = ' / '.join([x for x in [pub_origin, pub_date] if x])
 
 
@@ -174,3 +198,127 @@ def response(resp):
 
 
     # return results
     # return results
     return results
     return results
+
+
+ceid_list = [
+    'AE:ar',
+    'AR:es-419',
+    'AT:de',
+    'AU:en',
+    'BD:bn',
+    'BE:fr',
+    'BE:nl',
+    'BG:bg',
+    'BR:pt-419',
+    'BW:en',
+    'CA:en',
+    'CA:fr',
+    'CH:de',
+    'CH:fr',
+    'CL:es-419',
+    'CN:zh-Hans',
+    'CO:es-419',
+    'CU:es-419',
+    'CZ:cs',
+    'DE:de',
+    'EG:ar',
+    'ES:es',
+    'ET:en',
+    'FR:fr',
+    'GB:en',
+    'GH:en',
+    'GR:el',
+    'HK:zh-Hant',
+    'HU:hu',
+    'ID:en',
+    'ID:id',
+    'IE:en',
+    'IL:en',
+    'IL:he',
+    'IN:bn',
+    'IN:en',
+    'IN:hi',
+    'IN:ml',
+    'IN:mr',
+    'IN:ta',
+    'IN:te',
+    'IT:it',
+    'JP:ja',
+    'KE:en',
+    'KR:ko',
+    'LB:ar',
+    'LT:lt',
+    'LV:en',
+    'LV:lv',
+    'MA:fr',
+    'MX:es-419',
+    'MY:en',
+    'NA:en',
+    'NG:en',
+    'NL:nl',
+    'NO:no',
+    'NZ:en',
+    'PE:es-419',
+    'PH:en',
+    'PK:en',
+    'PL:pl',
+    'PT:pt-150',
+    'RO:ro',
+    'RS:sr',
+    'RU:ru',
+    'SA:ar',
+    'SE:sv',
+    'SG:en',
+    'SI:sl',
+    'SK:sk',
+    'SN:fr',
+    'TH:th',
+    'TR:tr',
+    'TW:zh-Hant',
+    'TZ:en',
+    'UA:ru',
+    'UA:uk',
+    'UG:en',
+    'US:en',
+    'US:es-419',
+    'VE:es-419',
+    'VN:vi',
+    'ZA:en',
+    'ZW:en',
+]
+"""List of region/language combinations supported by Google News.  Values of the
+``ceid`` argument of the Google News REST API."""
+
+
+_skip_values = [
+    'ET:en',  # english (ethiopia)
+    'ID:en',  # english (indonesia)
+    'LV:en',  # english (latvia)
+]
+
+_ceid_locale_map = {'NO:no': 'nb-NO'}
+
+
+def fetch_traits(engine_traits: EngineTraits):
+    _fetch_traits(engine_traits, add_domains=False)
+
+    engine_traits.custom['ceid'] = {}
+
+    for ceid in ceid_list:
+        if ceid in _skip_values:
+            continue
+
+        region, lang = ceid.split(':')
+        x = lang.split('-')
+        if len(x) > 1:
+            if x[1] not in ['Hant', 'Hans']:
+                lang = x[0]
+
+        sxng_locale = _ceid_locale_map.get(ceid, lang + '-' + region)
+        try:
+            locale = babel.Locale.parse(sxng_locale, sep='-')
+        except babel.UnknownLocaleError:
+            print("ERROR: %s -> %s is unknown by babel" % (ceid, sxng_locale))
+            continue
+
+        engine_traits.custom['ceid'][locales.region_tag(locale)] = ceid

+ 63 - 57
searx/engines/google_scholar.py

@@ -1,19 +1,18 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Google (Scholar)
+"""This is the implementation of the Google Scholar engine.
 
 
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.
-
-.. _Query Parameter Definitions:
-   https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
+Compared to other Google services the Scholar engine has a simple GET REST-API
+and there does not exist an `async` API.  Even though the API is slightly
+vintage we can make use of the :ref:`google API` to assemble the arguments of
+the GET request.
 """
 """
 
 
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
+from typing import Optional
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from datetime import datetime
 from datetime import datetime
-from typing import Optional
 from lxml import html
 from lxml import html
 
 
 from searx.utils import (
 from searx.utils import (
@@ -23,19 +22,21 @@ from searx.utils import (
     extract_text,
     extract_text,
 )
 )
 
 
+from searx.exceptions import SearxEngineCaptchaException
+
+from searx.engines.google import fetch_traits  # pylint: disable=unused-import
 from searx.engines.google import (
 from searx.engines.google import (
-    get_lang_info,
+    get_google_info,
     time_range_dict,
     time_range_dict,
-    detect_google_sorry,
 )
 )
+from searx.enginelib.traits import EngineTraits
 
 
-# pylint: disable=unused-import
-from searx.engines.google import (
-    supported_languages_url,
-    _fetch_supported_languages,
-)
+if TYPE_CHECKING:
+    import logging
 
 
-# pylint: enable=unused-import
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -51,53 +52,62 @@ about = {
 categories = ['science', 'scientific publications']
 categories = ['science', 'scientific publications']
 paging = True
 paging = True
 language_support = True
 language_support = True
-use_locale_domain = True
 time_range_support = True
 time_range_support = True
 safesearch = False
 safesearch = False
 send_accept_language_header = True
 send_accept_language_header = True
 
 
 
 
-def time_range_url(params):
-    """Returns a URL query component for a google-Scholar time range based on
-    ``params['time_range']``.  Google-Scholar does only support ranges in years.
-    To have any effect, all the Searx ranges (*day*, *week*, *month*, *year*)
-    are mapped to *year*.  If no range is set, an empty string is returned.
-    Example::
+def time_range_args(params):
+    """Returns a dictionary with the time range arguments based on
+    ``params['time_range']``.
 
 
-        &as_ylo=2019
-    """
-    # as_ylo=2016&as_yhi=2019
-    ret_val = ''
-    if params['time_range'] in time_range_dict:
-        ret_val = urlencode({'as_ylo': datetime.now().year - 1})
-    return '&' + ret_val
+    Google Scholar supports a detailed search by year.  Searching by *last
+    month* or *last week* (as offered by SearXNG) is uncommon for scientific
+    publications and is not supported by Google Scholar.
 
 
+    To limit the result list when the user selects a range, all the SearXNG
+    ranges (*day*, *week*, *month*, *year*) are mapped to *year*.  If no range
+    is set, an empty dictionary of arguments is returned.  Example: when the
+    user selects a time range (current year minus one in 2022):
 
 
-def request(query, params):
-    """Google-Scholar search request"""
+    .. code:: python
 
 
-    offset = (params['pageno'] - 1) * 10
-    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+        { 'as_ylo' : 2021 }
 
 
-    # subdomain is: scholar.google.xy
-    lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
+    """
+    ret_val = {}
+    if params['time_range'] in time_range_dict:
+        ret_val['as_ylo'] = datetime.now().year - 1
+    return ret_val
 
 
-    query_url = (
-        'https://'
-        + lang_info['subdomain']
-        + '/scholar'
-        + "?"
-        + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'start': offset})
-    )
 
 
-    query_url += time_range_url(params)
-    params['url'] = query_url
+def detect_google_captcha(dom):
+    """In case of a CAPTCHA Google Scholar opens its own *not a Robot* dialog and
+    the request is not redirected to ``sorry.google.com``.
+    """
+    if eval_xpath(dom, "//form[@id='gs_captcha_f']"):
+        raise SearxEngineCaptchaException()
+
 
 
-    params['cookies']['CONSENT'] = "YES+"
-    params['headers'].update(lang_info['headers'])
-    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+def request(query, params):
+    """Google-Scholar search request"""
 
 
-    # params['google_subdomain'] = subdomain
+    google_info = get_google_info(params, traits)
+    # subdomain is: scholar.google.xy
+    google_info['subdomain'] = google_info['subdomain'].replace("www.", "scholar.")
+
+    args = {
+        'q': query,
+        **google_info['params'],
+        'start': (params['pageno'] - 1) * 10,
+        'as_sdt': '2007',  # include patents / to disable set '0,5'
+        'as_vis': '0',  # include citations / to disable set '1'
+    }
+    args.update(time_range_args(params))
+
+    params['url'] = 'https://' + google_info['subdomain'] + '/scholar?' + urlencode(args)
+    params['cookies'] = google_info['cookies']
+    params['headers'].update(google_info['headers'])
     return params
     return params
 
 
 
 
@@ -138,19 +148,15 @@ def parse_gs_a(text: Optional[str]):
 
 
 
 
 def response(resp):  # pylint: disable=too-many-locals
 def response(resp):  # pylint: disable=too-many-locals
-    """Get response from google's search request"""
+    """Parse response from Google Scholar"""
     results = []
     results = []
 
 
-    detect_google_sorry(resp)
-
-    # which subdomain ?
-    # subdomain = resp.search_params.get('google_subdomain')
-
     # convert the text to dom
     # convert the text to dom
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
+    detect_google_captcha(dom)
 
 
     # parse results
     # parse results
-    for result in eval_xpath_list(dom, '//div[@data-cid]'):
+    for result in eval_xpath_list(dom, '//div[@data-rp]'):
 
 
         title = extract_text(eval_xpath(result, './/h3[1]//a'))
         title = extract_text(eval_xpath(result, './/h3[1]//a'))
 
 
@@ -158,7 +164,7 @@ def response(resp):  # pylint: disable=too-many-locals
             # this is a [ZITATION] block
             # this is a [ZITATION] block
             continue
             continue
 
 
-        pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
+        pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
         if pub_type:
         if pub_type:
             pub_type = pub_type[1:-1].lower()
             pub_type = pub_type[1:-1].lower()
 
 

+ 32 - 83
searx/engines/google_videos.py

@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""This is the implementation of the google videos engine.
+"""This is the implementation of the Google Videos engine.
 
 
 .. admonition:: Content-Security-Policy (CSP)
 .. admonition:: Content-Security-Policy (CSP)
 
 
@@ -14,9 +14,8 @@
 
 
 """
 """
 
 
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
 
 
-import re
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from lxml import html
 from lxml import html
 
 
@@ -27,20 +26,22 @@ from searx.utils import (
     extract_text,
     extract_text,
 )
 )
 
 
+from searx.engines.google import fetch_traits  # pylint: disable=unused-import
 from searx.engines.google import (
 from searx.engines.google import (
-    get_lang_info,
+    get_google_info,
     time_range_dict,
     time_range_dict,
     filter_mapping,
     filter_mapping,
-    g_section_with_header,
-    title_xpath,
     suggestion_xpath,
     suggestion_xpath,
     detect_google_sorry,
     detect_google_sorry,
 )
 )
+from searx.enginelib.traits import EngineTraits
 
 
-# pylint: disable=unused-import
-from searx.engines.google import supported_languages_url, _fetch_supported_languages
+if TYPE_CHECKING:
+    import logging
 
 
-# pylint: enable=unused-import
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -55,70 +56,32 @@ about = {
 # engine dependent config
 # engine dependent config
 
 
 categories = ['videos', 'web']
 categories = ['videos', 'web']
-paging = False
+paging = True
 language_support = True
 language_support = True
-use_locale_domain = True
 time_range_support = True
 time_range_support = True
 safesearch = True
 safesearch = True
-send_accept_language_header = True
-
-RE_CACHE = {}
-
-
-def _re(regexpr):
-    """returns compiled regular expression"""
-    RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr))
-    return RE_CACHE[regexpr]
-
-
-def scrap_out_thumbs_src(dom):
-    ret_val = {}
-    thumb_name = 'dimg_'
-    for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
-        _script = script.text
-        # "dimg_35":"https://i.ytimg.c....",
-        _dimurl = _re("s='([^']*)").findall(_script)
-        for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script):
-            v = v.replace(r'\u003d', '=')
-            v = v.replace(r'\u0026', '&')
-            ret_val[k] = v
-    logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
-    return ret_val
-
-
-def scrap_out_thumbs(dom):
-    """Scrap out thumbnail data from <script> tags."""
-    ret_val = {}
-    thumb_name = 'dimg_'
-
-    for script in eval_xpath_list(dom, '//script[contains(., "_setImagesSrc")]'):
-        _script = script.text
-
-        # var s='data:image/jpeg;base64, ...'
-        _imgdata = _re("s='([^']*)").findall(_script)
-        if not _imgdata:
-            continue
-
-        # var ii=['dimg_17']
-        for _vidthumb in _re(r"(%s\d+)" % thumb_name).findall(_script):
-            # At least the equal sign in the URL needs to be decoded
-            ret_val[_vidthumb] = _imgdata[0].replace(r"\x3d", "=")
-
-    logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
-    return ret_val
 
 
 
 
 def request(query, params):
 def request(query, params):
     """Google-Video search request"""
     """Google-Video search request"""
 
 
-    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+    google_info = get_google_info(params, traits)
 
 
     query_url = (
     query_url = (
         'https://'
         'https://'
-        + lang_info['subdomain']
+        + google_info['subdomain']
         + '/search'
         + '/search'
         + "?"
         + "?"
-        + urlencode({'q': query, 'tbm': "vid", **lang_info['params'], 'ie': "utf8", 'oe': "utf8"})
+        + urlencode(
+            {
+                'q': query,
+                'tbm': "vid",
+                'start': 10 * params['pageno'],
+                **google_info['params'],
+                'asearch': 'arc',
+                'async': 'use_ac:true,_fmt:html',
+            }
+        )
     )
     )
 
 
     if params['time_range'] in time_range_dict:
     if params['time_range'] in time_range_dict:
@@ -127,9 +90,8 @@ def request(query, params):
         query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
         query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
     params['url'] = query_url
     params['url'] = query_url
 
 
-    params['cookies']['CONSENT'] = "YES+"
-    params['headers'].update(lang_info['headers'])
-    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+    params['cookies'] = google_info['cookies']
+    params['headers'].update(google_info['headers'])
     return params
     return params
 
 
 
 
@@ -141,43 +103,30 @@ def response(resp):
 
 
     # convert the text to dom
     # convert the text to dom
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
-    vidthumb_imgdata = scrap_out_thumbs(dom)
-    thumbs_src = scrap_out_thumbs_src(dom)
-    logger.debug(str(thumbs_src))
 
 
     # parse results
     # parse results
     for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'):
     for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'):
 
 
-        # ignore google *sections*
-        if extract_text(eval_xpath(result, g_section_with_header)):
-            logger.debug("ignoring <g-section-with-header>")
-            continue
-
-        # ingnore articles without an image id / e.g. news articles
-        img_id = eval_xpath_getindex(result, './/g-img/img/@id', 0, default=None)
-        if img_id is None:
-            logger.error("no img_id found in item %s (news article?)", len(results) + 1)
+        img_src = eval_xpath_getindex(result, './/img/@src', 0, None)
+        if img_src is None:
             continue
             continue
 
 
-        img_src = vidthumb_imgdata.get(img_id, None)
-        if not img_src:
-            img_src = thumbs_src.get(img_id, "")
+        title = extract_text(eval_xpath_getindex(result, './/a/h3[1]', 0))
+        url = eval_xpath_getindex(result, './/a/h3[1]/../@href', 0)
 
 
-        title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
-        url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
-        length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span'))
         c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
         c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
         content = extract_text(c_node)
         content = extract_text(c_node)
-        pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]'))
+        pub_info = extract_text(eval_xpath(result, './/div[@class="P7xzyf"]'))
+        length = extract_text(eval_xpath(result, './/div[@class="J1mWY"]'))
 
 
         results.append(
         results.append(
             {
             {
                 'url': url,
                 'url': url,
                 'title': title,
                 'title': title,
                 'content': content,
                 'content': content,
-                'length': length,
                 'author': pub_info,
                 'author': pub_info,
                 'thumbnail': img_src,
                 'thumbnail': img_src,
+                'length': length,
                 'template': 'videos.html',
                 'template': 'videos.html',
             }
             }
         )
         )

+ 148 - 47
searx/engines/peertube.py

@@ -1,18 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- peertube (Videos)
+# lint: pylint
+"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
+(more or less) the same REST API and the schema of the JSON result is identical.
+
 """
 """
 
 
-from json import loads
-from datetime import datetime
+import re
 from urllib.parse import urlencode
 from urllib.parse import urlencode
+from datetime import datetime
+from dateutil.parser import parse
+from dateutil.relativedelta import relativedelta
+
+import babel
+
+from searx import network
+from searx.locales import language_tag
 from searx.utils import html_to_text
 from searx.utils import html_to_text
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
 
 
-# about
 about = {
 about = {
+    # pylint: disable=line-too-long
     "website": 'https://joinpeertube.org',
     "website": 'https://joinpeertube.org',
     "wikidata_id": 'Q50938515',
     "wikidata_id": 'Q50938515',
-    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
+    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
     "use_official_api": True,
     "use_official_api": True,
     "require_api_key": False,
     "require_api_key": False,
     "results": 'JSON',
     "results": 'JSON',
@@ -22,66 +34,155 @@ about = {
 categories = ["videos"]
 categories = ["videos"]
 paging = True
 paging = True
 base_url = "https://peer.tube"
 base_url = "https://peer.tube"
-supported_languages_url = 'https://peer.tube/api/v1/videos/languages'
+"""Base URL of the Peertube instance.  A list of instances is available at:
+
+- https://instances.joinpeertube.org/instances
+"""
+
+time_range_support = True
+time_range_table = {
+    'day': relativedelta(),
+    'week': relativedelta(weeks=-1),
+    'month': relativedelta(months=-1),
+    'year': relativedelta(years=-1),
+}
+
+safesearch = True
+safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
+
+
+def minute_to_hm(minute):
+    if isinstance(minute, int):
+        return "%d:%02d" % (divmod(minute, 60))
+    return None
 
 
 
 
-# do search-request
 def request(query, params):
 def request(query, params):
-    sanitized_url = base_url.rstrip("/")
-    pageno = (params["pageno"] - 1) * 15
-    search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}"
-    query_dict = {"search": query}
-    language = params["language"].split("-")[0]
-    if "all" != language and language in supported_languages:
-        query_dict["languageOneOf"] = language
-    params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno)
-    return params
+    """Assemble request for the Peertube API"""
+
+    if not query:
+        return False
+
+    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+    eng_lang = traits.get_language(params['searxng_locale'], None)
+
+    params['url'] = (
+        base_url.rstrip("/")
+        + "/api/v1/search/videos?"
+        + urlencode(
+            {
+                'search': query,
+                'searchTarget': 'search-index',  # Vidiversum
+                'resultType': 'videos',
+                'start': (params['pageno'] - 1) * 10,
+                'count': 10,
+                # -createdAt: sort by date ascending / createdAt: date descending
+                'sort': '-match',  # sort by *match descending*
+                'nsfw': safesearch_table[params['safesearch']],
+            }
+        )
+    )
+
+    if eng_lang is not None:
+        params['url'] += '&languageOneOf[]=' + eng_lang
+        params['url'] += '&boostLanguages[]=' + eng_lang
 
 
+    if params['time_range'] in time_range_table:
+        time = datetime.now().date() + time_range_table[params['time_range']]
+        params['url'] += '&startDate=' + time.isoformat()
 
 
-def _get_offset_from_pageno(pageno):
-    return (pageno - 1) * 15 + 1
+    return params
 
 
 
 
-# get response from search-request
 def response(resp):
 def response(resp):
-    sanitized_url = base_url.rstrip("/")
+    return video_response(resp)
+
+
+def video_response(resp):
+    """Parse video response from SepiaSearch and Peertube instances."""
     results = []
     results = []
 
 
-    search_res = loads(resp.text)
+    json_data = resp.json()
 
 
-    # return empty array if there are no results
-    if "data" not in search_res:
+    if 'data' not in json_data:
         return []
         return []
 
 
-    # parse results
-    for res in search_res["data"]:
-        title = res["name"]
-        url = sanitized_url + "/videos/watch/" + res["uuid"]
-        description = res["description"]
-        if description:
-            content = html_to_text(res["description"])
-        else:
-            content = ""
-        thumbnail = sanitized_url + res["thumbnailPath"]
-        publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+    for result in json_data['data']:
+        metadata = [
+            x
+            for x in [
+                result.get('channel', {}).get('displayName'),
+                result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
+                ', '.join(result.get('tags', [])),
+            ]
+            if x
+        ]
 
 
         results.append(
         results.append(
             {
             {
-                "template": "videos.html",
-                "url": url,
-                "title": title,
-                "content": content,
-                "publishedDate": publishedDate,
-                "iframe_src": sanitized_url + res["embedPath"],
-                "thumbnail": thumbnail,
+                'url': result['url'],
+                'title': result['name'],
+                'content': html_to_text(result.get('description') or ''),
+                'author': result.get('account', {}).get('displayName'),
+                'length': minute_to_hm(result.get('duration')),
+                'template': 'videos.html',
+                'publishedDate': parse(result['publishedAt']),
+                'iframe_src': result.get('embedUrl'),
+                'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
+                'metadata': ' | '.join(metadata),
             }
             }
         )
         )
 
 
-    # return results
     return results
     return results
 
 
 
 
-def _fetch_supported_languages(resp):
-    videolanguages = resp.json()
-    peertube_languages = list(videolanguages.keys())
-    return peertube_languages
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages from peertube's search-index source code.
+
+    See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_
+
+    .. _8ed5c729 - Refactor and redesign client:
+       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
+    .. _videoLanguages:
+       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
+    """
+
+    resp = network.get(
+        'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
+        # the response from search-index repository is very slow
+        timeout=60,
+    )
+
+    if not resp.ok:
+        print("ERROR: response from peertube is not OK.")
+        return
+
+    js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
+    if not js_lang:
+        print("ERROR: can't determine languages from peertube")
+        return
+
+    for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
+        try:
+            eng_tag = lang.group(1)
+            if eng_tag == 'oc':
+                # Occitanis not known by babel, its closest relative is Catalan
+                # but 'ca' is already in the list of engine_traits.languages -->
+                # 'oc' will be ignored.
+                continue
+
+            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
+
+        except babel.UnknownLocaleError:
+            print("ERROR: %s is unknown by babel" % eng_tag)
+            continue
+
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.languages[sxng_tag] = eng_tag
+
+    engine_traits.languages['zh_Hans'] = 'zh'
+    engine_traits.languages['zh_Hant'] = 'zh'

+ 22 - 23
searx/engines/qwant.py

@@ -34,7 +34,9 @@ import babel
 
 
 from searx.exceptions import SearxEngineAPIException
 from searx.exceptions import SearxEngineAPIException
 from searx.network import raise_for_httperror
 from searx.network import raise_for_httperror
-from searx.locales import get_engine_locale
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -49,7 +51,6 @@ about = {
 # engine dependent config
 # engine dependent config
 categories = []
 categories = []
 paging = True
 paging = True
-supported_languages_url = about['website']
 qwant_categ = None  # web|news|images|videos
 qwant_categ = None  # web|news|images|videos
 
 
 safesearch = True
 safesearch = True
@@ -95,7 +96,7 @@ def request(query, params):
     )
     )
 
 
     # add qwant's locale
     # add qwant's locale
-    q_locale = get_engine_locale(params['language'], supported_languages, default='en_US')
+    q_locale = traits.get_region(params["searxng_locale"], default='en_US')
     params['url'] += '&locale=' + q_locale
     params['url'] += '&locale=' + q_locale
 
 
     # add safesearch option
     # add safesearch option
@@ -243,15 +244,20 @@ def response(resp):
     return results
     return results
 
 
 
 
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
+
+    # pylint: disable=import-outside-toplevel
+    from searx import network
+    from searx.locales import region_tag
 
 
+    resp = network.get(about['website'])
     text = resp.text
     text = resp.text
     text = text[text.find('INITIAL_PROPS') :]
     text = text[text.find('INITIAL_PROPS') :]
     text = text[text.find('{') : text.find('</script>')]
     text = text[text.find('{') : text.find('</script>')]
 
 
     q_initial_props = loads(text)
     q_initial_props = loads(text)
     q_locales = q_initial_props.get('locales')
     q_locales = q_initial_props.get('locales')
-    q_valid_locales = []
+    eng_tag_list = set()
 
 
     for country, v in q_locales.items():
     for country, v in q_locales.items():
         for lang in v['langs']:
         for lang in v['langs']:
@@ -261,25 +267,18 @@ def _fetch_supported_languages(resp):
                 # qwant-news does not support all locales from qwant-web:
                 # qwant-news does not support all locales from qwant-web:
                 continue
                 continue
 
 
-            q_valid_locales.append(_locale)
-
-    supported_languages = {}
+            eng_tag_list.add(_locale)
 
 
-    for q_locale in q_valid_locales:
+    for eng_tag in eng_tag_list:
         try:
         try:
-            locale = babel.Locale.parse(q_locale, sep='_')
-        except babel.core.UnknownLocaleError:
-            print("ERROR: can't determine babel locale of quant's locale %s" % q_locale)
+            sxng_tag = region_tag(babel.Locale.parse(eng_tag, sep='_'))
+        except babel.UnknownLocaleError:
+            print("ERROR: can't determine babel locale of quant's locale %s" % eng_tag)
             continue
             continue
 
 
-        # note: supported_languages (dict)
-        #
-        #   dict's key is a string build up from a babel.Locale object / the
-        #   notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and
-        #   language) notation and dict's values are the locale strings used by
-        #   the engine.
-
-        searxng_locale = locale.language + '-' + locale.territory  # --> params['language']
-        supported_languages[searxng_locale] = q_locale
-
-    return supported_languages
+        conflict = engine_traits.regions.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.regions[sxng_tag] = eng_tag

+ 45 - 65
searx/engines/sepiasearch.py

@@ -1,70 +1,80 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""SepiaSearch uses the same languages as :py:obj:`Peertube
+<searx.engines.peertube>` and the response is identical to the response from the
+peertube engines.
+
 """
 """
- SepiaSearch (Videos)
-"""
 
 
-from json import loads
-from dateutil import parser, relativedelta
+from typing import TYPE_CHECKING
+
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from datetime import datetime
 from datetime import datetime
 
 
-# about
+from searx.engines.peertube import fetch_traits  # pylint: disable=unused-import
+from searx.engines.peertube import (
+    # pylint: disable=unused-import
+    video_response,
+    safesearch_table,
+    time_range_table,
+)
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
+
 about = {
 about = {
+    # pylint: disable=line-too-long
     "website": 'https://sepiasearch.org',
     "website": 'https://sepiasearch.org',
     "wikidata_id": None,
     "wikidata_id": None,
-    "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api",  # NOQA
+    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
     "use_official_api": True,
     "use_official_api": True,
     "require_api_key": False,
     "require_api_key": False,
     "results": 'JSON',
     "results": 'JSON',
 }
 }
 
 
+# engine dependent config
 categories = ['videos']
 categories = ['videos']
 paging = True
 paging = True
+
+base_url = 'https://sepiasearch.org'
+
 time_range_support = True
 time_range_support = True
 safesearch = True
 safesearch = True
-supported_languages = [
-    # fmt: off
-    'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el',
-    'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt',
-    'sv', 'pl', 'fi', 'ru'
-    # fmt: on
-]
-base_url = 'https://sepiasearch.org/api/v1/search/videos'
-
-safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
-
-time_range_table = {
-    'day': relativedelta.relativedelta(),
-    'week': relativedelta.relativedelta(weeks=-1),
-    'month': relativedelta.relativedelta(months=-1),
-    'year': relativedelta.relativedelta(years=-1),
-}
 
 
 
 
-def minute_to_hm(minute):
-    if isinstance(minute, int):
-        return "%d:%02d" % (divmod(minute, 60))
-    return None
+def request(query, params):
+    """Assemble request for the SepiaSearch API"""
+
+    if not query:
+        return False
 
 
+    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+    eng_lang = traits.get_language(params['searxng_locale'], None)
 
 
-def request(query, params):
     params['url'] = (
     params['url'] = (
-        base_url
-        + '?'
+        base_url.rstrip("/")
+        + "/api/v1/search/videos?"
         + urlencode(
         + urlencode(
             {
             {
                 'search': query,
                 'search': query,
                 'start': (params['pageno'] - 1) * 10,
                 'start': (params['pageno'] - 1) * 10,
                 'count': 10,
                 'count': 10,
-                'sort': '-match',
+                # -createdAt: sort by date descending / createdAt: date ascending
+                'sort': '-match',  # sort by *match descending*
                 'nsfw': safesearch_table[params['safesearch']],
                 'nsfw': safesearch_table[params['safesearch']],
             }
             }
         )
         )
     )
     )
 
 
-    language = params['language'].split('-')[0]
-    if language in supported_languages:
-        params['url'] += '&languageOneOf[]=' + language
+    if eng_lang is not None:
+        params['url'] += '&languageOneOf[]=' + eng_lang
+        params['url'] += '&boostLanguages[]=' + eng_lang
+
     if params['time_range'] in time_range_table:
     if params['time_range'] in time_range_table:
         time = datetime.now().date() + time_range_table[params['time_range']]
         time = datetime.now().date() + time_range_table[params['time_range']]
         params['url'] += '&startDate=' + time.isoformat()
         params['url'] += '&startDate=' + time.isoformat()
@@ -73,34 +83,4 @@ def request(query, params):
 
 
 
 
 def response(resp):
 def response(resp):
-    results = []
-
-    search_results = loads(resp.text)
-
-    if 'data' not in search_results:
-        return []
-
-    for result in search_results['data']:
-        title = result['name']
-        content = result['description']
-        thumbnail = result['thumbnailUrl']
-        publishedDate = parser.parse(result['publishedAt'])
-        author = result.get('account', {}).get('displayName')
-        length = minute_to_hm(result.get('duration'))
-        url = result['url']
-
-        results.append(
-            {
-                'url': url,
-                'title': title,
-                'content': content,
-                'author': author,
-                'length': length,
-                'template': 'videos.html',
-                'publishedDate': publishedDate,
-                'iframe_src': result.get('embedUrl'),
-                'thumbnail': thumbnail,
-            }
-        )
-
-    return results
+    return video_response(resp)

+ 352 - 120
searx/engines/startpage.py

@@ -1,28 +1,108 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Startpage (Web)
+"""Startpage's language & region selectors are a mess ..
+
+.. _startpage regions:
+
+Startpage regions
+=================
+
+In the list of regions there are tags we need to map to common region tags::
+
+  pt-BR_BR --> pt_BR
+  zh-CN_CN --> zh_Hans_CN
+  zh-TW_TW --> zh_Hant_TW
+  zh-TW_HK --> zh_Hant_HK
+  en-GB_GB --> en_GB
+
+and there is at least one tag with a three letter language tag (ISO 639-2)::
+
+  fil_PH --> fil_PH
+
+The locale code ``no_NO`` from Startpage does not exist and is mapped to
+``nb-NO``::
+
+    babel.core.UnknownLocaleError: unknown locale 'no_NO'
+
+For reference see languages-subtag at iana; ``no`` is the macrolanguage [1]_ and
+W3C recommends subtag over macrolanguage [2]_.
+
+.. [1] `iana: language-subtag-registry
+   <https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry>`_ ::
+
+      type: language
+      Subtag: nb
+      Description: Norwegian Bokmål
+      Added: 2005-10-16
+      Suppress-Script: Latn
+      Macrolanguage: no
+
+.. [2]
+   Use macrolanguages with care.  Some language subtags have a Scope field set to
+   macrolanguage, i.e. this primary language subtag encompasses a number of more
+   specific primary language subtags in the registry.  ...  As we recommended for
+   the collection subtags mentioned above, in most cases you should try to use
+   the more specific subtags ... `W3: The primary language subtag
+   <https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag>`_
+
+.. _startpage languages:
+
+Startpage languages
+===================
+
+:py:obj:`send_accept_language_header`:
+  The displayed names in Startpage's settings page depend on the location of the
+  IP when ``Accept-Language`` HTTP header is unset.  In :py:obj:`fetch_traits`
+  we use::
+
+    'Accept-Language': "en-US,en;q=0.5",
+    ..
+
+  to get uniform names independent of the IP.
+
+.. _startpage categories:
+
+Startpage categories
+====================
+
+Startpage's category (for Web-search, News, Videos, ..) is set by
+:py:obj:`startpage_categ` in  settings.yml::
+
+  - name: startpage
+    engine: startpage
+    startpage_categ: web
+    ...
+
+.. hint::
+
+   The default category is ``web`` .. and other categories than ``web`` are not
+   yet implemented.
 
 
 """
 """
 
 
+from typing import TYPE_CHECKING
+from collections import OrderedDict
 import re
 import re
-from time import time
-
-from urllib.parse import urlencode
 from unicodedata import normalize, combining
 from unicodedata import normalize, combining
+from time import time
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
 
 
-from dateutil import parser
-from lxml import html
-from babel import Locale
-from babel.localedata import locale_identifiers
+import dateutil.parser
+import lxml.html
+import babel
+
+from searx import network
+from searx.utils import extract_text, eval_xpath, gen_useragent
+from searx.exceptions import SearxEngineCaptchaException
+from searx.locales import region_tag
+from searx.enginelib.traits import EngineTraits
 
 
-from searx.network import get
-from searx.utils import extract_text, eval_xpath, match_language
-from searx.exceptions import (
-    SearxEngineResponseException,
-    SearxEngineCaptchaException,
-)
+if TYPE_CHECKING:
+    import logging
 
 
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -34,18 +114,28 @@ about = {
     "results": 'HTML',
     "results": 'HTML',
 }
 }
 
 
+startpage_categ = 'web'
+"""Startpage's category, visit :ref:`startpage categories`.
+"""
+
+send_accept_language_header = True
+"""Startpage tries to guess user's language and territory from the HTTP
+``Accept-Language``.  Optionally the user can select a search-language (can be
+different to the UI language) and a region filter.
+"""
+
 # engine dependent config
 # engine dependent config
 categories = ['general', 'web']
 categories = ['general', 'web']
-# there is a mechanism to block "bot" search
-# (probably the parameter qid), require
-# storing of qid's between mulitble search-calls
-
 paging = True
 paging = True
-supported_languages_url = 'https://www.startpage.com/do/settings'
+time_range_support = True
+safesearch = True
+
+time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
+safesearch_dict = {0: '0', 1: '1', 2: '1'}
 
 
 # search-url
 # search-url
-base_url = 'https://startpage.com/'
-search_url = base_url + 'sp/search?'
+base_url = 'https://www.startpage.com'
+search_url = base_url + '/sp/search'
 
 
 # specific xpath variables
 # specific xpath variables
 # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
 # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
@@ -53,92 +143,193 @@ search_url = base_url + 'sp/search?'
 results_xpath = '//div[@class="w-gl__result__main"]'
 results_xpath = '//div[@class="w-gl__result__main"]'
 link_xpath = './/a[@class="w-gl__result-title result-link"]'
 link_xpath = './/a[@class="w-gl__result-title result-link"]'
 content_xpath = './/p[@class="w-gl__description"]'
 content_xpath = './/p[@class="w-gl__description"]'
+search_form_xpath = '//form[@id="search"]'
+"""XPath of Startpage's origin search form
+
+.. code:: html
+
+    <form action="/sp/search" method="post">
+      <input type="text" name="query"  value="" ..>
+      <input type="hidden" name="t" value="device">
+      <input type="hidden" name="lui" value="english">
+      <input type="hidden" name="sc" value="Q7Mt5TRqowKB00">
+      <input type="hidden" name="cat" value="web">
+      <input type="hidden" class="abp" id="abp-input" name="abp" value="1">
+    </form>
+"""
 
 
 # timestamp of the last fetch of 'sc' code
 # timestamp of the last fetch of 'sc' code
 sc_code_ts = 0
 sc_code_ts = 0
 sc_code = ''
 sc_code = ''
+sc_code_cache_sec = 30
+"""Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`."""
 
 
 
 
-def raise_captcha(resp):
+def get_sc_code(searxng_locale, params):
+    """Get an actual ``sc`` argument from Startpage's search form (HTML page).
 
 
-    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
-        raise SearxEngineCaptchaException()
+    Startpage puts a ``sc`` argument on every HTML :py:obj:`search form
+    <search_form_xpath>`.  Without this argument Startpage considers the request
+    is from a bot.  We do not know what is encoded in the value of the ``sc``
+    argument, but it seems to be a kind of a *time-stamp*.
 
 
+    Startpage's search form generates a new sc-code on each request.  This
+    function scrapes a new sc-code from Startpage's home page every
+    :py:obj:`sc_code_cache_sec` seconds.
 
 
-def get_sc_code(headers):
-    """Get an actual `sc` argument from startpage's home page.
+    """
 
 
-    Startpage puts a `sc` argument on every link.  Without this argument
-    startpage considers the request is from a bot.  We do not know what is
-    encoded in the value of the `sc` argument, but it seems to be a kind of a
-    *time-stamp*.  This *time-stamp* is valid for a few hours.
+    global sc_code_ts, sc_code  # pylint: disable=global-statement
 
 
-    This function scrap a new *time-stamp* from startpage's home page every hour
-    (3000 sec).
+    if sc_code and (time() < (sc_code_ts + sc_code_cache_sec)):
+        logger.debug("get_sc_code: reuse '%s'", sc_code)
+        return sc_code
+
+    headers = {**params['headers']}
+    headers['Origin'] = base_url
+    headers['Referer'] = base_url + '/'
+    # headers['Connection'] = 'keep-alive'
+    # headers['Accept-Encoding'] = 'gzip, deflate, br'
+    # headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'
+    # headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0'
+
+    # add Accept-Language header
+    if searxng_locale == 'all':
+        searxng_locale = 'en-US'
+    locale = babel.Locale.parse(searxng_locale, sep='-')
+
+    if send_accept_language_header:
+        ac_lang = locale.language
+        if locale.territory:
+            ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % (
+                locale.language,
+                locale.territory,
+                locale.language,
+            )
+        headers['Accept-Language'] = ac_lang
+
+    get_sc_url = base_url + '/?sc=%s' % (sc_code)
+    logger.debug("query new sc time-stamp ... %s", get_sc_url)
+    logger.debug("headers: %s", headers)
+    resp = network.get(get_sc_url, headers=headers)
+
+    # ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)
+    # ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg
+    # ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21
 
 
-    """
+    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
+        raise SearxEngineCaptchaException(
+            message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha",
+        )
+
+    dom = lxml.html.fromstring(resp.text)
+
+    try:
+        sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0]
+    except IndexError as exc:
+        logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695")
+        raise SearxEngineCaptchaException(
+            message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,
+        ) from exc
+
+    sc_code_ts = time()
+    logger.debug("get_sc_code: new value is: %s", sc_code)
+    return sc_code
 
 
-    global sc_code_ts, sc_code  # pylint: disable=global-statement
 
 
-    if time() > (sc_code_ts + 3000):
-        logger.debug("query new sc time-stamp ...")
+def request(query, params):
+    """Assemble a Startpage request.
 
 
-        resp = get(base_url, headers=headers)
-        raise_captcha(resp)
-        dom = html.fromstring(resp.text)
+    To avoid CAPTCHA we need to send a well formed HTTP POST request with a
+    cookie.  We need to form a request that is identical to the request build by
+    Startpage's search form:
 
 
-        try:
-            # <input type="hidden" name="sc" value="...">
-            sc_code = eval_xpath(dom, '//input[@name="sc"]/@value')[0]
-        except IndexError as exc:
-            # suspend startpage API --> https://github.com/searxng/searxng/pull/695
-            raise SearxEngineResponseException(
-                suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!"
-            ) from exc
+    - in the cookie the **region** is selected
+    - in the HTTP POST data the **language** is selected
 
 
-        sc_code_ts = time()
-        logger.debug("new value is: %s", sc_code)
+    Additionally the arguments from Startpage's search form need to be set in
+    HTTP POST data / compare ``<input>`` elements: :py:obj:`search_form_xpath`.
+    """
+    if startpage_categ == 'web':
+        return _request_cat_web(query, params)
 
 
-    return sc_code
+    logger.error("Startpages's category '%' is not yet implemented.", startpage_categ)
+    return params
 
 
 
 
-# do search-request
-def request(query, params):
+def _request_cat_web(query, params):
 
 
-    # pylint: disable=line-too-long
-    # The format string from Startpage's FFox add-on [1]::
-    #
-    #     https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=ext-ff&language=__MSG_extensionUrlLanguage__&extVersion=1.3.0
-    #
-    # [1] https://addons.mozilla.org/en-US/firefox/addon/startpage-private-search/
+    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
+    engine_language = traits.get_language(params['searxng_locale'], 'en')
 
 
+    # build arguments
     args = {
     args = {
         'query': query,
         'query': query,
-        'page': params['pageno'],
         'cat': 'web',
         'cat': 'web',
-        # 'pl': 'ext-ff',
-        # 'extVersion': '1.3.0',
-        # 'abp': "-1",
-        'sc': get_sc_code(params['headers']),
+        't': 'device',
+        'sc': get_sc_code(params['searxng_locale'], params),  # hint: this func needs HTTP headers,
+        'with_date': time_range_dict.get(params['time_range'], ''),
     }
     }
 
 
-    # set language if specified
-    if params['language'] != 'all':
-        lang_code = match_language(params['language'], supported_languages, fallback=None)
-        if lang_code:
-            language_name = supported_languages[lang_code]['alias']
-            args['language'] = language_name
-            args['lui'] = language_name
+    if engine_language:
+        args['language'] = engine_language
+        args['lui'] = engine_language
+
+    args['abp'] = '1'
+    if params['pageno'] > 1:
+        args['page'] = params['pageno']
+
+    # build cookie
+    lang_homepage = 'en'
+    cookie = OrderedDict()
+    cookie['date_time'] = 'world'
+    cookie['disable_family_filter'] = safesearch_dict[params['safesearch']]
+    cookie['disable_open_in_new_window'] = '0'
+    cookie['enable_post_method'] = '1'  # hint: POST
+    cookie['enable_proxy_safety_suggest'] = '1'
+    cookie['enable_stay_control'] = '1'
+    cookie['instant_answers'] = '1'
+    cookie['lang_homepage'] = 's/device/%s/' % lang_homepage
+    cookie['num_of_results'] = '10'
+    cookie['suggestions'] = '1'
+    cookie['wt_unit'] = 'celsius'
+
+    if engine_language:
+        cookie['language'] = engine_language
+        cookie['language_ui'] = engine_language
+
+    if engine_region:
+        cookie['search_results_region'] = engine_region
+
+    params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()])
+    logger.debug('cookie preferences: %s', params['cookies']['preferences'])
+
+    # POST request
+    logger.debug("data: %s", args)
+    params['data'] = args
+    params['method'] = 'POST'
+    params['url'] = search_url
+    params['headers']['Origin'] = base_url
+    params['headers']['Referer'] = base_url + '/'
+    # is the Accept header needed?
+    # params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
 
 
-    params['url'] = search_url + urlencode(args)
     return params
     return params
 
 
 
 
 # get response from search-request
 # get response from search-request
 def response(resp):
 def response(resp):
-    results = []
+    dom = lxml.html.fromstring(resp.text)
 
 
-    dom = html.fromstring(resp.text)
+    if startpage_categ == 'web':
+        return _response_cat_web(dom)
+
+    logger.error("Startpages's category '%' is not yet implemented.", startpage_categ)
+    return []
+
+
+def _response_cat_web(dom):
+    results = []
 
 
     # parse results
     # parse results
     for result in eval_xpath(dom, results_xpath):
     for result in eval_xpath(dom, results_xpath):
@@ -173,7 +364,7 @@ def response(resp):
             content = content[date_pos:]
             content = content[date_pos:]
 
 
             try:
             try:
-                published_date = parser.parse(date_string, dayfirst=True)
+                published_date = dateutil.parser.parse(date_string, dayfirst=True)
             except ValueError:
             except ValueError:
                 pass
                 pass
 
 
@@ -199,62 +390,103 @@ def response(resp):
     return results
     return results
 
 
 
 
-# get supported languages from their site
-def _fetch_supported_languages(resp):
-    # startpage's language selector is a mess each option has a displayed name
-    # and a value, either of which may represent the language name in the native
-    # script, the language name in English, an English transliteration of the
-    # native name, the English name of the writing script used by the language,
-    # or occasionally something else entirely.
-
-    # this cases are so special they need to be hardcoded, a couple of them are misspellings
-    language_names = {
-        'english_uk': 'en-GB',
-        'fantizhengwen': ['zh-TW', 'zh-HK'],
-        'hangul': 'ko',
-        'malayam': 'ml',
-        'norsk': 'nb',
-        'sinhalese': 'si',
-        'sudanese': 'su',
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch :ref:`languages <startpage languages>` and :ref:`regions <startpage
+    regions>` from Startpage."""
+    # pylint: disable=too-many-branches
+
+    headers = {
+        'User-Agent': gen_useragent(),
+        'Accept-Language': "en-US,en;q=0.5",  # request English to get uniform language names
     }
     }
+    resp = network.get('https://www.startpage.com/do/settings', headers=headers)
 
 
-    # get the English name of every language known by babel
-    language_names.update(
-        {
-            # fmt: off
-            name.lower(): lang_code
-            # pylint: disable=protected-access
-            for lang_code, name in Locale('en')._data['languages'].items()
-            # fmt: on
-        }
-    )
+    if not resp.ok:
+        print("ERROR: response from Startpage is not OK.")
+
+    dom = lxml.html.fromstring(resp.text)
+
+    # regions
+
+    sp_region_names = []
+    for option in dom.xpath('//form[@name="settings"]//select[@name="search_results_region"]/option'):
+        sp_region_names.append(option.get('value'))
+
+    for eng_tag in sp_region_names:
+        if eng_tag == 'all':
+            continue
+        babel_region_tag = {'no_NO': 'nb_NO'}.get(eng_tag, eng_tag)  # norway
+
+        if '-' in babel_region_tag:
+            l, r = babel_region_tag.split('-')
+            r = r.split('_')[-1]
+            sxng_tag = region_tag(babel.Locale.parse(l + '_' + r, sep='_'))
+
+        else:
+            try:
+                sxng_tag = region_tag(babel.Locale.parse(babel_region_tag, sep='_'))
+
+            except babel.UnknownLocaleError:
+                print("ERROR: can't determine babel locale of startpage's locale %s" % eng_tag)
+                continue
+
+        conflict = engine_traits.regions.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.regions[sxng_tag] = eng_tag
+
+    # languages
+
+    catalog_engine2code = {name.lower(): lang_code for lang_code, name in babel.Locale('en').languages.items()}
 
 
     # get the native name of every language known by babel
     # get the native name of every language known by babel
-    for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, locale_identifiers()):
-        native_name = Locale(lang_code).get_language_name().lower()
+
+    for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()):
+        native_name = babel.Locale(lang_code).get_language_name().lower()
         # add native name exactly as it is
         # add native name exactly as it is
-        language_names[native_name] = lang_code
+        catalog_engine2code[native_name] = lang_code
 
 
         # add "normalized" language name (i.e. français becomes francais and español becomes espanol)
         # add "normalized" language name (i.e. français becomes francais and español becomes espanol)
         unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name)))
         unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name)))
         if len(unaccented_name) == len(unaccented_name.encode()):
         if len(unaccented_name) == len(unaccented_name.encode()):
             # add only if result is ascii (otherwise "normalization" didn't work)
             # add only if result is ascii (otherwise "normalization" didn't work)
-            language_names[unaccented_name] = lang_code
+            catalog_engine2code[unaccented_name] = lang_code
+
+    # values that can't be determined by babel's languages names
+
+    catalog_engine2code.update(
+        {
+            # traditional chinese used in ..
+            'fantizhengwen': 'zh_Hant',
+            # Korean alphabet
+            'hangul': 'ko',
+            # Malayalam is one of 22 scheduled languages of India.
+            'malayam': 'ml',
+            'norsk': 'nb',
+            'sinhalese': 'si',
+        }
+    )
+
+    skip_eng_tags = {
+        'english_uk',  # SearXNG lang 'en' already maps to 'english'
+    }
 
 
-    dom = html.fromstring(resp.text)
-    sp_lang_names = []
     for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'):
     for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'):
-        sp_lang_names.append((option.get('value'), extract_text(option).lower()))
-
-    supported_languages = {}
-    for sp_option_value, sp_option_text in sp_lang_names:
-        lang_code = language_names.get(sp_option_value) or language_names.get(sp_option_text)
-        if isinstance(lang_code, str):
-            supported_languages[lang_code] = {'alias': sp_option_value}
-        elif isinstance(lang_code, list):
-            for _lc in lang_code:
-                supported_languages[_lc] = {'alias': sp_option_value}
-        else:
-            print('Unknown language option in Startpage: {} ({})'.format(sp_option_value, sp_option_text))
 
 
-    return supported_languages
+        eng_tag = option.get('value')
+        if eng_tag in skip_eng_tags:
+            continue
+        name = extract_text(option).lower()
+
+        sxng_tag = catalog_engine2code.get(eng_tag)
+        if sxng_tag is None:
+            sxng_tag = catalog_engine2code[name]
+
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.languages[sxng_tag] = eng_tag

+ 36 - 14
searx/engines/wikidata.py

@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Wikidata
+"""This module implements the Wikidata engine.  Some implementations are shared
+from :ref:`wikipedia engine`.
+
 """
 """
 # pylint: disable=missing-class-docstring
 # pylint: disable=missing-class-docstring
 
 
+from typing import TYPE_CHECKING
 from hashlib import md5
 from hashlib import md5
 from urllib.parse import urlencode, unquote
 from urllib.parse import urlencode, unquote
 from json import loads
 from json import loads
@@ -13,12 +16,17 @@ from babel.dates import format_datetime, format_date, format_time, get_datetime_
 
 
 from searx.data import WIKIDATA_UNITS
 from searx.data import WIKIDATA_UNITS
 from searx.network import post, get
 from searx.network import post, get
-from searx.utils import match_language, searx_useragent, get_string_replaces_function
+from searx.utils import searx_useragent, get_string_replaces_function
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
-from searx.engines.wikipedia import (  # pylint: disable=unused-import
-    _fetch_supported_languages,
-    supported_languages_url,
-)
+from searx.engines.wikipedia import fetch_traits as _fetch_traits
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -154,33 +162,35 @@ def send_wikidata_query(query, method='GET'):
 
 
 
 
 def request(query, params):
 def request(query, params):
-    language = params['language'].split('-')[0]
-    if language == 'all':
-        language = 'en'
-    else:
-        language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
+
+    # wikidata does not support zh-classical (zh_Hans) / zh-TW, zh-HK and zh-CN
+    # mapped to zh
+    sxng_lang = params['searxng_locale'].split('-')[0]
+    language = traits.get_language(sxng_lang, 'en')
 
 
     query, attributes = get_query(query, language)
     query, attributes = get_query(query, language)
+    logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))
 
 
     params['method'] = 'POST'
     params['method'] = 'POST'
     params['url'] = SPARQL_ENDPOINT_URL
     params['url'] = SPARQL_ENDPOINT_URL
     params['data'] = {'query': query}
     params['data'] = {'query': query}
     params['headers'] = get_headers()
     params['headers'] = get_headers()
-
     params['language'] = language
     params['language'] = language
     params['attributes'] = attributes
     params['attributes'] = attributes
+
     return params
     return params
 
 
 
 
 def response(resp):
 def response(resp):
+
     results = []
     results = []
     jsonresponse = loads(resp.content.decode())
     jsonresponse = loads(resp.content.decode())
 
 
-    language = resp.search_params['language'].lower()
+    language = resp.search_params['language']
     attributes = resp.search_params['attributes']
     attributes = resp.search_params['attributes']
+    logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))
 
 
     seen_entities = set()
     seen_entities = set()
-
     for result in jsonresponse.get('results', {}).get('bindings', []):
     for result in jsonresponse.get('results', {}).get('bindings', []):
         attribute_result = {key: value['value'] for key, value in result.items()}
         attribute_result = {key: value['value'] for key, value in result.items()}
         entity_url = attribute_result['item']
         entity_url = attribute_result['item']
@@ -756,3 +766,15 @@ def init(engine_settings=None):  # pylint: disable=unused-argument
         lang = result['name']['xml:lang']
         lang = result['name']['xml:lang']
         entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
         entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
         WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
         WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
+
+
+def fetch_traits(engine_traits: EngineTraits):
+    """Use languages evaluated from :py:obj:`wikipedia.fetch_traits
+    <searx.engines.wikipedia.fetch_traits>` except zh-classical (zh_Hans) which
+    is not supported by wikidata."""
+
+    _fetch_traits(engine_traits)
+    # wikidata does not support zh-classical (zh_Hans)
+    engine_traits.languages.pop('zh_Hans')
+    # wikidata does not have net-locations for the languages
+    engine_traits.custom['wiki_netloc'] = {}

+ 170 - 62
searx/engines/wikipedia.py

@@ -1,13 +1,26 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""This module implements the Wikipedia engine.  Some of these implementations
+are shared by other engines:
+
+- :ref:`wikidata engine`
+
+The list of supported languages is fetched from the article linked by
+:py:obj:`wikipedia_article_depth`.  Unlike traditional search engines, wikipedia
+does not support one Wikipedia for all the languages, but there is one Wikipedia
+for every language (:py:obj:`fetch_traits`).
 """
 """
- Wikipedia (Web)
-"""
 
 
-from urllib.parse import quote
-from json import loads
-from lxml.html import fromstring
-from searx.utils import match_language, searx_useragent
-from searx.network import raise_for_httperror
+import urllib.parse
+import babel
+
+from lxml import html
+
+from searx import network
+from searx.locales import language_tag
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -19,32 +32,40 @@ about = {
     "results": 'JSON',
     "results": 'JSON',
 }
 }
 
 
-
 send_accept_language_header = True
 send_accept_language_header = True
 
 
-# search-url
-search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
-supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
-language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")}
+wikipedia_article_depth = 'https://meta.wikimedia.org/wiki/Wikipedia_article_depth'
+"""The *editing depth* of Wikipedia is one of several possible rough indicators
+of the encyclopedia's collaborative quality, showing how frequently its articles
+are updated.  The measurement of depth was introduced after some limitations of
+the classic measurement of article count were realized.
+"""
+
+# example: https://zh-classical.wikipedia.org/api/rest_v1/page/summary/日
+rest_v1_summary_url = 'https://{wiki_netloc}/api/rest_v1/page/summary/{title}'
+"""`wikipedia rest_v1 summary API`_: The summary response includes an extract of
+the first paragraph of the page in plain text and HTML as well as the type of
+page. This is useful for page previews (fka. Hovercards, aka. Popups) on the web
+and link previews in the apps.
 
 
+.. _wikipedia rest_v1 summary API: https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_
 
 
-# set language in base_url
-def url_lang(lang):
-    lang_pre = lang.split('-')[0]
-    if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
-        return 'en'
-    return match_language(lang, supported_languages, language_aliases).split('-')[0]
+"""
 
 
 
 
-# do search-request
 def request(query, params):
 def request(query, params):
+    """Assemble a request (`wikipedia rest_v1 summary API`_)."""
     if query.islower():
     if query.islower():
         query = query.title()
         query = query.title()
 
 
-    language = url_lang(params['language'])
-    params['url'] = search_url.format(title=quote(query), language=language)
+    engine_language = traits.get_language(params['searxng_locale'], 'en')
+    wiki_netloc = traits.custom['wiki_netloc'].get(engine_language, 'https://en.wikipedia.org/wiki/')
+    title = urllib.parse.quote(query)
+
+    # '!wikipedia 日 :zh-TW' --> https://zh-classical.wikipedia.org/
+    # '!wikipedia 日 :zh' --> https://zh.wikipedia.org/
+    params['url'] = rest_v1_summary_url.format(wiki_netloc=wiki_netloc, title=title)
 
 
-    params['headers']['User-Agent'] = searx_useragent()
     params['raise_for_httperror'] = False
     params['raise_for_httperror'] = False
     params['soft_max_redirects'] = 2
     params['soft_max_redirects'] = 2
 
 
@@ -53,13 +74,14 @@ def request(query, params):
 
 
 # get response from search-request
 # get response from search-request
 def response(resp):
 def response(resp):
+
+    results = []
     if resp.status_code == 404:
     if resp.status_code == 404:
         return []
         return []
-
     if resp.status_code == 400:
     if resp.status_code == 400:
         try:
         try:
-            api_result = loads(resp.text)
-        except:
+            api_result = resp.json()
+        except Exception:  # pylint: disable=broad-except
             pass
             pass
         else:
         else:
             if (
             if (
@@ -68,49 +90,135 @@ def response(resp):
             ):
             ):
                 return []
                 return []
 
 
-    raise_for_httperror(resp)
-
-    results = []
-    api_result = loads(resp.text)
-
-    # skip disambiguation pages
-    if api_result.get('type') != 'standard':
-        return []
+    network.raise_for_httperror(resp)
 
 
+    api_result = resp.json()
     title = api_result['title']
     title = api_result['title']
     wikipedia_link = api_result['content_urls']['desktop']['page']
     wikipedia_link = api_result['content_urls']['desktop']['page']
-
-    results.append({'url': wikipedia_link, 'title': title})
-
-    results.append(
-        {
-            'infobox': title,
-            'id': wikipedia_link,
-            'content': api_result.get('extract', ''),
-            'img_src': api_result.get('thumbnail', {}).get('source'),
-            'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
-        }
-    )
+    results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')})
+
+    if api_result.get('type') == 'standard':
+        results.append(
+            {
+                'infobox': title,
+                'id': wikipedia_link,
+                'content': api_result.get('extract', ''),
+                'img_src': api_result.get('thumbnail', {}).get('source'),
+                'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
+            }
+        )
 
 
     return results
     return results
 
 
 
 
-# get supported languages from their site
-def _fetch_supported_languages(resp):
-    supported_languages = {}
-    dom = fromstring(resp.text)
-    tables = dom.xpath('//table[contains(@class,"sortable")]')
-    for table in tables:
-        # exclude header row
-        trs = table.xpath('.//tr')[1:]
-        for tr in trs:
-            td = tr.xpath('./td')
-            code = td[3].xpath('./a')[0].text
-            name = td[1].xpath('./a')[0].text
-            english_name = td[1].xpath('./a')[0].text
-            articles = int(td[4].xpath('./a')[0].text.replace(',', ''))
+# Nonstandard language codes
+#
+# These Wikipedias use language codes that do not conform to the ISO 639
+# standard (which is how wiki subdomains are chosen nowadays).
+
+lang_map = {
+    'be-tarask': 'bel',
+    'ak': 'aka',
+    'als': 'gsw',
+    'bat-smg': 'sgs',
+    'cbk-zam': 'cbk',
+    'fiu-vro': 'vro',
+    'map-bms': 'map',
+    'nrm': 'nrf',
+    'roa-rup': 'rup',
+    'nds-nl': 'nds',
+    #'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple)
+    'zh-min-nan': 'nan',
+    'zh-yue': 'yue',
+    'an': 'arg',
+    'zh-classical': 'zh-Hant',  # babel maps classical to zh-Hans (for whatever reason)
+}
+
+unknown_langs = [
+    'an',  # Aragonese
+    'ba',  # Bashkir
+    'bar',  # Bavarian
+    'bcl',  # Central Bicolano
+    'be-tarask',  # Belarusian variant / Belarusian is already covered by 'be'
+    'bpy',  # Bishnupriya Manipuri is unknown by babel
+    'hif',  # Fiji Hindi
+    'ilo',  # Ilokano
+    'li',  # Limburgish
+    'sco',  # Scots (sco) is not known by babel, Scottish Gaelic (gd) is known by babel
+    'sh',  # Serbo-Croatian
+    'simple',  # simple english is not known as a natural language different from english (babel)
+    'vo',  # Volapük
+    'wa',  # Walloon
+]
+
+
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages from Wikipedia.
+
+    The location of the Wikipedia address of a language is mapped in a
+    :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>`
+    (``wiki_netloc``).  Here is a reduced example:
+
+    .. code:: python
+
+       traits.custom['wiki_netloc'] = {
+           "en": "en.wikipedia.org",
+           ..
+           "gsw": "als.wikipedia.org",
+           ..
+           "zh": "zh.wikipedia.org",
+           "zh-classical": "zh-classical.wikipedia.org"
+       }
+
+    """
+
+    engine_traits.custom['wiki_netloc'] = {}
+
+    # insert alias to map from a region like zh-CN to a language zh_Hans
+    engine_traits.languages['zh_Hans'] = 'zh'
+
+    resp = network.get(wikipedia_article_depth)
+    if not resp.ok:
+        print("ERROR: response from Wikipedia is not OK.")
+
+    dom = html.fromstring(resp.text)
+    for row in dom.xpath('//table[contains(@class,"sortable")]//tbody/tr'):
+
+        cols = row.xpath('./td')
+        if not cols:
+            continue
+        cols = [c.text_content().strip() for c in cols]
+
+        depth = float(cols[3].replace('-', '0').replace(',', ''))
+        articles = int(cols[4].replace(',', '').replace(',', ''))
+
+        if articles < 10000:
             # exclude languages with too few articles
             # exclude languages with too few articles
-            if articles >= 100:
-                supported_languages[code] = {"name": name, "english_name": english_name}
+            continue
+
+        if int(depth) < 20:
+            # Rough indicator of a Wikipedia’s quality, showing how frequently
+            # its articles are updated.
+            continue
 
 
-    return supported_languages
+        eng_tag = cols[2]
+        wiki_url = row.xpath('./td[3]/a/@href')[0]
+        wiki_url = urllib.parse.urlparse(wiki_url)
+
+        if eng_tag in unknown_langs:
+            continue
+
+        try:
+            sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag), sep='-'))
+        except babel.UnknownLocaleError:
+            print("ERROR: %s [%s] is unknown by babel" % (cols[0], eng_tag))
+            continue
+
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+
+        engine_traits.languages[sxng_tag] = eng_tag
+        engine_traits.custom['wiki_netloc'][eng_tag] = wiki_url.netloc

+ 46 - 24
searx/engines/yahoo.py

@@ -17,8 +17,10 @@ from searx.utils import (
     eval_xpath_getindex,
     eval_xpath_getindex,
     eval_xpath_list,
     eval_xpath_list,
     extract_text,
     extract_text,
-    match_language,
 )
 )
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -34,8 +36,7 @@ about = {
 categories = ['general', 'web']
 categories = ['general', 'web']
 paging = True
 paging = True
 time_range_support = True
 time_range_support = True
-supported_languages_url = 'https://search.yahoo.com/preferences/languages'
-"""Supported languages are read from Yahoo preference page."""
+# send_accept_language_header = True
 
 
 time_range_dict = {
 time_range_dict = {
     'day': ('1d', 'd'),
     'day': ('1d', 'd'),
@@ -43,15 +44,10 @@ time_range_dict = {
     'month': ('1m', 'm'),
     'month': ('1m', 'm'),
 }
 }
 
 
-language_aliases = {
-    'zh-HK': 'zh_chs',
-    'zh-CN': 'zh_chs',  # dead since 2015 / routed to hk.search.yahoo.com
-    'zh-TW': 'zh_cht',
-}
-
 lang2domain = {
 lang2domain = {
     'zh_chs': 'hk.search.yahoo.com',
     'zh_chs': 'hk.search.yahoo.com',
     'zh_cht': 'tw.search.yahoo.com',
     'zh_cht': 'tw.search.yahoo.com',
+    'any': 'search.yahoo.com',
     'en': 'search.yahoo.com',
     'en': 'search.yahoo.com',
     'bg': 'search.yahoo.com',
     'bg': 'search.yahoo.com',
     'cs': 'search.yahoo.com',
     'cs': 'search.yahoo.com',
@@ -67,21 +63,23 @@ lang2domain = {
 }
 }
 """Map language to domain"""
 """Map language to domain"""
 
 
-
-def _get_language(params):
-
-    lang = language_aliases.get(params['language'])
-    if lang is None:
-        lang = match_language(params['language'], supported_languages, language_aliases)
-    lang = lang.split('-')[0]
-    logger.debug("params['language']: %s --> %s", params['language'], lang)
-    return lang
+locale_aliases = {
+    'zh': 'zh_Hans',
+    'zh-HK': 'zh_Hans',
+    'zh-CN': 'zh_Hans',  # dead since 2015 / routed to hk.search.yahoo.com
+    'zh-TW': 'zh_Hant',
+}
 
 
 
 
 def request(query, params):
 def request(query, params):
     """build request"""
     """build request"""
+
+    lang = locale_aliases.get(params['language'], None)
+    if not lang:
+        lang = params['language'].split('-')[0]
+    lang = traits.get_language(lang, traits.all_locale)
+
     offset = (params['pageno'] - 1) * 7 + 1
     offset = (params['pageno'] - 1) * 7 + 1
-    lang = _get_language(params)
     age, btf = time_range_dict.get(params['time_range'], ('', ''))
     age, btf = time_range_dict.get(params['time_range'], ('', ''))
 
 
     args = urlencode(
     args = urlencode(
@@ -154,13 +152,37 @@ def response(resp):
     return results
     return results
 
 
 
 
-# get supported languages from their site
-def _fetch_supported_languages(resp):
-    supported_languages = []
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages from yahoo"""
+
+    # pylint: disable=import-outside-toplevel
+    import babel
+    from searx import network
+    from searx.locales import language_tag
+
+    engine_traits.all_locale = 'any'
+
+    resp = network.get('https://search.yahoo.com/preferences/languages')
+    if not resp.ok:
+        print("ERROR: response from peertube is not OK.")
+
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
     offset = len('lang_')
     offset = len('lang_')
 
 
+    eng2sxng = {'zh_chs': 'zh_Hans', 'zh_cht': 'zh_Hant'}
+
     for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'):
     for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'):
-        supported_languages.append(val[offset:])
+        eng_tag = val[offset:]
+
+        try:
+            sxng_tag = language_tag(babel.Locale.parse(eng2sxng.get(eng_tag, eng_tag)))
+        except babel.UnknownLocaleError:
+            print('ERROR: unknown language --> %s' % eng_tag)
+            continue
 
 
-    return supported_languages
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.languages[sxng_tag] = eng_tag

+ 178 - 12
searx/locales.py

@@ -4,11 +4,11 @@
 """Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
 """Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
 """
 """
 
 
-from typing import Set
+from typing import Set, Optional, List
 import os
 import os
 import pathlib
 import pathlib
 
 
-from babel import Locale
+import babel
 from babel.support import Translations
 from babel.support import Translations
 import babel.languages
 import babel.languages
 import babel.core
 import babel.core
@@ -134,7 +134,7 @@ def locales_initialize(directory=None):
     flask_babel.get_translations = get_translations
     flask_babel.get_translations = get_translations
 
 
     for tag, descr in ADDITIONAL_TRANSLATIONS.items():
     for tag, descr in ADDITIONAL_TRANSLATIONS.items():
-        locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
+        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
         LOCALE_NAMES[tag] = descr
         LOCALE_NAMES[tag] = descr
         if locale.text_direction == 'rtl':
         if locale.text_direction == 'rtl':
             RTL_LOCALES.add(tag)
             RTL_LOCALES.add(tag)
@@ -142,7 +142,7 @@ def locales_initialize(directory=None):
     for tag in LOCALE_BEST_MATCH:
     for tag in LOCALE_BEST_MATCH:
         descr = LOCALE_NAMES.get(tag)
         descr = LOCALE_NAMES.get(tag)
         if not descr:
         if not descr:
-            locale = Locale.parse(tag, sep='-')
+            locale = babel.Locale.parse(tag, sep='-')
             LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
             LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
             if locale.text_direction == 'rtl':
             if locale.text_direction == 'rtl':
                 RTL_LOCALES.add(tag)
                 RTL_LOCALES.add(tag)
@@ -154,12 +154,77 @@ def locales_initialize(directory=None):
         tag = dirname.replace('_', '-')
         tag = dirname.replace('_', '-')
         descr = LOCALE_NAMES.get(tag)
         descr = LOCALE_NAMES.get(tag)
         if not descr:
         if not descr:
-            locale = Locale.parse(dirname)
+            locale = babel.Locale.parse(dirname)
             LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
             LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
             if locale.text_direction == 'rtl':
             if locale.text_direction == 'rtl':
                 RTL_LOCALES.add(tag)
                 RTL_LOCALES.add(tag)
 
 
 
 
+def region_tag(locale: babel.Locale) -> str:
+    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US)."""
+    if not locale.territory:
+        raise ValueError('%s missed a territory')
+    return locale.language + '-' + locale.territory
+
+
+def language_tag(locale: babel.Locale) -> str:
+    """Returns SearXNG's language tag from the locale; if a script exists, the
+    tag includes the script name (e.g. en, zh_Hant).
+    """
+    sxng_lang = locale.language
+    if locale.script:
+        sxng_lang += '_' + locale.script
+    return sxng_lang
+
+
+def get_locale(locale_tag: str) -> Optional[babel.Locale]:
+    """Returns a :py:obj:`babel.Locale` object parsed from argument
+    ``locale_tag``"""
+    try:
+        locale = babel.Locale.parse(locale_tag, sep='-')
+        return locale
+
+    except babel.core.UnknownLocaleError:
+        return None
+
+
+def get_offical_locales(
+    territory: str, languages=None, regional: bool = False, de_facto: bool = True
+) -> Set[babel.Locale]:
+    """Returns a set of :py:obj:`babel.Locale` with languages from
+    :py:obj:`babel.languages.get_official_languages`.
+
+    :param territory: The territory (country or region) code.
+
+    :param languages: A list of language codes the languages from
+      :py:obj:`babel.languages.get_official_languages` should be in
+      (intersection).  If this argument is ``None``, all official languages in
+      this territory are used.
+
+    :param regional: If the regional flag is set, then languages which are
+      regionally official are also returned.
+
+    :param de_facto: If the de_facto flag is set to `False`, then languages
+      which are “de facto” official are not returned.
+
+    """
+    ret_val = set()
+    o_languages = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)
+
+    if languages:
+        languages = [l.lower() for l in languages]
+        o_languages = set(l for l in o_languages if l.lower() in languages)
+
+    for lang in o_languages:
+        try:
+            locale = babel.Locale.parse(lang + '_' + territory)
+            ret_val.add(locale)
+        except babel.UnknownLocaleError:
+            continue
+
+    return ret_val
+
+
 def get_engine_locale(searxng_locale, engine_locales, default=None):
 def get_engine_locale(searxng_locale, engine_locales, default=None):
     """Return engine's language (aka locale) string that best fits to argument
     """Return engine's language (aka locale) string that best fits to argument
     ``searxng_locale``.
     ``searxng_locale``.
@@ -177,6 +242,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
           ...
           ...
           'pl-PL'          : 'pl_PL',
           'pl-PL'          : 'pl_PL',
           'pt-PT'          : 'pt_PT'
           'pt-PT'          : 'pt_PT'
+          ..
+          'zh'             : 'zh'
+          'zh_Hans'        : 'zh'
+          'zh_Hant'        : 'zh-classical'
       }
       }
 
 
     .. hint::
     .. hint::
@@ -210,13 +279,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
       engine.
       engine.
 
 
     """
     """
-    # pylint: disable=too-many-branches
+    # pylint: disable=too-many-branches, too-many-return-statements
 
 
     engine_locale = engine_locales.get(searxng_locale)
     engine_locale = engine_locales.get(searxng_locale)
 
 
     if engine_locale is not None:
     if engine_locale is not None:
-        # There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no
-        # need to narrow language nor territory.
+        # There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language
+        # "zh --> zh"), no need to narrow language-script nor territory.
         return engine_locale
         return engine_locale
 
 
     try:
     try:
@@ -227,6 +296,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
         except babel.core.UnknownLocaleError:
         except babel.core.UnknownLocaleError:
             return default
             return default
 
 
+    searxng_lang = language_tag(locale)
+    engine_locale = engine_locales.get(searxng_lang)
+    if engine_locale is not None:
+        # There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans")
+        return engine_locale
+
     # SearXNG's selected locale is not supported by the engine ..
     # SearXNG's selected locale is not supported by the engine ..
 
 
     if locale.territory:
     if locale.territory:
@@ -247,10 +322,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
 
 
     if locale.language:
     if locale.language:
 
 
-        searxng_lang = locale.language
-        if locale.script:
-            searxng_lang += '_' + locale.script
-
         terr_lang_dict = {}
         terr_lang_dict = {}
         for territory, langs in babel.core.get_global("territory_languages").items():
         for territory, langs in babel.core.get_global("territory_languages").items():
             if not langs.get(searxng_lang, {}).get('official_status'):
             if not langs.get(searxng_lang, {}).get('official_status'):
@@ -303,3 +374,98 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
         engine_locale = default
         engine_locale = default
 
 
     return default
     return default
+
+
+def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
+    """Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.
+
+    :param str searxng_locale: SearXNG's internal representation of locale (de,
+        de-DE, fr-BE, zh, zh-CN, zh-TW ..).
+
+    :param list locale_tag_list: The list of locale tags to select from
+
+    :param str fallback: fallback locale tag (if unset --> ``None``)
+
+    The rules to find a match are implemented in :py:obj:`get_engine_locale`,
+    the ``engine_locales`` is built by :py:obj:`build_engine_locales`.
+
+    .. hint::
+
+       The *SearXNG locale* string and the members of ``locale_tag_list`` have to
+       be known by babel!  The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in the
+       UI and are not known by babel --> will be ignored.
+    """
+
+    # searxng_locale = 'es'
+    # locale_tag_list = ['es-AR', 'es-ES', 'es-MX']
+
+    if not searxng_locale:
+        return fallback
+
+    locale = get_locale(searxng_locale)
+    if locale is None:
+        return fallback
+
+    # normalize to a SearXNG locale that can be passed to get_engine_locale
+
+    searxng_locale = language_tag(locale)
+    if locale.territory:
+        searxng_locale = region_tag(locale)
+
+    # clean up locale_tag_list
+
+    tag_list = []
+    for tag in locale_tag_list:
+        if tag in ('all', 'auto') or tag in ADDITIONAL_TRANSLATIONS:
+            continue
+        tag_list.append(tag)
+
+    # emulate fetch_traits
+    engine_locales = build_engine_locales(tag_list)
+    return get_engine_locale(searxng_locale, engine_locales, default=fallback)
+
+
+def build_engine_locales(tag_list: List[str]):
+    """From a list of locale tags a dictionary is built that can be passed by
+    argument ``engine_locales`` to :py:obj:`get_engine_locale`.  This function
+    is mainly used by :py:obj:`match_locale` and is similar to what the
+    ``fetch_traits(..)`` function of engines does.
+
+    If there are territory codes in the ``tag_list`` that have a *script code*
+    additional keys are added to the returned dictionary.
+
+    .. code:: python
+
+       >>> import locales
+       >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW'])
+       >>> engine_locales
+       {
+           'en': 'en', 'en-US': 'en-US',
+           'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN',
+           'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW'
+       }
+       >>> get_engine_locale('zh-Hans', engine_locales)
+       'zh-CN'
+
+    This function is a good example to understand the language/region model
+    of SearXNG:
+
+      SearXNG only distinguishes between **search languages** and **search
+      regions**, by adding the *script-tags*, languages with *script-tags* can
+      be assigned to the **regions** that SearXNG supports.
+
+    """
+    engine_locales = {}
+
+    for tag in tag_list:
+        locale = get_locale(tag)
+        if locale is None:
+            logger.warn("build_engine_locales: skip locale tag %s / unknown by babel", tag)
+            continue
+        if locale.territory:
+            engine_locales[region_tag(locale)] = tag
+            if locale.script:
+                engine_locales[language_tag(locale)] = tag
+        else:
+            engine_locales[language_tag(locale)] = tag
+    return engine_locales

+ 1 - 1
searx/preferences.py

@@ -13,7 +13,7 @@ from typing import Iterable, Dict, List
 import flask
 import flask
 
 
 from searx import settings, autocomplete
 from searx import settings, autocomplete
-from searx.engines import Engine
+from searx.enginelib import Engine
 from searx.plugins import Plugin
 from searx.plugins import Plugin
 from searx.locales import LOCALE_NAMES
 from searx.locales import LOCALE_NAMES
 from searx.webutils import VALID_LANGUAGE_CODE
 from searx.webutils import VALID_LANGUAGE_CODE

+ 3 - 3
searx/query.py

@@ -4,7 +4,7 @@ from abc import abstractmethod, ABC
 import re
 import re
 
 
 from searx import settings
 from searx import settings
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
 from searx.engines import categories, engines, engine_shortcuts
 from searx.engines import categories, engines, engine_shortcuts
 from searx.external_bang import get_bang_definition_and_autocomplete
 from searx.external_bang import get_bang_definition_and_autocomplete
 from searx.search import EngineRef
 from searx.search import EngineRef
@@ -84,7 +84,7 @@ class LanguageParser(QueryPartParser):
         found = False
         found = False
         # check if any language-code is equal with
         # check if any language-code is equal with
         # declared language-codes
         # declared language-codes
-        for lc in language_codes:
+        for lc in sxng_locales:
             lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
             lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
 
 
             # if correct language-code is found
             # if correct language-code is found
@@ -125,7 +125,7 @@ class LanguageParser(QueryPartParser):
                     self.raw_text_query.autocomplete_list.append(lang)
                     self.raw_text_query.autocomplete_list.append(lang)
             return
             return
 
 
-        for lc in language_codes:
+        for lc in sxng_locales:
             if lc[0] not in settings['search']['languages']:
             if lc[0] not in settings['search']['languages']:
                 continue
                 continue
             lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
             lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)

+ 4 - 1
searx/search/processors/__init__.py

@@ -30,7 +30,10 @@ from .abstract import EngineProcessor
 
 
 logger = logger.getChild('search.processors')
 logger = logger.getChild('search.processors')
 PROCESSORS: Dict[str, EngineProcessor] = {}
 PROCESSORS: Dict[str, EngineProcessor] = {}
-"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)"""
+"""Cache request processors, stored by *engine-name* (:py:func:`initialize`)
+
+:meta hide-value:
+"""
 
 
 
 
 def get_processor_class(engine_type):
 def get_processor_class(engine_type):

+ 11 - 1
searx/search/processors/abstract.py

@@ -138,7 +138,8 @@ class EngineProcessor(ABC):
         return False
         return False
 
 
     def get_params(self, search_query, engine_category):
     def get_params(self, search_query, engine_category):
-        """Returns a set of *request params* or ``None`` if request is not supported.
+        """Returns a set of :ref:`request params <engine request arguments>` or
+        ``None`` if request is not supported.
 
 
         Not supported conditions (``None`` is returned):
         Not supported conditions (``None`` is returned):
 
 
@@ -159,11 +160,20 @@ class EngineProcessor(ABC):
         params['safesearch'] = search_query.safesearch
         params['safesearch'] = search_query.safesearch
         params['time_range'] = search_query.time_range
         params['time_range'] = search_query.time_range
         params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
         params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
+        params['searxng_locale'] = search_query.lang
+
+        # deprecated / vintage --> use params['searxng_locale']
+        #
+        # Conditions related to engine's traits are implemented in engine.traits
+        # module. Don't do 'locale' decisions here in the abstract layer of the
+        # search processor, just pass the value from user's choice unchanged to
+        # the engine request.
 
 
         if hasattr(self.engine, 'language') and self.engine.language:
         if hasattr(self.engine, 'language') and self.engine.language:
             params['language'] = self.engine.language
             params['language'] = self.engine.language
         else:
         else:
             params['language'] = search_query.lang
             params['language'] = search_query.lang
+
         return params
         return params
 
 
     @abstractmethod
     @abstractmethod

+ 4 - 6
searx/search/processors/online.py

@@ -51,6 +51,9 @@ class OnlineProcessor(EngineProcessor):
         super().initialize()
         super().initialize()
 
 
     def get_params(self, search_query, engine_category):
     def get_params(self, search_query, engine_category):
+        """Returns a set of :ref:`request params <engine request online>` or ``None``
+        if request is not supported.
+        """
         params = super().get_params(search_query, engine_category)
         params = super().get_params(search_query, engine_category)
         if params is None:
         if params is None:
             return None
             return None
@@ -184,11 +187,6 @@ class OnlineProcessor(EngineProcessor):
             self.handle_exception(result_container, e, suspend=True)
             self.handle_exception(result_container, e, suspend=True)
             self.logger.exception('CAPTCHA')
             self.logger.exception('CAPTCHA')
         except SearxEngineTooManyRequestsException as e:
         except SearxEngineTooManyRequestsException as e:
-            if "google" in self.engine_name:
-                self.logger.warn(
-                    "Set to 'true' the use_mobile_ui parameter in the 'engines:'"
-                    " section of your settings.yml file if google is blocked for you."
-                )
             self.handle_exception(result_container, e, suspend=True)
             self.handle_exception(result_container, e, suspend=True)
             self.logger.exception('Too many requests')
             self.logger.exception('Too many requests')
         except SearxEngineAccessDeniedException as e:
         except SearxEngineAccessDeniedException as e:
@@ -223,7 +221,7 @@ class OnlineProcessor(EngineProcessor):
                 'test': ['unique_results'],
                 'test': ['unique_results'],
             }
             }
 
 
-        if getattr(self.engine, 'supported_languages', []):
+        if getattr(self.engine, 'traits', False):
             tests['lang_fr'] = {
             tests['lang_fr'] = {
                 'matrix': {'query': 'paris', 'lang': 'fr'},
                 'matrix': {'query': 'paris', 'lang': 'fr'},
                 'result_container': ['not_empty', ('has_language', 'fr')],
                 'result_container': ['not_empty', ('has_language', 'fr')],

+ 2 - 2
searx/search/processors/online_currency.py

@@ -38,8 +38,8 @@ class OnlineCurrencyProcessor(OnlineProcessor):
     engine_type = 'online_currency'
     engine_type = 'online_currency'
 
 
     def get_params(self, search_query, engine_category):
     def get_params(self, search_query, engine_category):
-        """Returns a set of *request params* or ``None`` if search query does not match
-        to :py:obj:`parser_re`."""
+        """Returns a set of :ref:`request params <engine request online_currency>`
+        or ``None`` if search query does not match to :py:obj:`parser_re`."""
 
 
         params = super().get_params(search_query, engine_category)
         params = super().get_params(search_query, engine_category)
         if params is None:
         if params is None:

+ 3 - 2
searx/search/processors/online_dictionary.py

@@ -18,8 +18,9 @@ class OnlineDictionaryProcessor(OnlineProcessor):
     engine_type = 'online_dictionary'
     engine_type = 'online_dictionary'
 
 
     def get_params(self, search_query, engine_category):
     def get_params(self, search_query, engine_category):
-        """Returns a set of *request params* or ``None`` if search query does not match
-        to :py:obj:`parser_re`."""
+        """Returns a set of :ref:`request params <engine request online_dictionary>` or
+        ``None`` if search query does not match to :py:obj:`parser_re`.
+        """
         params = super().get_params(search_query, engine_category)
         params = super().get_params(search_query, engine_category)
         if params is None:
         if params is None:
             return None
             return None

+ 3 - 2
searx/search/processors/online_url_search.py

@@ -20,9 +20,10 @@ class OnlineUrlSearchProcessor(OnlineProcessor):
     engine_type = 'online_url_search'
     engine_type = 'online_url_search'
 
 
     def get_params(self, search_query, engine_category):
     def get_params(self, search_query, engine_category):
-        """Returns a set of *request params* or ``None`` if search query does not match
-        to at least one of :py:obj:`re_search_urls`.
+        """Returns a set of :ref:`request params <engine request online>` or ``None`` if
+        search query does not match to :py:obj:`re_search_urls`.
         """
         """
+
         params = super().get_params(search_query, engine_category)
         params = super().get_params(search_query, engine_category)
         if params is None:
         if params is None:
             return None
             return None

+ 2 - 16
searx/settings.yml

@@ -731,22 +731,9 @@ engines:
   - name: google
   - name: google
     engine: google
     engine: google
     shortcut: go
     shortcut: go
-    # see https://docs.searxng.org/src/searx.engines.google.html#module-searx.engines.google
-    use_mobile_ui: false
     # additional_tests:
     # additional_tests:
     #   android: *test_android
     #   android: *test_android
 
 
-  # - name: google italian
-  #   engine: google
-  #   shortcut: goit
-  #   use_mobile_ui: false
-  #   language: it
-
-  # - name: google mobile ui
-  #   engine: google
-  #   shortcut: gomui
-  #   use_mobile_ui: true
-
   - name: google images
   - name: google images
     engine: google_images
     engine: google_images
     shortcut: goi
     shortcut: goi
@@ -1758,9 +1745,8 @@ engines:
     engine: peertube
     engine: peertube
     shortcut: ptb
     shortcut: ptb
     paging: true
     paging: true
-    # https://instances.joinpeertube.org/instances
-    base_url: https://peertube.biz/
-    # base_url: https://tube.tardis.world/
+    # alternatives see: https://instances.joinpeertube.org/instances
+    # base_url: https://tube.4aem.com
     categories: videos
     categories: videos
     disabled: true
     disabled: true
     timeout: 6.0
     timeout: 6.0

+ 4 - 4
searx/settings_defaults.py

@@ -12,13 +12,13 @@ import logging
 from base64 import b64decode
 from base64 import b64decode
 from os.path import dirname, abspath
 from os.path import dirname, abspath
 
 
-from searx.languages import language_codes as languages
+from .sxng_locales import sxng_locales
 
 
 searx_dir = abspath(dirname(__file__))
 searx_dir = abspath(dirname(__file__))
 
 
 logger = logging.getLogger('searx')
 logger = logging.getLogger('searx')
 OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
 OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
-LANGUAGE_CODES = ['all', 'auto'] + list(l[0] for l in languages)
+SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales)
 SIMPLE_STYLE = ('auto', 'light', 'dark')
 SIMPLE_STYLE = ('auto', 'light', 'dark')
 CATEGORIES_AS_TABS = {
 CATEGORIES_AS_TABS = {
     'general': {},
     'general': {},
@@ -156,8 +156,8 @@ SCHEMA = {
         'safe_search': SettingsValue((0, 1, 2), 0),
         'safe_search': SettingsValue((0, 1, 2), 0),
         'autocomplete': SettingsValue(str, ''),
         'autocomplete': SettingsValue(str, ''),
         'autocomplete_min': SettingsValue(int, 4),
         'autocomplete_min': SettingsValue(int, 4),
-        'default_lang': SettingsValue(tuple(LANGUAGE_CODES + ['']), ''),
-        'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
+        'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''),
+        'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),
         'ban_time_on_fail': SettingsValue(numbers.Real, 5),
         'ban_time_on_fail': SettingsValue(numbers.Real, 5),
         'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
         'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
         'suspended_times': {
         'suspended_times': {

+ 69 - 22
searx/languages.py → searx/sxng_locales.py

@@ -1,73 +1,120 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
-# list of language codes
-# this file is generated automatically by utils/fetch_languages.py
-language_codes = (
-    ('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
-    ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'),
-    ('be-BY', 'Беларуская', 'Беларусь', 'Belarusian', '\U0001f1e7\U0001f1fe'),
+'''List of SearXNG's locale codes.
+
+This file is generated automatically by::
+
+   ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+'''
+
+sxng_locales = (
+    ('ar', 'العربية', '', 'Arabic', '\U0001f310'),
+    ('bg', 'Български', '', 'Bulgarian', '\U0001f310'),
     ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
     ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
+    ('ca', 'Català', '', 'Catalan', '\U0001f310'),
     ('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'),
     ('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'),
+    ('cs', 'Čeština', '', 'Czech', '\U0001f310'),
     ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
     ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
+    ('da', 'Dansk', '', 'Danish', '\U0001f310'),
     ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
     ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
     ('de', 'Deutsch', '', 'German', '\U0001f310'),
     ('de', 'Deutsch', '', 'German', '\U0001f310'),
     ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
     ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
     ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
     ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
     ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
     ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
+    ('el', 'Ελληνικά', '', 'Greek', '\U0001f310'),
     ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
     ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
     ('en', 'English', '', 'English', '\U0001f310'),
     ('en', 'English', '', 'English', '\U0001f310'),
     ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
     ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
     ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
     ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
     ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
     ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
     ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
     ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
+    ('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'),
     ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
     ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
     ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
     ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
+    ('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'),
     ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
     ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
+    ('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'),
     ('es', 'Español', '', 'Spanish', '\U0001f310'),
     ('es', 'Español', '', 'Spanish', '\U0001f310'),
     ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
     ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
     ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
     ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
     ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
     ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
     ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
     ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
+    ('es-US', 'Español', 'Estados Unidos', 'Spanish', '\U0001f1fa\U0001f1f8'),
+    ('et', 'Eesti', '', 'Estonian', '\U0001f310'),
     ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
     ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
-    ('fa-IR', 'فارسی', 'ایران', 'Persian', '\U0001f1ee\U0001f1f7'),
+    ('fi', 'Suomi', '', 'Finnish', '\U0001f310'),
     ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
     ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
-    ('fil-PH', 'Filipino', 'Pilipinas', 'Filipino', '\U0001f1f5\U0001f1ed'),
     ('fr', 'Français', '', 'French', '\U0001f310'),
     ('fr', 'Français', '', 'French', '\U0001f310'),
     ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
     ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
     ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
     ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
     ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
     ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
     ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
     ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
-    ('he-IL', 'עברית', 'ישראל', 'Hebrew', '\U0001f1ee\U0001f1f1'),
-    ('hi-IN', 'हिन्दी', 'भारत', 'Hindi', '\U0001f1ee\U0001f1f3'),
-    ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
+    ('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f7'),
+    ('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'),
+    ('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'),
+    ('hu', 'Magyar', '', 'Hungarian', '\U0001f310'),
     ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
     ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
+    ('id', 'Indonesia', '', 'Indonesian', '\U0001f310'),
     ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
     ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
-    ('is-IS', 'Íslenska', 'Ísland', 'Icelandic', '\U0001f1ee\U0001f1f8'),
+    ('is', 'Íslenska', '', 'Icelandic', '\U0001f310'),
+    ('it', 'Italiano', '', 'Italian', '\U0001f310'),
+    ('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'),
     ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
     ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
+    ('ja', '日本語', '', 'Japanese', '\U0001f310'),
     ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
     ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
+    ('ko', '한국어', '', 'Korean', '\U0001f310'),
     ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
     ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
-    ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
-    ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'),
+    ('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'),
+    ('lv', 'Latviešu', '', 'Latvian', '\U0001f310'),
+    ('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'),
+    ('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'),
     ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
     ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
     ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
     ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
     ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
     ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
-    ('no-NO', 'Norsk', '', 'Norwegian (Bokmål)', '\U0001f1f3\U0001f1f4'),
+    ('pl', 'Polski', '', 'Polish', '\U0001f310'),
     ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
     ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
     ('pt', 'Português', '', 'Portuguese', '\U0001f310'),
     ('pt', 'Português', '', 'Portuguese', '\U0001f310'),
     ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
     ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
     ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
     ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
+    ('ro', 'Română', '', 'Romanian', '\U0001f310'),
     ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
     ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
+    ('ru', 'Русский', '', 'Russian', '\U0001f310'),
     ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
     ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
-    ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
-    ('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'),
-    ('sr-RS', 'Српски', 'Србија', 'Serbian', '\U0001f1f7\U0001f1f8'),
+    ('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'),
+    ('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'),
+    ('sr', 'Српски', '', 'Serbian', '\U0001f310'),
+    ('sv', 'Svenska', '', 'Swedish', '\U0001f310'),
     ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
     ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
-    ('sw-TZ', 'Kiswahili', 'Tanzania', 'Swahili', '\U0001f1f9\U0001f1ff'),
+    ('th', 'ไทย', '', 'Thai', '\U0001f310'),
     ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
     ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
+    ('tr', 'Türkçe', '', 'Turkish', '\U0001f310'),
     ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
     ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
-    ('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
-    ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
+    ('uk', 'Українська', '', 'Ukrainian', '\U0001f310'),
+    ('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'),
     ('zh', '中文', '', 'Chinese', '\U0001f310'),
     ('zh', '中文', '', 'Chinese', '\U0001f310'),
     ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
     ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
-    ('zh-HK', '中文', '中國香港', 'Chinese', '\U0001f1ed\U0001f1f0'),
+    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
     ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
     ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
 )
 )
+'''
+A list of five-element tuples:
+
+0. SearXNG's internal locale tag (a language or region tag)
+1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
+2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
+   Empty string for language tags.
+3. English language name (from :py:obj:`babel.core.Locale.english_name`)
+4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
+   are represented by a globe (🌐)
+
+.. code:: python
+
+   ('en',    'English', '',              'English', '🌐'),
+   ('en-CA', 'English', 'Canada',        'English', '🇨🇦'),
+   ('en-US', 'English', 'United States', 'English', '🇺🇸'),
+   ..
+   ('fr',    'Français', '',             'French',  '🌐'),
+   ('fr-BE', 'Français', 'Belgique',     'French',  '🇧🇪'),
+   ('fr-CA', 'Français', 'Canada',       'French',  '🇨🇦'),
+
+:meta hide-value:
+'''

+ 4 - 4
searx/templates/simple/filters/languages.html

@@ -1,12 +1,12 @@
 <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
 <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
-	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }} [all]</option>
 	<option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>
 	<option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>
 		{{- _('Auto-detect') -}}
 		{{- _('Auto-detect') -}}
 		{%- if current_language == 'auto' %} ({{ search_language }}){%- endif -%}
 		{%- if current_language == 'auto' %} ({{ search_language }}){%- endif -%}
 	</option>
 	</option>
-	{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
-	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
-		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}
+	{%- for sxng_tag,lang_name,country_name,english_name,flag in sxng_locales | sort(attribute=1) -%}
+	<option value="{{ sxng_tag }}" {% if sxng_tag == current_language %}selected="selected"{% endif %}>
+		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %} - {{ country_name }} {% endif %} [{{sxng_tag}}]
 	</option>
 	</option>
 	{%- endfor -%}
 	{%- endfor -%}
 </select>
 </select>

+ 4 - 4
searx/templates/simple/preferences.html

@@ -115,10 +115,10 @@
       <legend id="pref_language">{{ _('Search language') }}</legend>
       <legend id="pref_language">{{ _('Search language') }}</legend>
       <p class="value">{{- '' -}}
       <p class="value">{{- '' -}}
         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
-          <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-          <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }}</option>
-          {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
-          <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option>
+          <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }} [all]</option>
+          <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }} [auto]</option>
+          {%- for sxng_tag,lang_name,country_name,english_name,flag in sxng_locales | sort(attribute=1) -%}
+          <option value="{{ sxng_tag }}" {% if sxng_tag == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %} - {{ country_name }} {% endif %} [{{sxng_tag}}]</option>
           {%- endfor -%}
           {%- endfor -%}
         </select>{{- '' -}}
         </select>{{- '' -}}
       </p>
       </p>

+ 5 - 93
searx/utils.py

@@ -18,13 +18,11 @@ from urllib.parse import urljoin, urlparse
 
 
 from lxml import html
 from lxml import html
 from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
 from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
-from babel.core import get_global
-
 
 
 from searx import settings
 from searx import settings
 from searx.data import USER_AGENTS, data_dir
 from searx.data import USER_AGENTS, data_dir
 from searx.version import VERSION_TAG
 from searx.version import VERSION_TAG
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
 from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
 from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
 from searx import logger
 from searx import logger
 
 
@@ -53,8 +51,8 @@ _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
 _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
 _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
 """fasttext model to predict language of a search term"""
 """fasttext model to predict language of a search term"""
 
 
-SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in language_codes])
-"""Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`)."""
+SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales])
+"""Languages supported by most searxng engines (:py:obj:`searx.sxng_locales.sxng_locales`)."""
 
 
 
 
 class _NotSetClass:  # pylint: disable=too-few-public-methods
 class _NotSetClass:  # pylint: disable=too-few-public-methods
@@ -355,102 +353,16 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
     is_abbr = len(lang) == 2
     is_abbr = len(lang) == 2
     lang = lang.lower()
     lang = lang.lower()
     if is_abbr:
     if is_abbr:
-        for l in language_codes:
+        for l in sxng_locales:
             if l[0][:2] == lang:
             if l[0][:2] == lang:
                 return (True, l[0][:2], l[3].lower())
                 return (True, l[0][:2], l[3].lower())
         return None
         return None
-    for l in language_codes:
+    for l in sxng_locales:
         if l[1].lower() == lang or l[3].lower() == lang:
         if l[1].lower() == lang or l[3].lower() == lang:
             return (True, l[0][:2], l[3].lower())
             return (True, l[0][:2], l[3].lower())
     return None
     return None
 
 
 
 
-def _get_lang_to_lc_dict(lang_list: List[str]) -> Dict[str, str]:
-    key = str(lang_list)
-    value = _LANG_TO_LC_CACHE.get(key, None)
-    if value is None:
-        value = {}
-        for lang in lang_list:
-            value.setdefault(lang.split('-')[0], lang)
-        _LANG_TO_LC_CACHE[key] = value
-    return value
-
-
-# babel's get_global contains all sorts of miscellaneous locale and territory related data
-# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py
-def _get_from_babel(lang_code: str, key):
-    match = get_global(key).get(lang_code.replace('-', '_'))
-    # for some keys, such as territory_aliases, match may be a list
-    if isinstance(match, str):
-        return match.replace('_', '-')
-    return match
-
-
-def _match_language(lang_code: str, lang_list=[], custom_aliases={}) -> Optional[str]:  # pylint: disable=W0102
-    """auxiliary function to match lang_code in lang_list"""
-    # replace language code with a custom alias if necessary
-    if lang_code in custom_aliases:
-        lang_code = custom_aliases[lang_code]
-
-    if lang_code in lang_list:
-        return lang_code
-
-    # try to get the most likely country for this language
-    subtags = _get_from_babel(lang_code, 'likely_subtags')
-    if subtags:
-        if subtags in lang_list:
-            return subtags
-        subtag_parts = subtags.split('-')
-        new_code = subtag_parts[0] + '-' + subtag_parts[-1]
-        if new_code in custom_aliases:
-            new_code = custom_aliases[new_code]
-        if new_code in lang_list:
-            return new_code
-
-    # try to get the any supported country for this language
-    return _get_lang_to_lc_dict(lang_list).get(lang_code)
-
-
-def match_language(  # pylint: disable=W0102
-    locale_code, lang_list=[], custom_aliases={}, fallback: Optional[str] = 'en-US'
-) -> Optional[str]:
-    """get the language code from lang_list that best matches locale_code"""
-    # try to get language from given locale_code
-    language = _match_language(locale_code, lang_list, custom_aliases)
-    if language:
-        return language
-
-    locale_parts = locale_code.split('-')
-    lang_code = locale_parts[0]
-
-    # if locale_code has script, try matching without it
-    if len(locale_parts) > 2:
-        language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases)
-        if language:
-            return language
-
-    # try to get language using an equivalent country code
-    if len(locale_parts) > 1:
-        country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases')
-        if country_alias:
-            language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases)
-            if language:
-                return language
-
-    # try to get language using an equivalent language code
-    alias = _get_from_babel(lang_code, 'language_aliases')
-    if alias:
-        language = _match_language(alias, lang_list, custom_aliases)
-        if language:
-            return language
-
-    if lang_code != locale_code:
-        # try to get language from given language without giving the country
-        language = _match_language(lang_code, lang_list, custom_aliases)
-
-    return language or fallback
-
-
 def load_module(filename: str, module_dir: str) -> types.ModuleType:
 def load_module(filename: str, module_dir: str) -> types.ModuleType:
     modname = splitext(filename)[0]
     modname = splitext(filename)[0]
     modpath = join(module_dir, filename)
     modpath = join(module_dir, filename)

+ 33 - 40
searx/webapp.py

@@ -89,7 +89,6 @@ from searx.utils import (
     html_to_text,
     html_to_text,
     gen_useragent,
     gen_useragent,
     dict_subset,
     dict_subset,
-    match_language,
 )
 )
 from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
 from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
 from searx.query import RawTextQuery
 from searx.query import RawTextQuery
@@ -117,12 +116,13 @@ from searx.locales import (
     RTL_LOCALES,
     RTL_LOCALES,
     localeselector,
     localeselector,
     locales_initialize,
     locales_initialize,
+    match_locale,
 )
 )
 
 
 # renaming names from searx imports ...
 # renaming names from searx imports ...
 from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
 from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
-from searx.languages import language_codes as languages
 from searx.redisdb import initialize as redis_initialize
 from searx.redisdb import initialize as redis_initialize
+from searx.sxng_locales import sxng_locales
 from searx.search import SearchWithPlugins, initialize as search_initialize
 from searx.search import SearchWithPlugins, initialize as search_initialize
 from searx.network import stream as http_stream, set_context_network_name
 from searx.network import stream as http_stream, set_context_network_name
 from searx.search.checker import get_result as checker_get_result
 from searx.search.checker import get_result as checker_get_result
@@ -227,7 +227,7 @@ def _get_browser_language(req, lang_list):
         if '-' in lang:
         if '-' in lang:
             lang_parts = lang.split('-')
             lang_parts = lang.split('-')
             lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper())
             lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper())
-        locale = match_language(lang, lang_list, fallback=None)
+        locale = match_locale(lang, lang_list, fallback=None)
         if locale is not None:
         if locale is not None:
             return locale
             return locale
     return 'en'
     return 'en'
@@ -407,7 +407,7 @@ def get_client_settings():
 
 
 
 
 def render(template_name: str, **kwargs):
 def render(template_name: str, **kwargs):
-
+    # pylint: disable=too-many-statements
     kwargs['client_settings'] = str(
     kwargs['client_settings'] = str(
         base64.b64encode(
         base64.b64encode(
             bytes(
             bytes(
@@ -438,17 +438,20 @@ def render(template_name: str, **kwargs):
     kwargs['OTHER_CATEGORY'] = OTHER_CATEGORY
     kwargs['OTHER_CATEGORY'] = OTHER_CATEGORY
 
 
     # i18n
     # i18n
-    kwargs['language_codes'] = [l for l in languages if l[0] in settings['search']['languages']]
+    kwargs['sxng_locales'] = [l for l in sxng_locales if l[0] in settings['search']['languages']]
 
 
     locale = request.preferences.get_value('locale')
     locale = request.preferences.get_value('locale')
     kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale)
     kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale)
 
 
     if locale in RTL_LOCALES and 'rtl' not in kwargs:
     if locale in RTL_LOCALES and 'rtl' not in kwargs:
         kwargs['rtl'] = True
         kwargs['rtl'] = True
+
     if 'current_language' not in kwargs:
     if 'current_language' not in kwargs:
-        kwargs['current_language'] = match_language(
-            request.preferences.get_value('language'), settings['search']['languages']
-        )
+        _locale = request.preferences.get_value('language')
+        if _locale in ('auto', 'all'):
+            kwargs['current_language'] = _locale
+        else:
+            kwargs['current_language'] = match_locale(_locale, settings['search']['languages'])
 
 
     # values from settings
     # values from settings
     kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
     kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
@@ -810,6 +813,13 @@ def search():
         )
         )
     )
     )
 
 
+    if search_query.lang in ('auto', 'all'):
+        current_language = search_query.lang
+    else:
+        current_language = match_locale(
+            search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language")
+        )
+
     # search_query.lang contains the user choice (all, auto, en, ...)
     # search_query.lang contains the user choice (all, auto, en, ...)
     # when the user choice is "auto", search.search_query.lang contains the detected language
     # when the user choice is "auto", search.search_query.lang contains the detected language
     # otherwise it is equals to search_query.lang
     # otherwise it is equals to search_query.lang
@@ -832,12 +842,8 @@ def search():
             result_container.unresponsive_engines
             result_container.unresponsive_engines
         ),
         ),
         current_locale = request.preferences.get_value("locale"),
         current_locale = request.preferences.get_value("locale"),
-        current_language = match_language(
-            search_query.lang,
-            settings['search']['languages'],
-            fallback=request.preferences.get_value("language")
-        ),
-        search_language = match_language(
+        current_language = current_language,
+        search_language = match_locale(
             search.search_query.lang,
             search.search_query.lang,
             settings['search']['languages'],
             settings['search']['languages'],
             fallback=request.preferences.get_value("language")
             fallback=request.preferences.get_value("language")
@@ -907,16 +913,11 @@ def autocompleter():
     # and there is a query part
     # and there is a query part
     if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:
     if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:
 
 
-        # get language from cookie
-        language = request.preferences.get_value('language')
-        if not language or language == 'all':
-            language = 'en'
-        else:
-            language = language.split('-')[0]
+        # get SearXNG's locale and autocomplete backend from cookie
+        sxng_locale = request.preferences.get_value('language')
+        backend_name = request.preferences.get_value('autocomplete')
 
 
-        # run autocompletion
-        raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language)
-        for result in raw_results:
+        for result in search_autocomplete(backend_name, sug_prefix, sxng_locale):
             # attention: this loop will change raw_text_query object and this is
             # attention: this loop will change raw_text_query object and this is
             # the reason why the sug_prefix was stored before (see above)
             # the reason why the sug_prefix was stored before (see above)
             if result != sug_prefix:
             if result != sug_prefix:
@@ -1001,7 +1002,9 @@ def preferences():
             'rate80': rate80,
             'rate80': rate80,
             'rate95': rate95,
             'rate95': rate95,
             'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
             'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
-            'supports_selected_language': _is_selected_language_supported(e, request.preferences),
+            'supports_selected_language': e.traits.is_locale_supported(
+                str(request.preferences.get_value('language') or 'all')
+            ),
             'result_count': result_count,
             'result_count': result_count,
         }
         }
     # end of stats
     # end of stats
@@ -1052,7 +1055,9 @@ def preferences():
     # supports
     # supports
     supports = {}
     supports = {}
     for _, e in filtered_engines.items():
     for _, e in filtered_engines.items():
-        supports_selected_language = _is_selected_language_supported(e, request.preferences)
+        supports_selected_language = e.traits.is_locale_supported(
+            str(request.preferences.get_value('language') or 'all')
+        )
         safesearch = e.safesearch
         safesearch = e.safesearch
         time_range_support = e.time_range_support
         time_range_support = e.time_range_support
         for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
         for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
@@ -1099,16 +1104,6 @@ def preferences():
     )
     )
 
 
 
 
-def _is_selected_language_supported(engine, preferences: Preferences):  # pylint: disable=redefined-outer-name
-    language = preferences.get_value('language')
-    if language == 'all':
-        return True
-    x = match_language(
-        language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None
-    )
-    return bool(x)
-
-
 @app.route('/image_proxy', methods=['GET'])
 @app.route('/image_proxy', methods=['GET'])
 def image_proxy():
 def image_proxy():
     # pylint: disable=too-many-return-statements, too-many-branches
     # pylint: disable=too-many-return-statements, too-many-branches
@@ -1327,10 +1322,7 @@ def config():
         if not request.preferences.validate_token(engine):
         if not request.preferences.validate_token(engine):
             continue
             continue
 
 
-        supported_languages = engine.supported_languages
-        if isinstance(engine.supported_languages, dict):
-            supported_languages = list(engine.supported_languages.keys())
-
+        _languages = engine.traits.languages.keys()
         _engines.append(
         _engines.append(
             {
             {
                 'name': name,
                 'name': name,
@@ -1339,7 +1331,8 @@ def config():
                 'enabled': not engine.disabled,
                 'enabled': not engine.disabled,
                 'paging': engine.paging,
                 'paging': engine.paging,
                 'language_support': engine.language_support,
                 'language_support': engine.language_support,
-                'supported_languages': supported_languages,
+                'languages': list(_languages),
+                'regions': list(engine.traits.regions.keys()),
                 'safesearch': engine.safesearch,
                 'safesearch': engine.safesearch,
                 'time_range_support': engine.time_range_support,
                 'time_range_support': engine.time_range_support,
                 'timeout': engine.timeout,
                 'timeout': engine.timeout,

+ 7 - 2
searx/webutils.py

@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
+from __future__ import annotations
+
 import os
 import os
 import pathlib
 import pathlib
 import csv
 import csv
@@ -8,7 +10,7 @@ import re
 import inspect
 import inspect
 import itertools
 import itertools
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
-from typing import Iterable, List, Tuple, Dict
+from typing import Iterable, List, Tuple, Dict, TYPE_CHECKING
 
 
 from io import StringIO
 from io import StringIO
 from codecs import getincrementalencoder
 from codecs import getincrementalencoder
@@ -16,7 +18,10 @@ from codecs import getincrementalencoder
 from flask_babel import gettext, format_date
 from flask_babel import gettext, format_date
 
 
 from searx import logger, settings
 from searx import logger, settings
-from searx.engines import Engine, OTHER_CATEGORY
+from searx.engines import OTHER_CATEGORY
+
+if TYPE_CHECKING:
+    from searx.enginelib import Engine
 
 
 
 
 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')

+ 4 - 4
searxng_extra/update/update_engine_descriptions.py

@@ -18,8 +18,8 @@ from os.path import join
 from lxml.html import fromstring
 from lxml.html import fromstring
 
 
 from searx.engines import wikidata, set_loggers
 from searx.engines import wikidata, set_loggers
-from searx.utils import extract_text, match_language
-from searx.locales import LOCALE_NAMES, locales_initialize
+from searx.utils import extract_text
+from searx.locales import LOCALE_NAMES, locales_initialize, match_locale
 from searx import searx_dir
 from searx import searx_dir
 from searx.utils import gen_useragent, detect_language
 from searx.utils import gen_useragent, detect_language
 import searx.search
 import searx.search
@@ -225,9 +225,9 @@ def fetch_website_description(engine_name, website):
             fetched_lang, desc = get_website_description(website, lang, WIKIPEDIA_LANGUAGES[lang])
             fetched_lang, desc = get_website_description(website, lang, WIKIPEDIA_LANGUAGES[lang])
             if fetched_lang is None or desc is None:
             if fetched_lang is None or desc is None:
                 continue
                 continue
-            matched_lang = match_language(fetched_lang, LANGUAGES, fallback=None)
+            matched_lang = match_locale(fetched_lang, LANGUAGES, fallback=None)
             if matched_lang is None:
             if matched_lang is None:
-                fetched_wikipedia_lang = match_language(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None)
+                fetched_wikipedia_lang = match_locale(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None)
                 matched_lang = wikipedia_languages_r.get(fetched_wikipedia_lang)
                 matched_lang = wikipedia_languages_r.get(fetched_wikipedia_lang)
             if matched_lang is not None:
             if matched_lang is not None:
                 update_description(engine_name, matched_lang, desc, website, replace=False)
                 update_description(engine_name, matched_lang, desc, website, replace=False)

+ 198 - 0
searxng_extra/update/update_engine_traits.py

@@ -0,0 +1,198 @@
+#!/usr/bin/env python
+# lint: pylint
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Update :py:obj:`searx.enginelib.traits.EngineTraitsMap` and :origin:`searx/sxng_locales.py`
+
+:py:obj:`searx.enginelib.traits.EngineTraitsMap.ENGINE_TRAITS_FILE`:
+  Persistence of the engines' traits, fetched from the engines.
+
+:origin:`searx/sxng_locales.py`
+  Is generated from the language and region tags supported by a minimum number of engines.
+
+The script :origin:`searxng_extra/update/update_engine_traits.py` is called in
+the :origin:`CI Update data ... <.github/workflows/data-update.yml>`
+
+"""
+
+# pylint: disable=invalid-name
+from unicodedata import lookup
+from pathlib import Path
+from pprint import pformat
+import babel
+
+from searx import settings, searx_dir
+from searx import network
+from searx.engines import load_engines
+from searx.enginelib.traits import EngineTraitsMap
+
+# Output files.
+languages_file = Path(searx_dir) / 'sxng_locales.py'
+languages_file_header = """\
+# -*- coding: utf-8 -*-
+'''List of SearXNG's locale codes.
+
+This file is generated automatically by::
+
+   ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+'''
+
+sxng_locales = (
+"""
+languages_file_footer = """,
+)
+'''
+A list of five-digit tuples:
+
+0. SearXNG's internal locale tag (a language or region tag)
+1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
+2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
+   Empty string for language tags.
+3. English language name (from :py:obj:`babel.core.Locale.english_name`)
+4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
+   are represented by a globe (\U0001F310)
+
+.. code:: python
+
+   ('en',    'English', '',              'English', '\U0001f310'),
+   ('en-CA', 'English', 'Canada',        'English', '\U0001f1e8\U0001f1e6'),
+   ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
+   ..
+   ('fr',    'Français', '',             'French',  '\U0001f310'),
+   ('fr-BE', 'Français', 'Belgique',     'French',  '\U0001f1e7\U0001f1ea'),
+   ('fr-CA', 'Français', 'Canada',       'French',  '\U0001f1e8\U0001f1e6'),
+
+:meta hide-value:
+'''
+"""
+
+
+lang2emoji = {
+    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger
+    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina
+    'jp': '\U0001F1EF\U0001F1F5',  # Japanese
+    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian
+    'he': '\U0001F1EE\U0001F1F7',  # Hebrew
+}
+
+
+def main():
+    load_engines(settings['engines'])
+    # traits_map = EngineTraitsMap.from_data()  # NOTE(review): looks like an alternative without fetching -- confirm
+    traits_map = fetch_traits_map()  # fetch the traits of the engines and save them
+    sxng_tag_list = filter_locales(traits_map)  # keep only tags supported by enough engines
+    write_languages_file(sxng_tag_list)  # (re-)generate the python file ``languages_file``
+
+
+def fetch_traits_map():
+    """Fetches the traits of the engines, writes them to the JSON file and returns the traits map."""
+    network.set_timeout_for_thread(10.0)
+
+    def log(msg):
+        print(msg)
+
+    traits_map = EngineTraitsMap.fetch_traits(log=log)  # progress messages are printed by log()
+    print("fetched properties from %s engines" % len(traits_map))
+    print("write json file: %s" % traits_map.ENGINE_TRAITS_FILE)
+    traits_map.save_data()
+    return traits_map
+
+
+def filter_locales(traits_map: EngineTraitsMap):
+    """Return the set of language & region tags that are supported by a minimum number of engines."""
+
+    min_eng_per_region = 11  # a region tag is kept if at least this many engines have it
+    min_eng_per_lang = 13  # a language tag is kept if at least this many engines have it
+
+    _ = {}  # region tag --> number of engines that have this region
+    for eng in traits_map.values():
+        for reg in eng.regions.keys():
+            _[reg] = _.get(reg, 0) + 1
+
+    regions = set(k for k, v in _.items() if v >= min_eng_per_region)
+    lang_from_region = set(k.split('-')[0] for k in regions)  # language part of each kept region tag
+
+    _ = {}  # language tag --> number of engines that have this language
+    for eng in traits_map.values():
+        for lang in eng.languages.keys():
+            # ignore script types like zh_Hant, zh_Hans or sr_Latin, pa_Arab (they
+            # are already counted by the existence of 'zh' or 'sr', 'pa')
+            if '_' in lang:
+                # print("ignore %s" % lang)
+                continue
+            _[lang] = _.get(lang, 0) + 1
+
+    languages = set(k for k, v in _.items() if v >= min_eng_per_lang)
+
+    sxng_tag_list = set()  # union of: kept regions, their languages and the kept languages
+    sxng_tag_list.update(regions)
+    sxng_tag_list.update(lang_from_region)
+    sxng_tag_list.update(languages)
+
+    return sxng_tag_list
+
+
+def write_languages_file(sxng_tag_list):
+    """Write the python file ``languages_file`` with one tuple for each tag of ``sxng_tag_list``."""
+    language_codes = []
+
+    for sxng_tag in sorted(sxng_tag_list):
+        sxng_locale: babel.Locale = babel.Locale.parse(sxng_tag, sep='-')
+
+        flag = get_unicode_flag(sxng_locale) or ''  # empty string if no flag was determined
+
+        item = (
+            sxng_tag,
+            sxng_locale.get_language_name().title(),
+            sxng_locale.get_territory_name() or '',
+            sxng_locale.english_name.split(' (')[0],  # keep only the part before the first ' ('
+            UnicodeEscape(flag),  # the flag is written in its escaped form (see UnicodeEscape.__repr__)
+        )
+
+        language_codes.append(item)
+
+    language_codes = tuple(language_codes)
+
+    with open(languages_file, 'w', encoding='utf-8') as new_file:
+        file_content = "{header} {language_codes}{footer}".format(
+            header=languages_file_header,
+            language_codes=pformat(language_codes, width=120, indent=4)[1:-1],  # strip the outer "(" and ")"
+            footer=languages_file_footer,
+        )
+        new_file.write(file_content)
+        new_file.close()  # redundant: the with-statement closes the file anyway
+
+
+class UnicodeEscape(str):
+    """String whose ``repr`` is its unicode-escaped form (used for the output of :py:obj:`pprint.pformat`)"""
+
+    def __repr__(self):
+        return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
+
+
+def get_unicode_flag(locale: babel.Locale):
+    """Determine a unicode flag (emoji) that fits to the ``locale``, ``None`` if no flag can be built."""
+
+    emoji = lang2emoji.get(locale.language)  # an entry for the language takes precedence
+    if emoji:
+        return emoji
+
+    if not locale.territory:
+        return '\U0001F310'  # globe: used for tags without a territory
+
+    emoji = lang2emoji.get(locale.territory.lower())  # next: an entry for the lowercased territory
+    if emoji:
+        return emoji
+
+    try:
+        c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[0])
+        c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[1])
+        # print("OK   : %s --> %s%s" % (locale, c1, c2))
+    except KeyError as exc:
+        print("ERROR: %s --> %s" % (locale, exc))
+        return None
+
+    return c1 + c2
+
+
+if __name__ == "__main__":
+    main()

+ 0 - 313
searxng_extra/update/update_languages.py

@@ -1,313 +0,0 @@
-#!/usr/bin/env python
-# lint: pylint
-
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""This script generates languages.py from intersecting each engine's supported
-languages.
-
-Output files: :origin:`searx/data/engines_languages.json` and
-:origin:`searx/languages.py` (:origin:`CI Update data ...
-<.github/workflows/data-update.yml>`).
-
-"""
-
-# pylint: disable=invalid-name
-from unicodedata import lookup
-import json
-from pathlib import Path
-from pprint import pformat
-from babel import Locale, UnknownLocaleError
-from babel.languages import get_global
-from babel.core import parse_locale
-
-from searx import settings, searx_dir
-from searx.engines import load_engines, engines
-from searx.network import set_timeout_for_thread
-
-# Output files.
-engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
-languages_file = Path(searx_dir) / 'languages.py'
-
-
-# Fetches supported languages for each engine and writes json file with those.
-def fetch_supported_languages():
-    set_timeout_for_thread(10.0)
-
-    engines_languages = {}
-    names = list(engines)
-    names.sort()
-
-    for engine_name in names:
-        if hasattr(engines[engine_name], 'fetch_supported_languages'):
-            engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
-            print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
-            if type(engines_languages[engine_name]) == list:  # pylint: disable=unidiomatic-typecheck
-                engines_languages[engine_name] = sorted(engines_languages[engine_name])
-
-    print("fetched languages from %s engines" % len(engines_languages))
-
-    # write json file
-    with open(engines_languages_file, 'w', encoding='utf-8') as f:
-        json.dump(engines_languages, f, indent=2, sort_keys=True)
-
-    return engines_languages
-
-
-# Get babel Locale object from lang_code if possible.
-def get_locale(lang_code):
-    try:
-        locale = Locale.parse(lang_code, sep='-')
-        return locale
-    except (UnknownLocaleError, ValueError):
-        return None
-
-
-lang2emoji = {
-    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger
-    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina
-    'jp': '\U0001F1EF\U0001F1F5',  # Japanese
-    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian
-    'he': '\U0001F1EE\U0001F1F7',  # Hebrew
-}
-
-
-def get_unicode_flag(lang_code):
-    """Determine a unicode flag (emoji) that fits to the ``lang_code``"""
-
-    emoji = lang2emoji.get(lang_code.lower())
-    if emoji:
-        return emoji
-
-    if len(lang_code) == 2:
-        return '\U0001F310'
-
-    language = territory = script = variant = ''
-    try:
-        language, territory, script, variant = parse_locale(lang_code, '-')
-    except ValueError as exc:
-        print(exc)
-
-    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
-    if not territory:
-        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
-        emoji = lang2emoji.get(language)
-        if not emoji:
-            print(
-                "%s --> language: %s / territory: %s / script: %s / variant: %s"
-                % (lang_code, language, territory, script, variant)
-            )
-        return emoji
-
-    emoji = lang2emoji.get(territory.lower())
-    if emoji:
-        return emoji
-
-    try:
-        c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
-        c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
-        # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
-    except KeyError as exc:
-        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
-        return None
-
-    return c1 + c2
-
-
-def get_territory_name(lang_code):
-    country_name = None
-    locale = get_locale(lang_code)
-    try:
-        if locale is not None:
-            country_name = locale.get_territory_name()
-    except FileNotFoundError as exc:
-        print("ERROR: %s --> %s" % (locale, exc))
-    return country_name
-
-
-# Join all language lists.
-def join_language_lists(engines_languages):
-    language_list = {}
-    for engine_name in engines_languages:
-        for lang_code in engines_languages[engine_name]:
-
-            # apply custom fixes if necessary
-            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
-                lang_code = next(
-                    lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias
-                )
-
-            locale = get_locale(lang_code)
-
-            # ensure that lang_code uses standard language and country codes
-            if locale and locale.territory:
-                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
-            short_code = lang_code.split('-')[0]
-
-            # add language without country if not in list
-            if short_code not in language_list:
-                if locale:
-                    # get language's data from babel's Locale object
-                    language_name = locale.get_language_name().title()
-                    english_name = locale.english_name.split(' (')[0]
-                elif short_code in engines_languages['wikipedia']:
-                    # get language's data from wikipedia if not known by babel
-                    language_name = engines_languages['wikipedia'][short_code]['name']
-                    english_name = engines_languages['wikipedia'][short_code]['english_name']
-                else:
-                    language_name = None
-                    english_name = None
-
-                # add language to list
-                language_list[short_code] = {
-                    'name': language_name,
-                    'english_name': english_name,
-                    'counter': set(),
-                    'countries': {},
-                }
-
-            # add language with country if not in list
-            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
-                country_name = ''
-                if locale:
-                    # get country name from babel's Locale object
-                    try:
-                        country_name = locale.get_territory_name()
-                    except FileNotFoundError as exc:
-                        print("ERROR: %s --> %s" % (locale, exc))
-                        locale = None
-
-                language_list[short_code]['countries'][lang_code] = {
-                    'country_name': country_name,
-                    'counter': set(),
-                }
-
-            # count engine for both language_country combination and language alone
-            language_list[short_code]['counter'].add(engine_name)
-            if lang_code != short_code:
-                language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)
-
-    return language_list
-
-
-# Filter language list so it only includes the most supported languages and countries
-def filter_language_list(all_languages):
-    min_engines_per_lang = 12
-    min_engines_per_country = 7
-    # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
-    main_engines = [
-        engine_name
-        for engine_name in engines.keys()
-        if 'general' in engines[engine_name].categories
-        and engines[engine_name].supported_languages
-        and not engines[engine_name].disabled
-    ]
-
-    # filter list to include only languages supported by most engines or all default general engines
-    filtered_languages = {
-        code: lang
-        for code, lang in all_languages.items()
-        if (
-            len(lang['counter']) >= min_engines_per_lang
-            or all(main_engine in lang['counter'] for main_engine in main_engines)
-        )
-    }
-
-    def _copy_lang_data(lang, country_name=None):
-        new_dict = {}
-        new_dict['name'] = all_languages[lang]['name']
-        new_dict['english_name'] = all_languages[lang]['english_name']
-        if country_name:
-            new_dict['country_name'] = country_name
-        return new_dict
-
-    # for each language get country codes supported by most engines or at least one country code
-    filtered_languages_with_countries = {}
-    for lang, lang_data in filtered_languages.items():
-        countries = lang_data['countries']
-        filtered_countries = {}
-
-        # get language's country codes with enough supported engines
-        for lang_country, country_data in countries.items():
-            if len(country_data['counter']) >= min_engines_per_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])
-
-        # add language without countries too if there's more than one country to choose from
-        if len(filtered_countries) > 1:
-            filtered_countries[lang] = _copy_lang_data(lang, None)
-        elif len(filtered_countries) == 1:
-            lang_country = next(iter(filtered_countries))
-
-        # if no country has enough engines try to get most likely country code from babel
-        if not filtered_countries:
-            lang_country = None
-            subtags = get_global('likely_subtags').get(lang)
-            if subtags:
-                country_code = subtags.split('_')[-1]
-                if len(country_code) == 2:
-                    lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
-
-            if lang_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang, None)
-            else:
-                filtered_countries[lang] = _copy_lang_data(lang, None)
-
-        filtered_languages_with_countries.update(filtered_countries)
-
-    return filtered_languages_with_countries
-
-
-class UnicodeEscape(str):
-    """Escape unicode string in :py:obj:`pprint.pformat`"""
-
-    def __repr__(self):
-        return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
-
-
-# Write languages.py.
-def write_languages_file(languages):
-    file_headers = (
-        "# -*- coding: utf-8 -*-",
-        "# list of language codes",
-        "# this file is generated automatically by utils/fetch_languages.py",
-        "language_codes = (\n",
-    )
-
-    language_codes = []
-
-    for code in sorted(languages):
-
-        name = languages[code]['name']
-        if name is None:
-            print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
-            continue
-
-        flag = get_unicode_flag(code) or ''
-        item = (
-            code,
-            languages[code]['name'].split(' (')[0],
-            get_territory_name(code) or '',
-            languages[code].get('english_name') or '',
-            UnicodeEscape(flag),
-        )
-
-        language_codes.append(item)
-
-    language_codes = tuple(language_codes)
-
-    with open(languages_file, 'w', encoding='utf-8') as new_file:
-        file_content = "{file_headers} {language_codes},\n)\n".format(
-            # fmt: off
-            file_headers = '\n'.join(file_headers),
-            language_codes = pformat(language_codes, indent=4)[1:-1]
-            # fmt: on
-        )
-        new_file.write(file_content)
-        new_file.close()
-
-
-if __name__ == "__main__":
-    load_engines(settings['engines'])
-    _engines_languages = fetch_supported_languages()
-    _all_languages = join_language_lists(_engines_languages)
-    _filtered_languages = filter_language_list(_all_languages)
-    write_languages_file(_filtered_languages)

+ 2 - 2
searxng_extra/update/update_osm_keys_tags.py

@@ -50,7 +50,7 @@ from pathlib import Path
 from searx import searx_dir
 from searx import searx_dir
 from searx.network import set_timeout_for_thread
 from searx.network import set_timeout_for_thread
 from searx.engines import wikidata, set_loggers
 from searx.engines import wikidata, set_loggers
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
 
 
 set_loggers(wikidata, 'wikidata')
 set_loggers(wikidata, 'wikidata')
@@ -76,7 +76,7 @@ GROUP BY ?key ?item ?itemLabel
 ORDER BY ?key ?item ?itemLabel
 ORDER BY ?key ?item ?itemLabel
 """
 """
 
 
-LANGUAGES = [l[0].lower() for l in language_codes]
+LANGUAGES = [l[0].lower() for l in sxng_locales]
 
 
 PRESET_KEYS = {
 PRESET_KEYS = {
     ('wikidata',): {'en': 'Wikidata'},
     ('wikidata',): {'en': 'Wikidata'},

+ 111 - 0
tests/unit/test_locales.py

@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Test some code from module :py:obj:`searx.locales`"""
+
+from searx import locales
+from searx.sxng_locales import sxng_locales
+from tests import SearxTestCase
+
+
+class TestLocales(SearxTestCase):
+    """Implemented tests:
+
+    - :py:obj:`searx.locales.match_locale`
+    """
+
+    def test_match_locale(self):
+
+        locale_tag_list = [x[0] for x in sxng_locales]
+
+        # Test SearXNG search languages
+
+        self.assertEqual(locales.match_locale('de', locale_tag_list), 'de')
+        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr')
+        self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh')
+
+        # Test SearXNG search regions
+
+        self.assertEqual(locales.match_locale('ca-es', locale_tag_list), 'ca-ES')
+        self.assertEqual(locales.match_locale('de-at', locale_tag_list), 'de-AT')
+        self.assertEqual(locales.match_locale('de-de', locale_tag_list), 'de-DE')
+        self.assertEqual(locales.match_locale('en-UK', locale_tag_list), 'en-GB')
+        self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE')
+        self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE')
+        self.assertEqual(locales.match_locale('fr-ca', locale_tag_list), 'fr-CA')
+        self.assertEqual(locales.match_locale('fr-ch', locale_tag_list), 'fr-CH')
+        self.assertEqual(locales.match_locale('zh-cn', locale_tag_list), 'zh-CN')
+        self.assertEqual(locales.match_locale('zh-tw', locale_tag_list), 'zh-TW')
+        self.assertEqual(locales.match_locale('zh-hk', locale_tag_list), 'zh-HK')
+
+        # Test language script code
+
+        self.assertEqual(locales.match_locale('zh-hans', locale_tag_list), 'zh-CN')
+        self.assertEqual(locales.match_locale('zh-hans-cn', locale_tag_list), 'zh-CN')
+        self.assertEqual(locales.match_locale('zh-hant', locale_tag_list), 'zh-TW')
+        self.assertEqual(locales.match_locale('zh-hant-tw', locale_tag_list), 'zh-TW')
+
+        # Test individual locale lists
+
+        self.assertEqual(locales.match_locale('es', [], fallback='fallback'), 'fallback')
+
+        self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE')
+        self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE')
+        self.assertEqual(locales.match_locale('es', ['ES']), 'ES')
+        self.assertEqual(locales.match_locale('es', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
+        self.assertEqual(locales.match_locale('es-AR', ['es-AR', 'es-ES', 'es-MX']), 'es-AR')
+        self.assertEqual(locales.match_locale('es-CO', ['es-AR', 'es-ES']), 'es-ES')
+        self.assertEqual(locales.match_locale('es-CO', ['es-AR']), 'es-AR')
+
+        # Tests from the commit message of 9ae409a05a
+
+        # Assumption:
+        #   A. When a user selects a language the results should be optimized according to
+        #      the selected language.
+        #
+        #   B. When a user selects a language and a territory the results should be
+        #      optimized with first priority on territory and second on language.
+
+        # Assume we have an engine that supports the following locales:
+        locale_tag_list = ['zh-CN', 'zh-HK', 'nl-BE', 'fr-CA']
+
+        # Examples (Assumption A.)
+        # ------------------------
+
+        # A user selects region 'zh-TW' which should end in zh_HK.
+        # hint: CN is 'Hans' and HK ('Hant') fits better to TW ('Hant')
+        self.assertEqual(locales.match_locale('zh-TW', locale_tag_list), 'zh-HK')
+
+        # A user selects only the language 'zh' which should end in CN
+        self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh-CN')
+
+        # A user selects only the language 'fr' which should end in fr_CA
+        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-CA')
+
+        # The difference in priority on the territory is best shown with an
+        # engine that supports the following locales:
+        locale_tag_list = ['fr-FR', 'fr-CA', 'en-GB', 'nl-BE']
+
+        # A user selects only a language
+        self.assertEqual(locales.match_locale('en', locale_tag_list), 'en-GB')
+
+        # hint: the engine supports fr_FR and fr_CA; since no territory is given,
+        # fr_FR takes priority.
+        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-FR')
+
+        # Examples (Assumption B.)
+        # ------------------------
+
+        #  A user selects region 'fr-BE' which should end in nl-BE
+        self.assertEqual(locales.match_locale('fr-BE', locale_tag_list), 'nl-BE')
+
+        # If the user selects a language and there are two locales like the
+        # following:
+
+        locale_tag_list = ['fr-BE', 'fr-CH']
+
+        # The get_engine_locale selects the locale by looking at the "population
+        # percent" and this percentage has a higher amount in BE (68%)
+        # compared to CH (21%)
+
+        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-BE')

+ 0 - 33
tests/unit/test_utils.py

@@ -87,39 +87,6 @@ class TestUtils(SearxTestCase):
         html = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
         html = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
         self.assertEqual(utils.html_to_text(html), "Lorem ipsum")
         self.assertEqual(utils.html_to_text(html), "Lorem ipsum")
 
 
-    def test_match_language(self):
-        self.assertEqual(utils.match_language('es', ['es']), 'es')
-        self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
-        self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')
-
-        # handle script tags
-        self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN')
-        self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW')
-        self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN')
-        self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW')
-        self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN')
-        self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW')
-
-        aliases = {'en-GB': 'en-UK', 'he': 'iw'}
-
-        # guess country
-        self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
-        self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
-        self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
-        self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
-        self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
-        self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')
-
-        # language aliases
-        self.assertEqual(utils.match_language('iw', ['he']), 'he')
-        self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
-        self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
-        self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
-        self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
-        self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
-        self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
-        self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
-
     def test_ecma_unscape(self):
     def test_ecma_unscape(self):
         self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
         self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
         self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó')
         self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó')

+ 0 - 3
utils/templates/etc/searxng/settings.yml

@@ -52,9 +52,6 @@ enabled_plugins:
 
 
 engines:
 engines:
 
 
-  - name: google
-    use_mobile_ui: true
-
 #   - name: fdroid
 #   - name: fdroid
 #     disabled: false
 #     disabled: false
 #
 #