Browse Source

[mod] qwant: fetch engine traits (data_type: traits_v1)

Implements a fetch_traits function for the Qwant engines.

.. note::

   Includes migration of the request method from 'supported_languages' to the
   'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 2 years ago
parent
commit
c1ae2ef57c
3 changed files with 45 additions and 46 deletions
  1. 7 7
      searx/autocomplete.py
  2. 16 16
      searx/data/engine_traits.json
  3. 22 23
      searx/engines/qwant.py

+ 7 - 7
searx/autocomplete.py

@@ -126,16 +126,16 @@ def swisscows(query, _lang):
     return resp
     return resp
 
 
 
 
-def qwant(query, lang):
-    # qwant autocompleter (additional parameter : lang=en_en&count=xxx )
-    url = 'https://api.qwant.com/api/suggest?{query}'
-
-    resp = get(url.format(query=urlencode({'q': query, 'lang': lang})))
-
+def qwant(query, sxng_locale):
+    """Autocomplete from Qwant. Supports Qwant's regions."""
     results = []
     results = []
 
 
+    locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
+    url = 'https://api.qwant.com/v3/suggest?{query}'
+    resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
+
     if resp.ok:
     if resp.ok:
-        data = loads(resp.text)
+        data = resp.json()
         if data['status'] == 'success':
         if data['status'] == 'success':
             for item in data['data']['items']:
             for item in data['data']['items']:
                 results.append(item['value'])
                 results.append(item['value'])

+ 16 - 16
searx/data/engine_traits.json

@@ -3107,10 +3107,9 @@
   "qwant": {
   "qwant": {
     "all_locale": null,
     "all_locale": null,
     "custom": {},
     "custom": {},
-    "data_type": "supported_languages",
+    "data_type": "traits_v1",
     "languages": {},
     "languages": {},
-    "regions": {},
-    "supported_languages": {
+    "regions": {
       "bg-BG": "bg_BG",
       "bg-BG": "bg_BG",
       "ca-ES": "ca_ES",
       "ca-ES": "ca_ES",
       "cs-CZ": "cs_CZ",
       "cs-CZ": "cs_CZ",
@@ -3150,15 +3149,15 @@
       "th-TH": "th_TH",
       "th-TH": "th_TH",
       "zh-CN": "zh_CN",
       "zh-CN": "zh_CN",
       "zh-HK": "zh_HK"
       "zh-HK": "zh_HK"
-    }
+    },
+    "supported_languages": {}
   },
   },
   "qwant images": {
   "qwant images": {
     "all_locale": null,
     "all_locale": null,
     "custom": {},
     "custom": {},
-    "data_type": "supported_languages",
+    "data_type": "traits_v1",
     "languages": {},
     "languages": {},
-    "regions": {},
-    "supported_languages": {
+    "regions": {
       "bg-BG": "bg_BG",
       "bg-BG": "bg_BG",
       "ca-ES": "ca_ES",
       "ca-ES": "ca_ES",
       "cs-CZ": "cs_CZ",
       "cs-CZ": "cs_CZ",
@@ -3198,15 +3197,15 @@
       "th-TH": "th_TH",
       "th-TH": "th_TH",
       "zh-CN": "zh_CN",
       "zh-CN": "zh_CN",
       "zh-HK": "zh_HK"
       "zh-HK": "zh_HK"
-    }
+    },
+    "supported_languages": {}
   },
   },
   "qwant news": {
   "qwant news": {
     "all_locale": null,
     "all_locale": null,
     "custom": {},
     "custom": {},
-    "data_type": "supported_languages",
+    "data_type": "traits_v1",
     "languages": {},
     "languages": {},
-    "regions": {},
-    "supported_languages": {
+    "regions": {
       "ca-ES": "ca_ES",
       "ca-ES": "ca_ES",
       "de-AT": "de_AT",
       "de-AT": "de_AT",
       "de-CH": "de_CH",
       "de-CH": "de_CH",
@@ -3231,15 +3230,15 @@
       "nl-BE": "nl_BE",
       "nl-BE": "nl_BE",
       "nl-NL": "nl_NL",
       "nl-NL": "nl_NL",
       "pt-PT": "pt_PT"
       "pt-PT": "pt_PT"
-    }
+    },
+    "supported_languages": {}
   },
   },
   "qwant videos": {
   "qwant videos": {
     "all_locale": null,
     "all_locale": null,
     "custom": {},
     "custom": {},
-    "data_type": "supported_languages",
+    "data_type": "traits_v1",
     "languages": {},
     "languages": {},
-    "regions": {},
-    "supported_languages": {
+    "regions": {
       "bg-BG": "bg_BG",
       "bg-BG": "bg_BG",
       "ca-ES": "ca_ES",
       "ca-ES": "ca_ES",
       "cs-CZ": "cs_CZ",
       "cs-CZ": "cs_CZ",
@@ -3279,7 +3278,8 @@
       "th-TH": "th_TH",
       "th-TH": "th_TH",
       "zh-CN": "zh_CN",
       "zh-CN": "zh_CN",
       "zh-HK": "zh_HK"
       "zh-HK": "zh_HK"
-    }
+    },
+    "supported_languages": {}
   },
   },
   "startpage": {
   "startpage": {
     "all_locale": null,
     "all_locale": null,

+ 22 - 23
searx/engines/qwant.py

@@ -34,7 +34,9 @@ import babel
 
 
 from searx.exceptions import SearxEngineAPIException
 from searx.exceptions import SearxEngineAPIException
 from searx.network import raise_for_httperror
 from searx.network import raise_for_httperror
-from searx.locales import get_engine_locale
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
 
 
 # about
 # about
 about = {
 about = {
@@ -49,7 +51,6 @@ about = {
 # engine dependent config
 # engine dependent config
 categories = []
 categories = []
 paging = True
 paging = True
-supported_languages_url = about['website']
 qwant_categ = None  # web|news|inages|videos
 qwant_categ = None  # web|news|inages|videos
 
 
 safesearch = True
 safesearch = True
@@ -95,7 +96,7 @@ def request(query, params):
     )
     )
 
 
     # add quant's locale
     # add quant's locale
-    q_locale = get_engine_locale(params['language'], supported_languages, default='en_US')
+    q_locale = traits.get_region(params["searxng_locale"], default='en_US')
     params['url'] += '&locale=' + q_locale
     params['url'] += '&locale=' + q_locale
 
 
     # add safesearch option
     # add safesearch option
@@ -243,15 +244,20 @@ def response(resp):
     return results
     return results
 
 
 
 
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
+
+    # pylint: disable=import-outside-toplevel
+    from searx import network
+    from searx.locales import region_tag
 
 
+    resp = network.get(about['website'])
     text = resp.text
     text = resp.text
     text = text[text.find('INITIAL_PROPS') :]
     text = text[text.find('INITIAL_PROPS') :]
     text = text[text.find('{') : text.find('</script>')]
     text = text[text.find('{') : text.find('</script>')]
 
 
     q_initial_props = loads(text)
     q_initial_props = loads(text)
     q_locales = q_initial_props.get('locales')
     q_locales = q_initial_props.get('locales')
-    q_valid_locales = []
+    eng_tag_list = set()
 
 
     for country, v in q_locales.items():
     for country, v in q_locales.items():
         for lang in v['langs']:
         for lang in v['langs']:
@@ -261,25 +267,18 @@ def _fetch_supported_languages(resp):
                 # qwant-news does not support all locales from qwant-web:
                 # qwant-news does not support all locales from qwant-web:
                 continue
                 continue
 
 
-            q_valid_locales.append(_locale)
-
-    supported_languages = {}
+            eng_tag_list.add(_locale)
 
 
-    for q_locale in q_valid_locales:
+    for eng_tag in eng_tag_list:
         try:
         try:
-            locale = babel.Locale.parse(q_locale, sep='_')
-        except babel.core.UnknownLocaleError:
-            print("ERROR: can't determine babel locale of quant's locale %s" % q_locale)
+            sxng_tag = region_tag(babel.Locale.parse(eng_tag, sep='_'))
+        except babel.UnknownLocaleError:
+            print("ERROR: can't determine babel locale of quant's locale %s" % eng_tag)
             continue
             continue
 
 
-        # note: supported_languages (dict)
-        #
-        #   dict's key is a string build up from a babel.Locale object / the
-        #   notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and
-        #   language) notation and dict's values are the locale strings used by
-        #   the engine.
-
-        searxng_locale = locale.language + '-' + locale.territory  # --> params['language']
-        supported_languages[searxng_locale] = q_locale
-
-    return supported_languages
+        conflict = engine_traits.regions.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.regions[sxng_tag] = eng_tag