Browse Source

[mod] remove obsolete EngineTraits.supported_languages

All engines have been migrated from ``supported_languages`` to the
``fetch_traits`` concept.  There is no longer a need for the obsolete code that
implements the ``supported_languages`` concept.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 2 years ago
parent
commit
4d4aa13e1f

+ 0 - 11
searx/autocomplete.py

@@ -19,9 +19,6 @@ from searx.engines import (
 from searx.network import get as http_get
 from searx.network import get as http_get
 from searx.exceptions import SearxEngineResponseException
 from searx.exceptions import SearxEngineResponseException
 
 
-# a fetch_supported_languages() for XPath engines isn't available right now
-# _brave = ENGINES_LANGUAGES['brave'].keys()
-
 
 
 def get(*args, **kwargs):
 def get(*args, **kwargs):
     if 'timeout' not in kwargs:
     if 'timeout' not in kwargs:
@@ -225,14 +222,6 @@ def search_autocomplete(backend_name, query, sxng_locale):
     backend = backends.get(backend_name)
     backend = backends.get(backend_name)
     if backend is None:
     if backend is None:
         return []
         return []
-
-    if engines[backend_name].traits.data_type != "traits_v1":
-        # vintage / deprecated
-        if not sxng_locale or sxng_locale == 'all':
-            sxng_locale = 'en'
-        else:
-            sxng_locale = sxng_locale.split('-')[0]
-
     try:
     try:
         return backend(query, sxng_locale)
         return backend(query, sxng_locale)
     except (HTTPError, SearxEngineResponseException):
     except (HTTPError, SearxEngineResponseException):

+ 24 - 48
searx/data/engine_traits.json

@@ -49,8 +49,7 @@
       "uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430",
       "uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430",
       "zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09"
       "zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09"
     },
     },
-    "regions": {},
-    "supported_languages": {}
+    "regions": {}
   },
   },
   "bing": {
   "bing": {
     "all_locale": null,
     "all_locale": null,
@@ -146,8 +145,7 @@
       "zh-CN": "zh-CN",
       "zh-CN": "zh-CN",
       "zh-HK": "zh-HK",
       "zh-HK": "zh-HK",
       "zh-TW": "zh-TW"
       "zh-TW": "zh-TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "bing images": {
   "bing images": {
     "all_locale": null,
     "all_locale": null,
@@ -243,8 +241,7 @@
       "zh-CN": "zh-CN",
       "zh-CN": "zh-CN",
       "zh-HK": "zh-HK",
       "zh-HK": "zh-HK",
       "zh-TW": "zh-TW"
       "zh-TW": "zh-TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "bing news": {
   "bing news": {
     "all_locale": "en-WW",
     "all_locale": "en-WW",
@@ -316,8 +313,7 @@
       "it-IT": "it-IT",
       "it-IT": "it-IT",
       "pt-BR": "pt-BR",
       "pt-BR": "pt-BR",
       "zh-CN": "zh-CN"
       "zh-CN": "zh-CN"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "bing videos": {
   "bing videos": {
     "all_locale": null,
     "all_locale": null,
@@ -413,8 +409,7 @@
       "zh-CN": "zh-CN",
       "zh-CN": "zh-CN",
       "zh-HK": "zh-HK",
       "zh-HK": "zh-HK",
       "zh-TW": "zh-TW"
       "zh-TW": "zh-TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "dailymotion": {
   "dailymotion": {
     "all_locale": null,
     "all_locale": null,
@@ -491,8 +486,7 @@
       "vi-VN": "vi_VN",
       "vi-VN": "vi_VN",
       "zh-CN": "zh_CN",
       "zh-CN": "zh_CN",
       "zh-TW": "zh_TW"
       "zh-TW": "zh_TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "duckduckgo": {
   "duckduckgo": {
     "all_locale": "wt-wt",
     "all_locale": "wt-wt",
@@ -656,8 +650,7 @@
       "zh-CN": "cn-zh",
       "zh-CN": "cn-zh",
       "zh-HK": "hk-tzh",
       "zh-HK": "hk-tzh",
       "zh-TW": "tw-tzh"
       "zh-TW": "tw-tzh"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "duckduckgo images": {
   "duckduckgo images": {
     "all_locale": "wt-wt",
     "all_locale": "wt-wt",
@@ -821,8 +814,7 @@
       "zh-CN": "cn-zh",
       "zh-CN": "cn-zh",
       "zh-HK": "hk-tzh",
       "zh-HK": "hk-tzh",
       "zh-TW": "tw-tzh"
       "zh-TW": "tw-tzh"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "duckduckgo weather": {
   "duckduckgo weather": {
     "all_locale": "wt-wt",
     "all_locale": "wt-wt",
@@ -986,8 +978,7 @@
       "zh-CN": "cn-zh",
       "zh-CN": "cn-zh",
       "zh-HK": "hk-tzh",
       "zh-HK": "hk-tzh",
       "zh-TW": "tw-tzh"
       "zh-TW": "tw-tzh"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "google": {
   "google": {
     "all_locale": "ZZ",
     "all_locale": "ZZ",
@@ -1439,8 +1430,7 @@
       "zh-HK": "HK",
       "zh-HK": "HK",
       "zh-SG": "SG",
       "zh-SG": "SG",
       "zh-TW": "TW"
       "zh-TW": "TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "google images": {
   "google images": {
     "all_locale": "ZZ",
     "all_locale": "ZZ",
@@ -1892,8 +1882,7 @@
       "zh-HK": "HK",
       "zh-HK": "HK",
       "zh-SG": "SG",
       "zh-SG": "SG",
       "zh-TW": "TW"
       "zh-TW": "TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "google news": {
   "google news": {
     "all_locale": "ZZ",
     "all_locale": "ZZ",
@@ -2238,8 +2227,7 @@
       "zh-HK": "HK",
       "zh-HK": "HK",
       "zh-SG": "SG",
       "zh-SG": "SG",
       "zh-TW": "TW"
       "zh-TW": "TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "google scholar": {
   "google scholar": {
     "all_locale": "ZZ",
     "all_locale": "ZZ",
@@ -2691,8 +2679,7 @@
       "zh-HK": "HK",
       "zh-HK": "HK",
       "zh-SG": "SG",
       "zh-SG": "SG",
       "zh-TW": "TW"
       "zh-TW": "TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "google videos": {
   "google videos": {
     "all_locale": "ZZ",
     "all_locale": "ZZ",
@@ -3144,8 +3131,7 @@
       "zh-HK": "HK",
       "zh-HK": "HK",
       "zh-SG": "SG",
       "zh-SG": "SG",
       "zh-TW": "TW"
       "zh-TW": "TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "peertube": {
   "peertube": {
     "all_locale": null,
     "all_locale": null,
@@ -3174,8 +3160,7 @@
       "zh_Hans": "zh",
       "zh_Hans": "zh",
       "zh_Hant": "zh"
       "zh_Hant": "zh"
     },
     },
-    "regions": {},
-    "supported_languages": {}
+    "regions": {}
   },
   },
   "qwant": {
   "qwant": {
     "all_locale": null,
     "all_locale": null,
@@ -3222,8 +3207,7 @@
       "th-TH": "th_TH",
       "th-TH": "th_TH",
       "zh-CN": "zh_CN",
       "zh-CN": "zh_CN",
       "zh-HK": "zh_HK"
       "zh-HK": "zh_HK"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "qwant images": {
   "qwant images": {
     "all_locale": null,
     "all_locale": null,
@@ -3270,8 +3254,7 @@
       "th-TH": "th_TH",
       "th-TH": "th_TH",
       "zh-CN": "zh_CN",
       "zh-CN": "zh_CN",
       "zh-HK": "zh_HK"
       "zh-HK": "zh_HK"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "qwant news": {
   "qwant news": {
     "all_locale": null,
     "all_locale": null,
@@ -3303,8 +3286,7 @@
       "nl-BE": "nl_BE",
       "nl-BE": "nl_BE",
       "nl-NL": "nl_NL",
       "nl-NL": "nl_NL",
       "pt-PT": "pt_PT"
       "pt-PT": "pt_PT"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "qwant videos": {
   "qwant videos": {
     "all_locale": null,
     "all_locale": null,
@@ -3351,8 +3333,7 @@
       "th-TH": "th_TH",
       "th-TH": "th_TH",
       "zh-CN": "zh_CN",
       "zh-CN": "zh_CN",
       "zh-HK": "zh_HK"
       "zh-HK": "zh_HK"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "sepiasearch": {
   "sepiasearch": {
     "all_locale": null,
     "all_locale": null,
@@ -3381,8 +3362,7 @@
       "zh_Hans": "zh",
       "zh_Hans": "zh",
       "zh_Hant": "zh"
       "zh_Hant": "zh"
     },
     },
-    "regions": {},
-    "supported_languages": {}
+    "regions": {}
   },
   },
   "startpage": {
   "startpage": {
     "all_locale": null,
     "all_locale": null,
@@ -3521,8 +3501,7 @@
       "zh-CN": "zh-CN_CN",
       "zh-CN": "zh-CN_CN",
       "zh-HK": "zh-TW_HK",
       "zh-HK": "zh-TW_HK",
       "zh-TW": "zh-TW_TW"
       "zh-TW": "zh-TW_TW"
-    },
-    "supported_languages": {}
+    }
   },
   },
   "wikidata": {
   "wikidata": {
     "all_locale": null,
     "all_locale": null,
@@ -3610,8 +3589,7 @@
       "zh": "zh",
       "zh": "zh",
       "zh_Hant": "zh-classical"
       "zh_Hant": "zh-classical"
     },
     },
-    "regions": {},
-    "supported_languages": {}
+    "regions": {}
   },
   },
   "wikipedia": {
   "wikipedia": {
     "all_locale": null,
     "all_locale": null,
@@ -3779,8 +3757,7 @@
       "zh_Hans": "zh",
       "zh_Hans": "zh",
       "zh_Hant": "zh-classical"
       "zh_Hant": "zh-classical"
     },
     },
-    "regions": {},
-    "supported_languages": {}
+    "regions": {}
   },
   },
   "yahoo": {
   "yahoo": {
     "all_locale": "any",
     "all_locale": "any",
@@ -3820,7 +3797,6 @@
       "zh_Hans": "zh_chs",
       "zh_Hans": "zh_chs",
       "zh_Hant": "zh_cht"
       "zh_Hant": "zh_cht"
     },
     },
-    "regions": {},
-    "supported_languages": {}
+    "regions": {}
   }
   }
 }
 }

+ 0 - 7
searx/enginelib/__init__.py

@@ -134,10 +134,3 @@ class Engine:  # pylint: disable=too-few-public-methods
           require_api_key: true
           require_api_key: true
           results: HTML
           results: HTML
     """
     """
-
-    # deprecated properties
-
-    _fetch_supported_languages: Callable  # deprecated use fetch_traits
-    supported_languages: Union[List[str], Dict[str, str]]  # deprecated use traits
-    language_aliases: Dict[str, str]  # deprecated not needed when using triats
-    supported_languages_url: str  # deprecated not needed when using triats

+ 3 - 140
searx/enginelib/traits.py

@@ -13,11 +13,9 @@ used.
 from __future__ import annotations
 from __future__ import annotations
 import json
 import json
 import dataclasses
 import dataclasses
-from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING
+from typing import Dict, Union, Callable, Optional, TYPE_CHECKING
 from typing_extensions import Literal, Self
 from typing_extensions import Literal, Self
 
 
-from babel.localedata import locale_identifiers
-
 from searx import locales
 from searx import locales
 from searx.data import data_dir, ENGINE_TRAITS
 from searx.data import data_dir, ENGINE_TRAITS
 
 
@@ -79,18 +77,8 @@ class EngineTraits:
     language").
     language").
     """
     """
 
 
-    data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1'
-    """Data type, default is 'traits_v1' for vintage use 'supported_languages'.
-
-    .. hint::
-
-       For the transition period until the *fetch* functions of all the engines
-       are converted there will be the data_type 'supported_languages', which
-       maps the old logic unchanged 1:1.
-
-       Instances of data_type 'supported_languages' do not implement methods
-       like ``self.get_language(..)`` and ``self.get_region(..)``
-
+    data_type: Literal['traits_v1'] = 'traits_v1'
+    """Data type, default is 'traits_v1'.
     """
     """
 
 
     custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
     custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
@@ -139,16 +127,6 @@ class EngineTraits:
         if self.data_type == 'traits_v1':
         if self.data_type == 'traits_v1':
             return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
             return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
 
 
-        if self.data_type == 'supported_languages':  # vintage / deprecated
-            # pylint: disable=import-outside-toplevel
-            from searx.utils import match_language
-
-            if searxng_locale == 'all':
-                return True
-            x = match_language(searxng_locale, self.supported_languages, self.language_aliases, None)
-            return bool(x)
-
-            # return bool(self.get_supported_language(searxng_locale))
         raise TypeError('engine traits of type %s is unknown' % self.data_type)
         raise TypeError('engine traits of type %s is unknown' % self.data_type)
 
 
     def copy(self):
     def copy(self):
@@ -178,10 +156,6 @@ class EngineTraits:
 
 
         if self.data_type == 'traits_v1':
         if self.data_type == 'traits_v1':
             self._set_traits_v1(engine)
             self._set_traits_v1(engine)
-
-        elif self.data_type == 'supported_languages':  # vintage / deprecated
-            self._set_supported_languages(engine)
-
         else:
         else:
             raise TypeError('engine traits of type %s is unknown' % self.data_type)
             raise TypeError('engine traits of type %s is unknown' % self.data_type)
 
 
@@ -215,106 +189,6 @@ class EngineTraits:
         # set the copied & modified traits in engine's namespace
         # set the copied & modified traits in engine's namespace
         engine.traits = traits
         engine.traits = traits
 
 
-    # -------------------------------------------------------------------------
-    # The code below is deprecated an can hopefully be deleted at one day
-    # -------------------------------------------------------------------------
-
-    supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict)
-    """depricated: does not work for engines that do support languages based on a
-    region.  With this type it is not guaranteed that the key values can be
-    parsed by :py:obj:`babel.Locale.parse`!
-    """
-
-    # language_aliases: Dict[str, str] = dataclasses.field(default_factory=dict)
-    # """depricated: does not work for engines that do support languages based on a
-    # region.  With this type it is not guaranteed that the key values can be
-    # parsed by :py:obj:`babel.Locale.parse`!
-    # """
-
-    BABEL_LANGS = [
-        lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
-        for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
-    ]
-
-    # def get_supported_language(self, searxng_locale, default=None):  # vintage / deprecated
-    #     """Return engine's language string that *best fits* to SearXNG's locale."""
-    #     if searxng_locale == 'all' and self.all_locale is not None:
-    #         return self.all_locale
-    #     return locales.get_engine_locale(searxng_locale, self.supported_languages, default=default)
-
-    @classmethod  # vintage / deprecated
-    def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]:
-        """DEPRECATED: Calls a function ``_fetch_supported_languages`` from engine's
-        namespace to fetch languages from the origin engine.  If function does
-        not exists, ``None`` is returned.
-        """
-
-        # pylint: disable=import-outside-toplevel
-        from searx import network
-        from searx.utils import gen_useragent
-
-        fetch_languages = getattr(engine, '_fetch_supported_languages', None)
-        if fetch_languages is None:
-            return None
-
-        # The headers has been moved here from commit 9b6ffed06: Some engines (at
-        # least bing and startpage) return a different result list of supported
-        # languages depending on the IP location where the HTTP request comes from.
-        # The IP based results (from bing) can be avoided by setting a
-        # 'Accept-Language' in the HTTP request.
-
-        headers = {
-            'User-Agent': gen_useragent(),
-            'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
-        }
-        resp = network.get(engine.supported_languages_url, headers=headers)
-        supported_languages = fetch_languages(resp)
-        if isinstance(supported_languages, list):
-            supported_languages.sort()
-
-        engine_traits = cls()
-        engine_traits.data_type = 'supported_languages'
-        engine_traits.supported_languages = supported_languages
-        return engine_traits
-
-    def _set_supported_languages(self, engine: Engine):  # vintage / deprecated
-        traits = self.copy()
-
-        # pylint: disable=import-outside-toplevel
-        from searx.utils import match_language
-
-        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
-
-        if hasattr(engine, 'language'):
-            if engine.language not in self.supported_languages:
-                raise ValueError(_msg % (engine.name, 'language', engine.language))
-
-            if isinstance(self.supported_languages, dict):
-                traits.supported_languages = {engine.language: self.supported_languages[engine.language]}
-            else:
-                traits.supported_languages = [engine.language]
-
-        engine.language_support = bool(traits.supported_languages)
-        engine.supported_languages = traits.supported_languages
-
-        # find custom aliases for non standard language codes
-        traits.language_aliases = {}  # pylint: disable=attribute-defined-outside-init
-
-        for engine_lang in getattr(engine, 'language_aliases', {}):
-            iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None)
-            if (
-                iso_lang
-                and iso_lang != engine_lang
-                and not engine_lang.startswith(iso_lang)
-                and iso_lang not in self.supported_languages
-            ):
-                traits.language_aliases[iso_lang] = engine_lang
-
-        engine.language_aliases = traits.language_aliases
-
-        # set the copied & modified traits in engine's namespace
-        engine.traits = traits
-
 
 
 class EngineTraitsMap(Dict[str, EngineTraits]):
 class EngineTraitsMap(Dict[str, EngineTraits]):
     """A python dictionary to map :class:`EngineTraits` by engine name."""
     """A python dictionary to map :class:`EngineTraits` by engine name."""
@@ -352,17 +226,6 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
                 log("%-20s: SearXNG regions   --> %s" % (engine_name, len(traits.regions)))
                 log("%-20s: SearXNG regions   --> %s" % (engine_name, len(traits.regions)))
                 obj[engine_name] = traits
                 obj[engine_name] = traits
 
 
-            # vintage / deprecated
-            _traits = EngineTraits.fetch_supported_languages(engine)
-            if _traits is not None:
-                log("%-20s: %s supported_languages (deprecated)" % (engine_name, len(_traits.supported_languages)))
-                if traits is not None:
-                    traits.supported_languages = _traits.supported_languages
-                    obj[engine_name] = traits
-                else:
-                    obj[engine_name] = _traits
-                continue
-
         return obj
         return obj
 
 
     def set_traits(self, engine: Engine):
     def set_traits(self, engine: Engine):

+ 0 - 2
searx/engines/__init__.py

@@ -43,8 +43,6 @@ ENGINE_DEFAULT_ARGS = {
     "send_accept_language_header": False,
     "send_accept_language_header": False,
     "tokens": [],
     "tokens": [],
     "about": {},
     "about": {},
-    "supported_languages": [],  # deprecated use traits
-    "language_aliases": {},  # deprecated not needed when using traits
 }
 }
 # set automatically when an engine does not have any tab category
 # set automatically when an engine does not have any tab category
 OTHER_CATEGORY = 'other'
 OTHER_CATEGORY = 'other'

+ 3 - 3
searx/engines/gentoo.py

@@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org'
 # xpath queries
 # xpath queries
 xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
+xpath_content = './/div[@class="searchresult"]'
 
 
 
 
 # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
 # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
@@ -77,8 +78,6 @@ main_langs = {
     'uk': 'Українська',
     'uk': 'Українська',
     'zh': '简体中文',
     'zh': '简体中文',
 }
 }
-supported_languages = dict(lang_urls, **main_langs)
-
 
 
 # do search-request
 # do search-request
 def request(query, params):
 def request(query, params):
@@ -118,7 +117,8 @@ def response(resp):
         link = result.xpath(xpath_link)[0]
         link = result.xpath(xpath_link)[0]
         href = urljoin(base_url, link.attrib.get('href'))
         href = urljoin(base_url, link.attrib.get('href'))
         title = extract_text(link)
         title = extract_text(link)
+        content = extract_text(result.xpath(xpath_content))
 
 
-        results.append({'url': href, 'title': title})
+        results.append({'url': href, 'title': title, 'content': content})
 
 
     return results
     return results

+ 1 - 1
searx/search/processors/online.py

@@ -221,7 +221,7 @@ class OnlineProcessor(EngineProcessor):
                 'test': ['unique_results'],
                 'test': ['unique_results'],
             }
             }
 
 
-        if getattr(self.engine, 'supported_languages', []):
+        if getattr(self.engine, 'traits', False):
             tests['lang_fr'] = {
             tests['lang_fr'] = {
                 'matrix': {'query': 'paris', 'lang': 'fr'},
                 'matrix': {'query': 'paris', 'lang': 'fr'},
                 'result_container': ['not_empty', ('has_language', 'fr')],
                 'result_container': ['not_empty', ('has_language', 'fr')],

+ 0 - 5
searx/webapp.py

@@ -1317,11 +1317,6 @@ def config():
             continue
             continue
 
 
         _languages = engine.traits.languages.keys()
         _languages = engine.traits.languages.keys()
-        if engine.traits.data_type == 'supported_languages':  # vintage / deprecated
-            _languages = engine.traits.supported_languages
-            if isinstance(_languages, dict):
-                _languages = _languages.keys()
-
         _engines.append(
         _engines.append(
             {
             {
                 'name': name,
                 'name': name,