traits.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """Engine's traits are fetched from the origin engines and stored in a JSON file
  4. in the *data folder*. Most often traits are languages and region codes and
  5. their mapping from SearXNG's representation to the representation in the origin
  6. search engine. For new traits new properties can be added to the class
  7. :py:class:`EngineTraits`.
  8. To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
  9. used.
  10. """
  11. from __future__ import annotations
  12. import json
  13. import dataclasses
  14. from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING
  15. from typing_extensions import Literal, Self
  16. from babel.localedata import locale_identifiers
  17. from searx import locales
  18. from searx.data import data_dir, ENGINE_TRAITS
  19. if TYPE_CHECKING:
  20. from . import Engine
  21. class EngineTraitsEncoder(json.JSONEncoder):
  22. """Encodes :class:`EngineTraits` to a serializable object, see
  23. :class:`json.JSONEncoder`."""
  24. def default(self, o):
  25. """Return dictionary of a :class:`EngineTraits` object."""
  26. if isinstance(o, EngineTraits):
  27. return o.__dict__
  28. return super().default(o)
  29. @dataclasses.dataclass
  30. class EngineTraits:
  31. """The class is intended to be instantiated for each engine."""
  32. regions: Dict[str, str] = dataclasses.field(default_factory=dict)
  33. """Maps SearXNG's internal representation of a region to the one of the engine.
  34. SearXNG's internal representation can be parsed by babel and the value is
  35. send to the engine:
  36. .. code:: python
  37. regions ={
  38. 'fr-BE' : <engine's region name>,
  39. }
  40. for key, egnine_region regions.items():
  41. searxng_region = babel.Locale.parse(key, sep='-')
  42. ...
  43. """
  44. languages: Dict[str, str] = dataclasses.field(default_factory=dict)
  45. """Maps SearXNG's internal representation of a language to the one of the engine.
  46. SearXNG's internal representation can be parsed by babel and the value is
  47. send to the engine:
  48. .. code:: python
  49. languages = {
  50. 'ca' : <engine's language name>,
  51. }
  52. for key, egnine_lang in languages.items():
  53. searxng_lang = babel.Locale.parse(key)
  54. ...
  55. """
  56. all_locale: Optional[str] = None
  57. """To which locale value SearXNG's ``all`` language is mapped (shown a "Default
  58. language").
  59. """
  60. data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1'
  61. """Data type, default is 'traits_v1' for vintage use 'supported_languages'.
  62. .. hint::
  63. For the transition period until the *fetch* functions of all the engines
  64. are converted there will be the data_type 'supported_languages', which
  65. maps the old logic unchanged 1:1.
  66. Instances of data_type 'supported_languages' do not implement methods
  67. like ``self.get_language(..)`` and ``self.get_region(..)``
  68. """
  69. custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
  70. """A place to store engine's custom traits, not related to the SearXNG core
  71. """
  72. def get_language(self, searxng_locale: str, default=None):
  73. """Return engine's language string that *best fits* to SearXNG's locale.
  74. :param searxng_locale: SearXNG's internal representation of locale
  75. selected by the user.
  76. :param default: engine's default language
  77. The *best fits* rules are implemented in
  78. :py:obj:`locales.get_engine_locale`. Except for the special value ``all``
  79. which is determined from :py:obj`EngineTraits.all_language`.
  80. """
  81. if searxng_locale == 'all' and self.all_locale is not None:
  82. return self.all_locale
  83. return locales.get_engine_locale(searxng_locale, self.languages, default=default)
  84. def get_region(self, searxng_locale: str, default=None):
  85. """Return engine's region string that best fits to SearXNG's locale.
  86. :param searxng_locale: SearXNG's internal representation of locale
  87. selected by the user.
  88. :param default: engine's default region
  89. The *best fits* rules are implemented in
  90. :py:obj:`locales.get_engine_locale`. Except for the special value ``all``
  91. which is determined from :py:obj`EngineTraits.all_language`.
  92. """
  93. if searxng_locale == 'all' and self.all_locale is not None:
  94. return self.all_locale
  95. return locales.get_engine_locale(searxng_locale, self.regions, default=default)
  96. def is_locale_supported(self, searxng_locale: str) -> bool:
  97. """A *locale* (SearXNG's internal representation) is considered to be supported
  98. by the engine if the *region* or the *language* is supported by the
  99. engine. For verification the functions :py:func:`self.get_region` and
  100. :py:func:`self.get_region` are used.
  101. """
  102. if self.data_type == 'traits_v1':
  103. return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
  104. if self.data_type == 'supported_languages': # vintage / deprecated
  105. # pylint: disable=import-outside-toplevel
  106. from searx.utils import match_language
  107. if searxng_locale == 'all':
  108. return True
  109. x = match_language(searxng_locale, self.supported_languages, self.language_aliases, None)
  110. return bool(x)
  111. # return bool(self.get_supported_language(searxng_locale))
  112. raise TypeError('engine traits of type %s is unknown' % self.data_type)
  113. def copy(self):
  114. """Create a copy of the dataclass object."""
  115. return EngineTraits(**dataclasses.asdict(self))
  116. @classmethod
  117. def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
  118. """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
  119. and set properties from the origin engine in the object ``engine_traits``. If
  120. function does not exists, ``None`` is returned.
  121. """
  122. fetch_traits = getattr(engine, 'fetch_traits', None)
  123. engine_traits = None
  124. if fetch_traits:
  125. engine_traits = cls()
  126. fetch_traits(engine_traits)
  127. return engine_traits
  128. def set_traits(self, engine: Engine):
  129. """Set traits from self object in a :py:obj:`.Engine` namespace.
  130. :param engine: engine instance build by :py:func:`searx.engines.load_engine`
  131. """
  132. if self.data_type == 'traits_v1':
  133. self._set_traits_v1(engine)
  134. elif self.data_type == 'supported_languages': # vintage / deprecated
  135. self._set_supported_languages(engine)
  136. else:
  137. raise TypeError('engine traits of type %s is unknown' % self.data_type)
  138. def _set_traits_v1(self, engine: Engine):
  139. # For an engine, when there is `language: ...` in the YAML settings the engine
  140. # does support only this one language (region)::
  141. #
  142. # - name: google italian
  143. # engine: google
  144. # language: it
  145. # region: it-IT
  146. traits = self.copy()
  147. _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
  148. languages = traits.languages
  149. if hasattr(engine, 'language'):
  150. if engine.language not in languages:
  151. raise ValueError(_msg % (engine.name, 'language', engine.language))
  152. traits.languages = {engine.language: languages[engine.language]}
  153. regions = traits.regions
  154. if hasattr(engine, 'region'):
  155. if engine.region not in regions:
  156. raise ValueError(_msg % (engine.name, 'region', engine.region))
  157. traits.regions = {engine.region: regions[engine.region]}
  158. engine.language_support = bool(traits.languages or traits.regions)
  159. # set the copied & modified traits in engine's namespace
  160. engine.traits = traits
  161. # -------------------------------------------------------------------------
  162. # The code below is deprecated an can hopefully be deleted at one day
  163. # -------------------------------------------------------------------------
  164. supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict)
  165. """depricated: does not work for engines that do support languages based on a
  166. region. With this type it is not guaranteed that the key values can be
  167. parsed by :py:obj:`babel.Locale.parse`!
  168. """
  169. # language_aliases: Dict[str, str] = dataclasses.field(default_factory=dict)
  170. # """depricated: does not work for engines that do support languages based on a
  171. # region. With this type it is not guaranteed that the key values can be
  172. # parsed by :py:obj:`babel.Locale.parse`!
  173. # """
  174. BABEL_LANGS = [
  175. lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
  176. for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
  177. ]
  178. # def get_supported_language(self, searxng_locale, default=None): # vintage / deprecated
  179. # """Return engine's language string that *best fits* to SearXNG's locale."""
  180. # if searxng_locale == 'all' and self.all_locale is not None:
  181. # return self.all_locale
  182. # return locales.get_engine_locale(searxng_locale, self.supported_languages, default=default)
  183. @classmethod # vintage / deprecated
  184. def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]:
  185. """DEPRECATED: Calls a function ``_fetch_supported_languages`` from engine's
  186. namespace to fetch languages from the origin engine. If function does
  187. not exists, ``None`` is returned.
  188. """
  189. # pylint: disable=import-outside-toplevel
  190. from searx import network
  191. from searx.utils import gen_useragent
  192. fetch_languages = getattr(engine, '_fetch_supported_languages', None)
  193. if fetch_languages is None:
  194. return None
  195. # The headers has been moved here from commit 9b6ffed06: Some engines (at
  196. # least bing and startpage) return a different result list of supported
  197. # languages depending on the IP location where the HTTP request comes from.
  198. # The IP based results (from bing) can be avoided by setting a
  199. # 'Accept-Language' in the HTTP request.
  200. headers = {
  201. 'User-Agent': gen_useragent(),
  202. 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
  203. }
  204. resp = network.get(engine.supported_languages_url, headers=headers)
  205. supported_languages = fetch_languages(resp)
  206. if isinstance(supported_languages, list):
  207. supported_languages.sort()
  208. engine_traits = cls()
  209. engine_traits.data_type = 'supported_languages'
  210. engine_traits.supported_languages = supported_languages
  211. return engine_traits
  212. def _set_supported_languages(self, engine: Engine): # vintage / deprecated
  213. traits = self.copy()
  214. # pylint: disable=import-outside-toplevel
  215. from searx.utils import match_language
  216. _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
  217. if hasattr(engine, 'language'):
  218. if engine.language not in self.supported_languages:
  219. raise ValueError(_msg % (engine.name, 'language', engine.language))
  220. if isinstance(self.supported_languages, dict):
  221. traits.supported_languages = {engine.language: self.supported_languages[engine.language]}
  222. else:
  223. traits.supported_languages = [engine.language]
  224. engine.language_support = bool(traits.supported_languages)
  225. engine.supported_languages = traits.supported_languages
  226. # find custom aliases for non standard language codes
  227. traits.language_aliases = {} # pylint: disable=attribute-defined-outside-init
  228. for engine_lang in getattr(engine, 'language_aliases', {}):
  229. iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None)
  230. if (
  231. iso_lang
  232. and iso_lang != engine_lang
  233. and not engine_lang.startswith(iso_lang)
  234. and iso_lang not in self.supported_languages
  235. ):
  236. traits.language_aliases[iso_lang] = engine_lang
  237. engine.language_aliases = traits.language_aliases
  238. # set the copied & modified traits in engine's namespace
  239. engine.traits = traits
  240. class EngineTraitsMap(Dict[str, EngineTraits]):
  241. """A python dictionary to map :class:`EngineTraits` by engine name."""
  242. ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
  243. """File with persistence of the :py:obj:`EngineTraitsMap`."""
  244. def save_data(self):
  245. """Store EngineTraitsMap in in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
  246. with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f:
  247. json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder)
  248. @classmethod
  249. def from_data(cls) -> Self:
  250. """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
  251. obj = cls()
  252. for k, v in ENGINE_TRAITS.items():
  253. obj[k] = EngineTraits(**v)
  254. return obj
  255. @classmethod
  256. def fetch_traits(cls, log: Callable) -> Self:
  257. from searx import engines # pylint: disable=cyclic-import, import-outside-toplevel
  258. names = list(engines.engines)
  259. names.sort()
  260. obj = cls()
  261. for engine_name in names:
  262. engine = engines.engines[engine_name]
  263. traits = EngineTraits.fetch_traits(engine)
  264. if traits is not None:
  265. log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
  266. log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
  267. obj[engine_name] = traits
  268. # vintage / deprecated
  269. _traits = EngineTraits.fetch_supported_languages(engine)
  270. if _traits is not None:
  271. log("%-20s: %s supported_languages (deprecated)" % (engine_name, len(_traits.supported_languages)))
  272. if traits is not None:
  273. traits.supported_languages = _traits.supported_languages
  274. obj[engine_name] = traits
  275. else:
  276. obj[engine_name] = _traits
  277. continue
  278. return obj
  279. def set_traits(self, engine: Engine):
  280. """Set traits in a :py:obj:`Engine` namespace.
  281. :param engine: engine instance build by :py:func:`searx.engines.load_engine`
  282. """
  283. engine_traits = EngineTraits(data_type='traits_v1')
  284. if engine.name in self.keys():
  285. engine_traits = self[engine.name]
  286. elif engine.engine in self.keys():
  287. # The key of the dictionary traits_map is the *engine name*
  288. # configured in settings.xml. When multiple engines are configured
  289. # in settings.yml to use the same origin engine (python module)
  290. # these additional engines can use the languages from the origin
  291. # engine. For this use the configured ``engine: ...`` from
  292. # settings.yml
  293. engine_traits = self[engine.engine]
  294. engine_traits.set_traits(engine)