traits.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Engine's traits are fetched from the origin engines and stored in a JSON file
  3. in the *data folder*. Most often traits are languages and region codes and
  4. their mapping from SearXNG's representation to the representation in the origin
  5. search engine. For new traits new properties can be added to the class
  6. :py:class:`EngineTraits`.
  7. To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
  8. used.
  9. """
  10. from __future__ import annotations
  11. import os
  12. import json
  13. import dataclasses
  14. import types
  15. from typing import Dict, Literal, Iterable, Union, Callable, Optional, TYPE_CHECKING
  16. from searx import locales
  17. from searx.data import data_dir, ENGINE_TRAITS
  18. if TYPE_CHECKING:
  19. from . import Engine
  20. class EngineTraitsEncoder(json.JSONEncoder):
  21. """Encodes :class:`EngineTraits` to a serializable object, see
  22. :class:`json.JSONEncoder`."""
  23. def default(self, o):
  24. """Return dictionary of a :class:`EngineTraits` object."""
  25. if isinstance(o, EngineTraits):
  26. return o.__dict__
  27. return super().default(o)
  28. @dataclasses.dataclass
  29. class EngineTraits:
  30. """The class is intended to be instantiated for each engine."""
  31. regions: Dict[str, str] = dataclasses.field(default_factory=dict)
  32. """Maps SearXNG's internal representation of a region to the one of the engine.
  33. SearXNG's internal representation can be parsed by babel and the value is
  34. send to the engine:
  35. .. code:: python
  36. regions ={
  37. 'fr-BE' : <engine's region name>,
  38. }
  39. for key, egnine_region regions.items():
  40. searxng_region = babel.Locale.parse(key, sep='-')
  41. ...
  42. """
  43. languages: Dict[str, str] = dataclasses.field(default_factory=dict)
  44. """Maps SearXNG's internal representation of a language to the one of the engine.
  45. SearXNG's internal representation can be parsed by babel and the value is
  46. send to the engine:
  47. .. code:: python
  48. languages = {
  49. 'ca' : <engine's language name>,
  50. }
  51. for key, egnine_lang in languages.items():
  52. searxng_lang = babel.Locale.parse(key)
  53. ...
  54. """
  55. all_locale: Optional[str] = None
  56. """To which locale value SearXNG's ``all`` language is mapped (shown a "Default
  57. language").
  58. """
  59. data_type: Literal['traits_v1'] = 'traits_v1'
  60. """Data type, default is 'traits_v1'.
  61. """
  62. custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
  63. """A place to store engine's custom traits, not related to the SearXNG core.
  64. """
  65. def get_language(self, searxng_locale: str, default=None):
  66. """Return engine's language string that *best fits* to SearXNG's locale.
  67. :param searxng_locale: SearXNG's internal representation of locale
  68. selected by the user.
  69. :param default: engine's default language
  70. The *best fits* rules are implemented in
  71. :py:obj:`searx.locales.get_engine_locale`. Except for the special value ``all``
  72. which is determined from :py:obj:`EngineTraits.all_locale`.
  73. """
  74. if searxng_locale == 'all' and self.all_locale is not None:
  75. return self.all_locale
  76. return locales.get_engine_locale(searxng_locale, self.languages, default=default)
  77. def get_region(self, searxng_locale: str, default=None):
  78. """Return engine's region string that best fits to SearXNG's locale.
  79. :param searxng_locale: SearXNG's internal representation of locale
  80. selected by the user.
  81. :param default: engine's default region
  82. The *best fits* rules are implemented in
  83. :py:obj:`searx.locales.get_engine_locale`. Except for the special value ``all``
  84. which is determined from :py:obj:`EngineTraits.all_locale`.
  85. """
  86. if searxng_locale == 'all' and self.all_locale is not None:
  87. return self.all_locale
  88. return locales.get_engine_locale(searxng_locale, self.regions, default=default)
  89. def is_locale_supported(self, searxng_locale: str) -> bool:
  90. """A *locale* (SearXNG's internal representation) is considered to be
  91. supported by the engine if the *region* or the *language* is supported
  92. by the engine.
  93. For verification the functions :py:func:`EngineTraits.get_region` and
  94. :py:func:`EngineTraits.get_language` are used.
  95. """
  96. if self.data_type == 'traits_v1':
  97. return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
  98. raise TypeError('engine traits of type %s is unknown' % self.data_type)
  99. def copy(self):
  100. """Create a copy of the dataclass object."""
  101. return EngineTraits(**dataclasses.asdict(self))
  102. @classmethod
  103. def fetch_traits(cls, engine: Engine) -> Union['EngineTraits', None]:
  104. """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
  105. and set properties from the origin engine in the object ``engine_traits``. If
  106. function does not exists, ``None`` is returned.
  107. """
  108. fetch_traits = getattr(engine, 'fetch_traits', None)
  109. engine_traits = None
  110. if fetch_traits:
  111. engine_traits = cls()
  112. fetch_traits(engine_traits)
  113. return engine_traits
  114. def set_traits(self, engine: Engine):
  115. """Set traits from self object in a :py:obj:`.Engine` namespace.
  116. :param engine: engine instance build by :py:func:`searx.engines.load_engine`
  117. """
  118. if self.data_type == 'traits_v1':
  119. self._set_traits_v1(engine)
  120. else:
  121. raise TypeError('engine traits of type %s is unknown' % self.data_type)
  122. def _set_traits_v1(self, engine: Engine):
  123. # For an engine, when there is `language: ...` in the YAML settings the engine
  124. # does support only this one language (region)::
  125. #
  126. # - name: google italian
  127. # engine: google
  128. # language: it
  129. # region: it-IT # type: ignore
  130. traits = self.copy()
  131. _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
  132. languages = traits.languages
  133. if hasattr(engine, 'language'):
  134. if engine.language not in languages:
  135. raise ValueError(_msg % (engine.name, 'language', engine.language))
  136. traits.languages = {engine.language: languages[engine.language]}
  137. regions = traits.regions
  138. if hasattr(engine, 'region'):
  139. if engine.region not in regions:
  140. raise ValueError(_msg % (engine.name, 'region', engine.region))
  141. traits.regions = {engine.region: regions[engine.region]}
  142. engine.language_support = bool(traits.languages or traits.regions)
  143. # set the copied & modified traits in engine's namespace
  144. engine.traits = traits
  145. class EngineTraitsMap(Dict[str, EngineTraits]):
  146. """A python dictionary to map :class:`EngineTraits` by engine name."""
  147. ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
  148. """File with persistence of the :py:obj:`EngineTraitsMap`."""
  149. def save_data(self):
  150. """Store EngineTraitsMap in in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
  151. with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f:
  152. json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder)
  153. @classmethod
  154. def from_data(cls) -> 'EngineTraitsMap':
  155. """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
  156. obj = cls()
  157. for k, v in ENGINE_TRAITS.items():
  158. obj[k] = EngineTraits(**v)
  159. return obj
  160. @classmethod
  161. def fetch_traits(cls, log: Callable) -> 'EngineTraitsMap':
  162. from searx import engines # pylint: disable=cyclic-import, import-outside-toplevel
  163. names = list(engines.engines)
  164. names.sort()
  165. obj = cls()
  166. for engine_name in names:
  167. engine = engines.engines[engine_name]
  168. traits = None
  169. # pylint: disable=broad-exception-caught
  170. try:
  171. traits = EngineTraits.fetch_traits(engine)
  172. except Exception as exc:
  173. log("FATAL: while fetch_traits %s: %s" % (engine_name, exc))
  174. if os.environ.get('FORCE', '').lower() not in ['on', 'true', '1']:
  175. raise
  176. v = ENGINE_TRAITS.get(engine_name)
  177. if v:
  178. log("FORCE: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name)
  179. traits = EngineTraits(**v)
  180. if traits is not None:
  181. log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
  182. log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
  183. obj[engine_name] = traits
  184. return obj
  185. def set_traits(self, engine: Engine | types.ModuleType):
  186. """Set traits in a :py:obj:`Engine` namespace.
  187. :param engine: engine instance build by :py:func:`searx.engines.load_engine`
  188. """
  189. engine_traits = EngineTraits(data_type='traits_v1')
  190. if engine.name in self.keys():
  191. engine_traits = self[engine.name]
  192. elif engine.engine in self.keys():
  193. # The key of the dictionary traits_map is the *engine name*
  194. # configured in settings.xml. When multiple engines are configured
  195. # in settings.yml to use the same origin engine (python module)
  196. # these additional engines can use the languages from the origin
  197. # engine. For this use the configured ``engine: ...`` from
  198. # settings.yml
  199. engine_traits = self[engine.engine]
  200. engine_traits.set_traits(engine)