__init__.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Implementations of the framework for the SearXNG engines.
  3. - :py:obj:`searx.enginelib.EngineCache`
  4. - :py:obj:`searx.enginelib.Engine`
  5. - :py:obj:`searx.enginelib.traits`
  6. There is a command line for developer purposes and for deeper analysis. Here is
  7. an example in which the command line is called in the development environment::
  8. $ ./manage pyenv.cmd bash --norc --noprofile
  9. (py3) python -m searx.enginelib --help
  10. .. hint::
  11. The long term goal is to modularize all implementations of the engine
  12. framework here in this Python package. ToDo:
  13. - move implementations of the :ref:`searx.engines loader` to a new module in
  14. the :py:obj:`searx.enginelib` namespace.
  15. -----
  16. """
  17. from __future__ import annotations
  18. __all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
  19. from typing import List, Callable, TYPE_CHECKING, Any
  20. import string
  21. import typer
  22. from ..cache import ExpireCache, ExpireCacheCfg
  23. if TYPE_CHECKING:
  24. from searx.enginelib import traits
  25. ENGINES_CACHE = ExpireCache.build_cache(
  26. ExpireCacheCfg(
  27. name="ENGINES_CACHE",
  28. MAXHOLD_TIME=60 * 60 * 24 * 7, # 7 days
  29. MAINTENANCE_PERIOD=60 * 60, # 2h
  30. )
  31. )
  32. """Global :py:obj:`searx.cache.ExpireCacheSQLite` instance where the cached
  33. values from all engines are stored. The `MAXHOLD_TIME` is 7 days and the
  34. `MAINTENANCE_PERIOD` is set to two hours."""
# Typer application of this package; the functions decorated with
# ``@app.command()`` in this module are registered on it.
app = typer.Typer()
  36. @app.command()
  37. def state():
  38. """Show state for the caches of the engines."""
  39. title = "cache tables and key/values"
  40. print(title)
  41. print("=" * len(title))
  42. print(ENGINES_CACHE.state().report())
  43. print()
  44. title = f"properties of {ENGINES_CACHE.cfg.name}"
  45. print(title)
  46. print("=" * len(title))
  47. print(str(ENGINES_CACHE.properties)) # type: ignore
@app.command()
def maintenance(force: bool = True):
    """Carry out maintenance on cache of the engines."""
    # Thin command wrapper: ``force`` is handed over unchanged to the
    # maintenance routine of the global ENGINES_CACHE; the result of that call
    # is not used here.
    ENGINES_CACHE.maintenance(force=force)
  52. class EngineCache:
  53. """Persistent (SQLite) key/value cache that deletes its values again after
  54. ``expire`` seconds (default/max: :py:obj:`MAXHOLD_TIME
  55. <searx.cache.ExpireCacheCfg.MAXHOLD_TIME>`). This class is a wrapper around
  56. :py:obj:`ENGINES_CACHE` (:py:obj:`ExpireCacheSQLite
  57. <searx.cache.ExpireCacheSQLite>`).
  58. In the :origin:`searx/engines/demo_offline.py` engine you can find an
  59. exemplary implementation of such a cache other exaples are implemeted
  60. in:
  61. - :origin:`searx/engines/radio_browser.py`
  62. - :origin:`searx/engines/soundcloud.py`
  63. - :origin:`searx/engines/startpage.py`
  64. .. code: python
  65. from searx.enginelib import EngineCache
  66. CACHE: EngineCache
  67. def init(engine_settings):
  68. global CACHE
  69. CACHE = EngineCache(engine_settings["name"])
  70. def request(query, params):
  71. token = CACHE.get(key="token")
  72. if token is None:
  73. token = get_token()
  74. # cache token of this engine for 1h
  75. CACHE.set(key="token", value=token, expire=3600)
  76. ...
  77. For introspection of the DB, jump into developer environment and run command to
  78. show cache state::
  79. $ ./manage pyenv.cmd bash --norc --noprofile
  80. (py3) python -m searx.enginelib cache state
  81. cache tables and key/values
  82. ===========================
  83. [demo_offline ] 2025-04-22 11:32:50 count --> (int) 4
  84. [startpage ] 2025-04-22 12:32:30 SC_CODE --> (str) fSOBnhEMlDfE20
  85. [duckduckgo ] 2025-04-22 12:32:31 4dff493e.... --> (str) 4-128634958369380006627592672385352473325
  86. [duckduckgo ] 2025-04-22 12:40:06 3e2583e2.... --> (str) 4-263126175288871260472289814259666848451
  87. [radio_browser ] 2025-04-23 11:33:08 servers --> (list) ['https://de2.api.radio-browser.info', ...]
  88. [soundcloud ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp
  89. [wolframalpha ] 2025-04-22 12:40:06 code --> (str) 5aa79f86205ad26188e0e26e28fb7ae7
  90. number of tables: 6
  91. number of key/value pairs: 7
  92. In the "cache tables and key/values" section, the table name (engine name) is at
  93. first position on the second there is the calculated expire date and on the
  94. third and fourth position the key/value is shown.
  95. About duckduckgo: The *vqd coode* of ddg depends on the query term and therefore
  96. the key is a hash value of the query term (to not to store the raw query term).
  97. In the "properties of ENGINES_CACHE" section all properties of the SQLiteAppl /
  98. ExpireCache and their last modification date are shown::
  99. properties of ENGINES_CACHE
  100. ===========================
  101. [last modified: 2025-04-22 11:32:27] DB_SCHEMA : 1
  102. [last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE :
  103. [last modified: 2025-04-22 11:32:27] crypt_hash : ca612e3566fdfd7cf7efe...
  104. [last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline
  105. [last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage
  106. [last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo
  107. [last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser
  108. [last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud
  109. [last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha
  110. These properties provide information about the state of the ExpireCache and
  111. control the behavior. For example, the maintenance intervals are controlled by
  112. the last modification date of the LAST_MAINTENANCE property and the hash value
  113. of the password can be used to detect whether the password has been changed (in
  114. this case the DB entries can no longer be decrypted and the entire cache must be
  115. discarded).
  116. """
  117. def __init__(self, engine_name: str, expire: int | None = None):
  118. self.expire = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
  119. _valid = "-_." + string.ascii_letters + string.digits
  120. self.table_name = "".join([c if c in _valid else "_" for c in engine_name])
  121. def set(self, key: str, value: Any, expire: int | None = None) -> bool:
  122. return ENGINES_CACHE.set(
  123. key=key,
  124. value=value,
  125. expire=expire or self.expire,
  126. table=self.table_name,
  127. )
  128. def get(self, key: str, default=None) -> Any:
  129. return ENGINES_CACHE.get(key, default=default, table=self.table_name)
  130. def secret_hash(self, name: str | bytes) -> str:
  131. return ENGINES_CACHE.secret_hash(name=name)
  132. class Engine: # pylint: disable=too-few-public-methods
  133. """Class of engine instances build from YAML settings.
  134. Further documentation see :ref:`general engine configuration`.
  135. .. hint::
  136. This class is currently never initialized and only used for type hinting.
  137. """
  138. # Common options in the engine module
  139. engine_type: str
  140. """Type of the engine (:ref:`searx.search.processors`)"""
  141. paging: bool
  142. """Engine supports multiple pages."""
  143. time_range_support: bool
  144. """Engine supports search time range."""
  145. safesearch: bool
  146. """Engine supports SafeSearch"""
  147. language_support: bool
  148. """Engine supports languages (locales) search."""
  149. language: str
  150. """For an engine, when there is ``language: ...`` in the YAML settings the engine
  151. does support only this one language:
  152. .. code:: yaml
  153. - name: google french
  154. engine: google
  155. language: fr
  156. """
  157. region: str
  158. """For an engine, when there is ``region: ...`` in the YAML settings the engine
  159. does support only this one region::
  160. .. code:: yaml
  161. - name: google belgium
  162. engine: google
  163. region: fr-BE
  164. """
  165. fetch_traits: Callable
  166. """Function to to fetch engine's traits from origin."""
  167. traits: traits.EngineTraits
  168. """Traits of the engine."""
  169. # settings.yml
  170. categories: List[str]
  171. """Specifies to which :ref:`engine categories` the engine should be added."""
  172. name: str
  173. """Name that will be used across SearXNG to define this engine. In settings, on
  174. the result page .."""
  175. engine: str
  176. """Name of the python file used to handle requests and responses to and from
  177. this search engine (file name from :origin:`searx/engines` without
  178. ``.py``)."""
  179. enable_http: bool
  180. """Enable HTTP (by default only HTTPS is enabled)."""
  181. shortcut: str
  182. """Code used to execute bang requests (``!foo``)"""
  183. timeout: float
  184. """Specific timeout for search-engine."""
  185. display_error_messages: bool
  186. """Display error messages on the web UI."""
  187. proxies: dict
  188. """Set proxies for a specific engine (YAML):
  189. .. code:: yaml
  190. proxies :
  191. http: socks5://proxy:port
  192. https: socks5://proxy:port
  193. """
  194. disabled: bool
  195. """To disable by default the engine, but not deleting it. It will allow the
  196. user to manually activate it in the settings."""
  197. inactive: bool
  198. """Remove the engine from the settings (*disabled & removed*)."""
  199. about: dict
  200. """Additional fields describing the engine.
  201. .. code:: yaml
  202. about:
  203. website: https://example.com
  204. wikidata_id: Q306656
  205. official_api_documentation: https://example.com/api-doc
  206. use_official_api: true
  207. require_api_key: true
  208. results: HTML
  209. """
  210. using_tor_proxy: bool
  211. """Using tor proxy (``true``) or not (``false``) for this engine."""
  212. send_accept_language_header: bool
  213. """When this option is activated, the language (locale) that is selected by
  214. the user is used to build and send a ``Accept-Language`` header in the
  215. request to the origin search engine."""
  216. tokens: List[str]
  217. """A list of secret tokens to make this engine *private*, more details see
  218. :ref:`private engines`."""
  219. weight: int
  220. """Weighting of the results of this engine (:ref:`weight <settings engines>`)."""