123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299 |
- # SPDX-License-Identifier: AGPL-3.0-or-later
- """Implementations of the framework for the SearXNG engines.
- - :py:obj:`searx.enginelib.EngineCache`
- - :py:obj:`searx.enginelib.Engine`
- - :py:obj:`searx.enginelib.traits`
- There is a command line for developer purposes and for deeper analysis. Here is
- an example in which the command line is called in the development environment::
- $ ./manage pyenv.cmd bash --norc --noprofile
- (py3) python -m searx.enginelib --help
- .. hint::
- The long term goal is to modularize all implementations of the engine
- framework here in this Python package. ToDo:
- - move implementations of the :ref:`searx.engines loader` to a new module in
- the :py:obj:`searx.enginelib` namespace.
- -----
- """
- from __future__ import annotations
- __all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
- from typing import List, Callable, TYPE_CHECKING, Any
- import string
- import typer
- from ..cache import ExpireCache, ExpireCacheCfg
- if TYPE_CHECKING:
- from searx.enginelib import traits
- ENGINES_CACHE = ExpireCache.build_cache(
- ExpireCacheCfg(
- name="ENGINES_CACHE",
- MAXHOLD_TIME=60 * 60 * 24 * 7, # 7 days
- MAINTENANCE_PERIOD=60 * 60, # 2h
- )
- )
- """Global :py:obj:`searx.cache.ExpireCacheSQLite` instance where the cached
- values from all engines are stored. The `MAXHOLD_TIME` is 7 days and the
- `MAINTENANCE_PERIOD` is set to two hours."""
- app = typer.Typer()
- @app.command()
- def state():
- """Show state for the caches of the engines."""
- title = "cache tables and key/values"
- print(title)
- print("=" * len(title))
- print(ENGINES_CACHE.state().report())
- print()
- title = f"properties of {ENGINES_CACHE.cfg.name}"
- print(title)
- print("=" * len(title))
- print(str(ENGINES_CACHE.properties)) # type: ignore
- @app.command()
- def maintenance(force: bool = True):
- """Carry out maintenance on cache of the engines."""
- ENGINES_CACHE.maintenance(force=force)
- class EngineCache:
- """Persistent (SQLite) key/value cache that deletes its values again after
- ``expire`` seconds (default/max: :py:obj:`MAXHOLD_TIME
- <searx.cache.ExpireCacheCfg.MAXHOLD_TIME>`). This class is a wrapper around
- :py:obj:`ENGINES_CACHE` (:py:obj:`ExpireCacheSQLite
- <searx.cache.ExpireCacheSQLite>`).
- In the :origin:`searx/engines/demo_offline.py` engine you can find an
- exemplary implementation of such a cache other exaples are implemeted
- in:
- - :origin:`searx/engines/radio_browser.py`
- - :origin:`searx/engines/soundcloud.py`
- - :origin:`searx/engines/startpage.py`
- .. code: python
- from searx.enginelib import EngineCache
- CACHE: EngineCache
- def init(engine_settings):
- global CACHE
- CACHE = EngineCache(engine_settings["name"])
- def request(query, params):
- token = CACHE.get(key="token")
- if token is None:
- token = get_token()
- # cache token of this engine for 1h
- CACHE.set(key="token", value=token, expire=3600)
- ...
- For introspection of the DB, jump into developer environment and run command to
- show cache state::
- $ ./manage pyenv.cmd bash --norc --noprofile
- (py3) python -m searx.enginelib cache state
- cache tables and key/values
- ===========================
- [demo_offline ] 2025-04-22 11:32:50 count --> (int) 4
- [startpage ] 2025-04-22 12:32:30 SC_CODE --> (str) fSOBnhEMlDfE20
- [duckduckgo ] 2025-04-22 12:32:31 4dff493e.... --> (str) 4-128634958369380006627592672385352473325
- [duckduckgo ] 2025-04-22 12:40:06 3e2583e2.... --> (str) 4-263126175288871260472289814259666848451
- [radio_browser ] 2025-04-23 11:33:08 servers --> (list) ['https://de2.api.radio-browser.info', ...]
- [soundcloud ] 2025-04-29 11:40:06 guest_client_id --> (str) EjkRJG0BLNEZquRiPZYdNtJdyGtTuHdp
- [wolframalpha ] 2025-04-22 12:40:06 code --> (str) 5aa79f86205ad26188e0e26e28fb7ae7
- number of tables: 6
- number of key/value pairs: 7
- In the "cache tables and key/values" section, the table name (engine name) is at
- first position on the second there is the calculated expire date and on the
- third and fourth position the key/value is shown.
- About duckduckgo: The *vqd coode* of ddg depends on the query term and therefore
- the key is a hash value of the query term (to not to store the raw query term).
- In the "properties of ENGINES_CACHE" section all properties of the SQLiteAppl /
- ExpireCache and their last modification date are shown::
- properties of ENGINES_CACHE
- ===========================
- [last modified: 2025-04-22 11:32:27] DB_SCHEMA : 1
- [last modified: 2025-04-22 11:32:27] LAST_MAINTENANCE :
- [last modified: 2025-04-22 11:32:27] crypt_hash : ca612e3566fdfd7cf7efe...
- [last modified: 2025-04-22 11:32:30] CACHE-TABLE--demo_offline: demo_offline
- [last modified: 2025-04-22 11:32:30] CACHE-TABLE--startpage: startpage
- [last modified: 2025-04-22 11:32:31] CACHE-TABLE--duckduckgo: duckduckgo
- [last modified: 2025-04-22 11:33:08] CACHE-TABLE--radio_browser: radio_browser
- [last modified: 2025-04-22 11:40:06] CACHE-TABLE--soundcloud: soundcloud
- [last modified: 2025-04-22 11:40:06] CACHE-TABLE--wolframalpha: wolframalpha
- These properties provide information about the state of the ExpireCache and
- control the behavior. For example, the maintenance intervals are controlled by
- the last modification date of the LAST_MAINTENANCE property and the hash value
- of the password can be used to detect whether the password has been changed (in
- this case the DB entries can no longer be decrypted and the entire cache must be
- discarded).
- """
- def __init__(self, engine_name: str, expire: int | None = None):
- self.expire = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME
- _valid = "-_." + string.ascii_letters + string.digits
- self.table_name = "".join([c if c in _valid else "_" for c in engine_name])
- def set(self, key: str, value: Any, expire: int | None = None) -> bool:
- return ENGINES_CACHE.set(
- key=key,
- value=value,
- expire=expire or self.expire,
- table=self.table_name,
- )
- def get(self, key: str, default=None) -> Any:
- return ENGINES_CACHE.get(key, default=default, table=self.table_name)
- def secret_hash(self, name: str | bytes) -> str:
- return ENGINES_CACHE.secret_hash(name=name)
- class Engine: # pylint: disable=too-few-public-methods
- """Class of engine instances build from YAML settings.
- Further documentation see :ref:`general engine configuration`.
- .. hint::
- This class is currently never initialized and only used for type hinting.
- """
- # Common options in the engine module
- engine_type: str
- """Type of the engine (:ref:`searx.search.processors`)"""
- paging: bool
- """Engine supports multiple pages."""
- time_range_support: bool
- """Engine supports search time range."""
- safesearch: bool
- """Engine supports SafeSearch"""
- language_support: bool
- """Engine supports languages (locales) search."""
- language: str
- """For an engine, when there is ``language: ...`` in the YAML settings the engine
- does support only this one language:
- .. code:: yaml
- - name: google french
- engine: google
- language: fr
- """
- region: str
- """For an engine, when there is ``region: ...`` in the YAML settings the engine
- does support only this one region::
- .. code:: yaml
- - name: google belgium
- engine: google
- region: fr-BE
- """
- fetch_traits: Callable
- """Function to to fetch engine's traits from origin."""
- traits: traits.EngineTraits
- """Traits of the engine."""
- # settings.yml
- categories: List[str]
- """Specifies to which :ref:`engine categories` the engine should be added."""
- name: str
- """Name that will be used across SearXNG to define this engine. In settings, on
- the result page .."""
- engine: str
- """Name of the python file used to handle requests and responses to and from
- this search engine (file name from :origin:`searx/engines` without
- ``.py``)."""
- enable_http: bool
- """Enable HTTP (by default only HTTPS is enabled)."""
- shortcut: str
- """Code used to execute bang requests (``!foo``)"""
- timeout: float
- """Specific timeout for search-engine."""
- display_error_messages: bool
- """Display error messages on the web UI."""
- proxies: dict
- """Set proxies for a specific engine (YAML):
- .. code:: yaml
- proxies :
- http: socks5://proxy:port
- https: socks5://proxy:port
- """
- disabled: bool
- """To disable by default the engine, but not deleting it. It will allow the
- user to manually activate it in the settings."""
- inactive: bool
- """Remove the engine from the settings (*disabled & removed*)."""
- about: dict
- """Additional fields describing the engine.
- .. code:: yaml
- about:
- website: https://example.com
- wikidata_id: Q306656
- official_api_documentation: https://example.com/api-doc
- use_official_api: true
- require_api_key: true
- results: HTML
- """
- using_tor_proxy: bool
- """Using tor proxy (``true``) or not (``false``) for this engine."""
- send_accept_language_header: bool
- """When this option is activated, the language (locale) that is selected by
- the user is used to build and send a ``Accept-Language`` header in the
- request to the origin search engine."""
- tokens: List[str]
- """A list of secret tokens to make this engine *private*, more details see
- :ref:`private engines`."""
- weight: int
- """Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
|