Browse Source

[mod] data: implement a simple currencies (SQL) database (#4836)

To reduce the memory footprint, this patch no longer loads the JSON data
completely into memory.  Instead, there is an SQL database based on
`ExpireCacheSQLite`.

The class CurrenciesDB is a simple DB application that encapsulates the
DB (queries and initialization) and provides convenient methods like
`name_to_iso4217` and `iso4217_to_name`.

Related:

- https://github.com/searxng/searxng/discussions/1892
- https://github.com/searxng/searxng/pull/3458#issuecomment-2900807671
- https://github.com/searxng/searxng/pull/4650

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 week ago
parent
commit
848c8d0544

+ 6 - 10
searx/data/__init__.py

@@ -4,21 +4,17 @@
   make data.all
 
 """
-
+from __future__ import annotations
 
 __all__ = ["ahmia_blacklist_loader"]
 
 import json
-from pathlib import Path
 import typing
 
-from searx import logger
-
-log = logger.getChild("data")
+from .core import log, data_dir
+from .currencies import CurrenciesDB
 
-data_dir = Path(__file__).parent
-
-CURRENCIES: dict[str, typing.Any]
+CURRENCIES: CurrenciesDB
 USER_AGENTS: dict[str, typing.Any]
 EXTERNAL_URLS: dict[str, typing.Any]
 WIKIDATA_UNITS: dict[str, typing.Any]
@@ -29,7 +25,7 @@ ENGINE_TRAITS: dict[str, typing.Any]
 LOCALES: dict[str, typing.Any]
 
 lazy_globals = {
-    "CURRENCIES": None,
+    "CURRENCIES": CurrenciesDB(),
     "USER_AGENTS": None,
     "EXTERNAL_URLS": None,
     "WIKIDATA_UNITS": None,
@@ -41,7 +37,6 @@ lazy_globals = {
 }
 
 data_json_files = {
-    "CURRENCIES": "currencies.json",
     "USER_AGENTS": "useragents.json",
     "EXTERNAL_URLS": "external_urls.json",
     "WIKIDATA_UNITS": "wikidata_units.json",
@@ -63,6 +58,7 @@ def __getattr__(name):
         return data
 
     log.debug("init searx.data.%s", name)
+
     with open(data_dir / data_json_files[name], encoding='utf-8') as f:
         lazy_globals[name] = json.load(f)
 

+ 29 - 0
searx/data/core.py

@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# pylint: disable=missing-module-docstring
+from __future__ import annotations
+
+import pathlib
+
+from searx import logger
+from searx.cache import ExpireCacheCfg, ExpireCacheSQLite
+
+# Logger of the ``searx.data`` package (child "data" of the searx logger).
+log = logger.getChild("data")
+
+# Directory this module lives in; data files are addressed relative to it.
+data_dir = pathlib.Path(__file__).parent
+
+# Process-wide cache instance; stays ``None`` until get_cache() builds it.
+_DATA_CACHE: ExpireCacheSQLite = None  # type: ignore
+
+
+def get_cache():
+    """Return the shared ``DATA_CACHE`` instance, building it on first call.
+
+    The instance is kept in the module global ``_DATA_CACHE`` so that later
+    calls hand back the very same object.
+    """
+
+    global _DATA_CACHE  # pylint: disable=global-statement
+
+    if _DATA_CACHE is not None:
+        return _DATA_CACHE
+
+    cache_cfg = ExpireCacheCfg(
+        name="DATA_CACHE",
+        # MAX_VALUE_LEN=1024 * 200,  # max. 200kB length for a *serialized* value.
+        # MAXHOLD_TIME=60 * 60 * 24 * 7 * 4,  # 4 weeks
+    )
+    _DATA_CACHE = ExpireCacheSQLite.build_cache(cache_cfg)
+    return _DATA_CACHE

+ 55 - 0
searx/data/currencies.py

@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Simple implementation to store currencies data in a SQL database."""
+
+from __future__ import annotations
+
+__all__ = ["CurrenciesDB"]
+
+import json
+import pathlib
+
+from .core import get_cache, log
+
+
+class CurrenciesDB:
+    """Currency lookups answered from the shared data cache.
+
+    The content of :py:obj:`CurrenciesDB.json_file` is copied into the cache
+    once (see :py:obj:`CurrenciesDB.init`); the lookup methods then fetch
+    single entries from the cache instead of holding the JSON in memory.
+    """
+
+    # cache context of the mapping: currency name --> ISO 4217 code(s)
+    ctx_names = "data_currencies_names"
+    # cache context of the mapping: ISO 4217 code --> {language: name}
+    ctx_iso4217 = "data_currencies_iso4217"
+
+    # JSON file the cache is populated from
+    json_file = pathlib.Path(__file__).parent / "currencies.json"
+
+    def __init__(self):
+        self.cache = get_cache()
+
+    def init(self):
+        """Load the currencies into the cache unless the cache property
+        ``currencies loaded`` is already ``OK``."""
+        if self.cache.properties("currencies loaded") != "OK":
+            self.load()
+            self.cache.properties.set("currencies loaded", "OK")
+        # F I X M E:
+        #     do we need a maintenance .. remember: database is stored
+        #     in /tmp and will be rebuilt during the reboot anyway
+
+    def load(self):
+        """Read :py:obj:`CurrenciesDB.json_file` and store each entry of its
+        ``names`` and ``iso4217`` sections in the cache (``expire=None``),
+        using one cache context per section."""
+        log.debug("init searx.data.CURRENCIES")
+        with open(self.json_file, encoding="utf-8") as f:
+            data_dict = json.load(f)
+        for key, value in data_dict["names"].items():
+            self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
+        for key, value in data_dict["iso4217"].items():
+            self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)
+
+    def name_to_iso4217(self, name):
+        """Return the ISO 4217 code stored for the currency ``name``.
+
+        If several codes are stored for the name, the last one is returned.
+        If nothing is stored for the name, ``name`` itself is returned.  The
+        lookup uses ``name`` as is, no normalization is applied here.
+        """
+        self.init()
+
+        ret_val = self.cache.get(key=name, default=name, ctx=self.ctx_names)
+        if isinstance(ret_val, list):
+            # if more alternatives, use the last in the list
+            ret_val = ret_val[-1]
+        return ret_val
+
+    def iso4217_to_name(self, iso4217, language):
+        """Return the name of currency ``iso4217`` in ``language``.
+
+        If the code is unknown or has no name in this language, the
+        ``iso4217`` argument itself is returned.
+        """
+        self.init()
+
+        # The per-language names are stored by load() in the *iso4217*
+        # context; a lookup in the *names* context would never find them.
+        iso4217_languages: dict = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
+        return iso4217_languages.get(language, iso4217)

+ 7 - 4
searx/engines/currency_convert.py

@@ -3,6 +3,7 @@
 """
 
 import json
+from searx.result_types import EngineResults
 
 # about
 about = {
@@ -28,13 +29,15 @@ def request(_query, params):
     return params
 
 
-def response(resp):
+def response(resp) -> EngineResults:
+    res = EngineResults()
+
     # remove first and last lines to get only json
     json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2]
     try:
         conversion_rate = float(json.loads(json_resp)["to"][0]["mid"])
     except IndexError:
-        return []
+        return res
     answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
         resp.search_params['amount'],
         resp.search_params['from'],
@@ -46,5 +49,5 @@ def response(resp):
     )
 
     url = f"https://duckduckgo.com/?q={resp.search_params['from']}+to+{resp.search_params['to']}"
-
-    return [{"answer": answer, "url": url}]
+    res.add(res.types.Answer(answer=answer, url=url))
+    return res

+ 7 - 17
searx/search/processors/online_currency.py

@@ -12,24 +12,13 @@ from .online import OnlineProcessor
 parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 
 
-def normalize_name(name):
+def normalize_name(name: str):
+    """Normalize a currency name: strip surrounding whitespace, lower-case,
+    replace ``-`` by a space, drop all trailing ``s`` characters, collapse
+    runs of spaces and apply Unicode NFKD normalization."""
+    name = name.strip()
     name = name.lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 
 
-def name_to_iso4217(name):
-    name = normalize_name(name)
-    currency = CURRENCIES['names'].get(name, [name])
-    if isinstance(currency, str):
-        return currency
-    return currency[-1]
-
-
-def iso4217_to_name(iso4217, language):
-    return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
-
-
 class OnlineCurrencyProcessor(OnlineProcessor):
     """Processor class used by ``online_currency`` engines."""
 
@@ -52,14 +41,15 @@ class OnlineCurrencyProcessor(OnlineProcessor):
             amount = float(amount_str)
         except ValueError:
             return None
-        from_currency = name_to_iso4217(from_currency.strip())
-        to_currency = name_to_iso4217(to_currency.strip())
+
+        from_currency = CURRENCIES.name_to_iso4217(normalize_name(from_currency))
+        to_currency = CURRENCIES.name_to_iso4217(normalize_name(to_currency))
 
         params['amount'] = amount
         params['from'] = from_currency
         params['to'] = to_currency
-        params['from_name'] = iso4217_to_name(from_currency, 'en')
-        params['to_name'] = iso4217_to_name(to_currency, 'en')
+        params['from_name'] = CURRENCIES.iso4217_to_name(from_currency, "en")
+        params['to_name'] = CURRENCIES.iso4217_to_name(to_currency, "en")
         return params
 
     def get_default_tests(self):

+ 2 - 4
searxng_extra/update/update_currencies.py

@@ -15,9 +15,7 @@ import json
 
 from searx.locales import LOCALE_NAMES, locales_initialize
 from searx.engines import wikidata, set_loggers
-from searx.data import data_dir
-
-DATA_FILE = data_dir / 'currencies.json'
+from searx.data.currencies import CurrenciesDB
 
 set_loggers(wikidata, 'wikidata')
 locales_initialize()
@@ -149,7 +147,7 @@ def main():
         if len(db['names'][name]) == 1:
             db['names'][name] = db['names'][name][0]
 
-    with DATA_FILE.open('w', encoding='utf8') as f:
+    with CurrenciesDB.json_file.open('w', encoding='utf8') as f:
         json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)