Browse Source

[fix] prevent multiple, parallel initializations of tables in the cache DB (#4991)

Depending on the runtime behavior, the initial loading of the DB tables in the
cache could be performed multiple times and in parallel. The concurrent
accesses then led to the `sqlite3.OperationalError: database is locked`
exception reported in #4951.

Since this problem depends heavily on timing (e.g., how long it takes to
retrieve the content for a table), the error was not observed in all
installations.

Closes: https://github.com/searxng/searxng/issues/4951

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 5 days ago
parent
commit
2fe8540903

+ 2 - 1
searx/data/currencies.py

@@ -24,8 +24,9 @@ class CurrenciesDB:
 
     def init(self):
         if self.cache.properties("currencies loaded") != "OK":
-            self.load()
+            # To avoid parallel initializations, the property is set first
             self.cache.properties.set("currencies loaded", "OK")
+            self.load()
         # F I X M E:
         #     do we need a maintenance .. rember: database is stored
         #     in /tmp and will be rebuild during the reboot anyway

+ 2 - 3
searx/data/tracker_patterns.py

@@ -7,7 +7,6 @@ import typing
 __all__ = ["TrackerPatternsDB"]
 
 import re
-import pathlib
 from collections.abc import Iterator
 from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
 
@@ -22,7 +21,6 @@ class TrackerPatternsDB:
     # pylint: disable=missing-class-docstring
 
     ctx_name = "data_tracker_patterns"
-    json_file = pathlib.Path(__file__).parent / "tracker_patterns.json"
 
     CLEAR_LIST_URL = [
         # ClearURL rule lists, the first one that responds HTTP 200 is used
@@ -42,8 +40,9 @@ class TrackerPatternsDB:
 
     def init(self):
         if self.cache.properties("tracker_patterns loaded") != "OK":
-            self.load()
+            # To avoid parallel initializations, the property is set first
             self.cache.properties.set("tracker_patterns loaded", "OK")
+            self.load()
         # F I X M E:
         #     do we need a maintenance .. rember: database is stored
         #     in /tmp and will be rebuild during the reboot anyway

+ 5 - 0
searx/plugins/tracker_url_remover.py

@@ -13,6 +13,7 @@ from searx.data import TRACKER_PATTERNS
 from . import Plugin, PluginInfo
 
 if typing.TYPE_CHECKING:
+    import flask
     from searx.search import SearchWithPlugins
     from searx.extended_types import SXNG_Request
     from searx.result_types import Result, LegacyResult
@@ -37,6 +38,10 @@ class SXNGPlugin(Plugin):
             preference_section="privacy",
         )
 
+    def init(self, app: "flask.Flask") -> bool:
+        TRACKER_PATTERNS.init()
+        return True
+
     def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
 
         result.filter_urls(self.filter_url_field)

+ 4 - 0
searx/search/processors/online_currency.py

@@ -24,6 +24,10 @@ class OnlineCurrencyProcessor(OnlineProcessor):
 
     engine_type = 'online_currency'
 
+    def initialize(self):
+        CURRENCIES.init()
+        super().initialize()
+
     def get_params(self, search_query, engine_category):
         """Returns a set of :ref:`request params <engine request online_currency>`
         or ``None`` if search query does not match to :py:obj:`parser_re`."""