Browse Source

[fix] prevent multiple, parallel initializations of tables in the cache DB (#4991)

Depending on runtime timing, the initial loading of the DB tables in the cache
could be performed multiple times and in parallel. The concurrent accesses
then led to the `sqlite3.OperationalError: database is locked` exception
reported in #4951.

Since this problem depends heavily on timing (e.g., how long it takes to
retrieve the content for a table), the error could not be observed in all
installations.

Closes: https://github.com/searxng/searxng/issues/4951

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 5 days ago
parent
commit
2fe8540903

+ 2 - 1
searx/data/currencies.py

@@ -24,8 +24,9 @@ class CurrenciesDB:
 
 
     def init(self):
     def init(self):
         if self.cache.properties("currencies loaded") != "OK":
         if self.cache.properties("currencies loaded") != "OK":
-            self.load()
+            # To avoid parallel initializations, the property is set first
             self.cache.properties.set("currencies loaded", "OK")
             self.cache.properties.set("currencies loaded", "OK")
+            self.load()
         # F I X M E:
         # F I X M E:
         #     do we need a maintenance .. rember: database is stored
         #     do we need a maintenance .. rember: database is stored
         #     in /tmp and will be rebuild during the reboot anyway
         #     in /tmp and will be rebuild during the reboot anyway

+ 2 - 3
searx/data/tracker_patterns.py

@@ -7,7 +7,6 @@ import typing
 __all__ = ["TrackerPatternsDB"]
 __all__ = ["TrackerPatternsDB"]
 
 
 import re
 import re
-import pathlib
 from collections.abc import Iterator
 from collections.abc import Iterator
 from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
 from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
 
 
@@ -22,7 +21,6 @@ class TrackerPatternsDB:
     # pylint: disable=missing-class-docstring
     # pylint: disable=missing-class-docstring
 
 
     ctx_name = "data_tracker_patterns"
     ctx_name = "data_tracker_patterns"
-    json_file = pathlib.Path(__file__).parent / "tracker_patterns.json"
 
 
     CLEAR_LIST_URL = [
     CLEAR_LIST_URL = [
         # ClearURL rule lists, the first one that responds HTTP 200 is used
         # ClearURL rule lists, the first one that responds HTTP 200 is used
@@ -42,8 +40,9 @@ class TrackerPatternsDB:
 
 
     def init(self):
     def init(self):
         if self.cache.properties("tracker_patterns loaded") != "OK":
         if self.cache.properties("tracker_patterns loaded") != "OK":
-            self.load()
+            # To avoid parallel initializations, the property is set first
             self.cache.properties.set("tracker_patterns loaded", "OK")
             self.cache.properties.set("tracker_patterns loaded", "OK")
+            self.load()
         # F I X M E:
         # F I X M E:
         #     do we need a maintenance .. rember: database is stored
         #     do we need a maintenance .. rember: database is stored
         #     in /tmp and will be rebuild during the reboot anyway
         #     in /tmp and will be rebuild during the reboot anyway

+ 5 - 0
searx/plugins/tracker_url_remover.py

@@ -13,6 +13,7 @@ from searx.data import TRACKER_PATTERNS
 from . import Plugin, PluginInfo
 from . import Plugin, PluginInfo
 
 
 if typing.TYPE_CHECKING:
 if typing.TYPE_CHECKING:
+    import flask
     from searx.search import SearchWithPlugins
     from searx.search import SearchWithPlugins
     from searx.extended_types import SXNG_Request
     from searx.extended_types import SXNG_Request
     from searx.result_types import Result, LegacyResult
     from searx.result_types import Result, LegacyResult
@@ -37,6 +38,10 @@ class SXNGPlugin(Plugin):
             preference_section="privacy",
             preference_section="privacy",
         )
         )
 
 
+    def init(self, app: "flask.Flask") -> bool:
+        TRACKER_PATTERNS.init()
+        return True
+
     def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
     def on_result(self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result) -> bool:
 
 
         result.filter_urls(self.filter_url_field)
         result.filter_urls(self.filter_url_field)

+ 4 - 0
searx/search/processors/online_currency.py

@@ -24,6 +24,10 @@ class OnlineCurrencyProcessor(OnlineProcessor):
 
 
     engine_type = 'online_currency'
     engine_type = 'online_currency'
 
 
+    def initialize(self):
+        CURRENCIES.init()
+        super().initialize()
+
     def get_params(self, search_query, engine_category):
     def get_params(self, search_query, engine_category):
         """Returns a set of :ref:`request params <engine request online_currency>`
         """Returns a set of :ref:`request params <engine request online_currency>`
         or ``None`` if search query does not match to :py:obj:`parser_re`."""
         or ``None`` if search query does not match to :py:obj:`parser_re`."""