Browse Source

[fix] ResourceWarning: unclosed database in sqlite3

Reported:

- https://github.com/inetol-infrastructure/searxng-container/issues/5

Related:

- https://github.com/searxng/searxng/issues/4405#issuecomment-2692352352

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 2 months ago
parent
commit
4a594f1b53
4 changed files with 204 additions and 77 deletions
  1. 1 1
      docs/dev/engines/offline_concept.rst
  2. 8 1
      searx/favicons/__init__.py
  3. 18 1
      searx/favicons/cache.py
  4. 177 74
      searx/sqlitedb.py

+ 1 - 1
docs/dev/engines/offline_concept.rst

@@ -29,7 +29,7 @@ Programming Interface
   parameter. This function can be omitted, if there is no need to setup anything
   parameter. This function can be omitted, if there is no need to setup anything
   in advance.
   in advance.
 
 
-:py:func:`search(query, params) <searx.engines.demo_offline.searc>`
+:py:func:`search(query, params) <searx.engines.demo_offline.search>`
   Each offline engine has a function named ``search``.  This function is
   Each offline engine has a function named ``search``.  This function is
   responsible to perform a search and return the results in a presentable
   responsible to perform a search and return the results in a presentable
   format. (Where *presentable* means presentable by the selected result
   format. (Where *presentable* means presentable by the selected result

+ 8 - 1
searx/favicons/__init__.py

@@ -1,5 +1,12 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Implementations for providing the favicons in SearXNG"""
+"""Implementations for providing the favicons in SearXNG.
+
+There is a command line for developer purposes and for deeper analysis.  Here is
+an example in which the command line is called in the development environment::
+
+  $ ./manage pyenv.cmd bash --norc --noprofile
+  (py3) python -m searx.favicons --help
+"""
 
 
 from __future__ import annotations
 from __future__ import annotations
 
 

+ 18 - 1
searx/favicons/cache.py

@@ -236,6 +236,12 @@ class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):
     model in the SQLite DB is implemented using the abstract class
     model in the SQLite DB is implemented using the abstract class
     :py:obj:`sqlitedb.SQLiteAppl`.
     :py:obj:`sqlitedb.SQLiteAppl`.
 
 
+    For introspection of the DB, jump into developer environment and run command
+    to show cache state::
+
+        $ ./manage pyenv.cmd bash --norc --noprofile
+        (py3) python -m searx.favicons cache state
+
     The following configurations are required / supported:
     The following configurations are required / supported:
 
 
     - :py:obj:`FaviconCacheConfig.db_url`
     - :py:obj:`FaviconCacheConfig.db_url`
@@ -357,6 +363,10 @@ CREATE TABLE IF NOT EXISTS blob_map (
             if sha256 != FALLBACK_ICON:
             if sha256 != FALLBACK_ICON:
                 conn.execute(self.SQL_INSERT_BLOBS, (sha256, bytes_c, mime, data))
                 conn.execute(self.SQL_INSERT_BLOBS, (sha256, bytes_c, mime, data))
             conn.execute(self.SQL_INSERT_BLOB_MAP, (sha256, resolver, authority))
             conn.execute(self.SQL_INSERT_BLOB_MAP, (sha256, resolver, authority))
+        # hint: the with context of the connection object closes the transaction
+        # but not the DB connection.  The connection has to be closed by the
+        # caller of self.connect()!
+        conn.close()
 
 
         return True
         return True
 
 
@@ -376,7 +386,8 @@ CREATE TABLE IF NOT EXISTS blob_map (
             return
             return
         self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!
         self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!
 
 
-        # do maintenance tasks
+        # Do maintenance tasks.  This can be take a little more time, to avoid
+        # DB locks, etablish a new DB connecton.
 
 
         with self.connect() as conn:
         with self.connect() as conn:
 
 
@@ -407,6 +418,12 @@ CREATE TABLE IF NOT EXISTS blob_map (
                     conn.execute("DELETE FROM blob_map WHERE sha256 IN ('%s')" % "','".join(sha_list))
                     conn.execute("DELETE FROM blob_map WHERE sha256 IN ('%s')" % "','".join(sha_list))
                     logger.debug("dropped %s blobs with total size of %s bytes", len(sha_list), c)
                     logger.debug("dropped %s blobs with total size of %s bytes", len(sha_list), c)
 
 
+        # Vacuuming the WALs
+        # https://www.theunterminatedstring.com/sqlite-vacuuming/
+
+        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
+        conn.close()
+
     def _query_val(self, sql, default=None):
     def _query_val(self, sql, default=None):
         val = self.DB.execute(sql).fetchone()
         val = self.DB.execute(sql).fetchone()
         if val is not None:
         if val is not None:

+ 177 - 74
searx/sqlitedb.py

@@ -7,20 +7,83 @@
 :py:obj:`SQLiteProperties`:
 :py:obj:`SQLiteProperties`:
   Class to manage properties stored in a database.
   Class to manage properties stored in a database.
 
 
-----
+Examplarical implementations based on :py:obj:`SQLiteAppl`:
+
+:py:obj:`searx.cache.ExpireCacheSQLite` :
+  Cache that manages key/value pairs in a SQLite DB, in which the key/value
+  pairs are deleted after an "expire" time.  This type of cache is used, for
+  example, for the engines, see :py:obj:`searx.enginelib.EngineCache`.
+
+:py:obj:`searx.favicons.cache.FaviconCacheSQLite` :
+  Favicon cache that manages the favicon BLOBs in a SQLite DB.
 
 
+----
 """
 """
 from __future__ import annotations
 from __future__ import annotations
 
 
-import sys
+import abc
+import datetime
 import re
 import re
 import sqlite3
 import sqlite3
+import sys
 import threading
 import threading
-import abc
+import uuid
 
 
 from searx import logger
 from searx import logger
 
 
-logger = logger.getChild('sqlitedb')
+logger = logger.getChild("sqlitedb")
+
+THREAD_LOCAL = threading.local()
+
+
+class DBSession:
+    """A *thead-local* DB session"""
+
+    @classmethod
+    def get_connect(cls, app: SQLiteAppl) -> sqlite3.Connection:
+        """Returns a thread local DB connection.  The connection is only
+        established once per thread.
+        """
+        if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
+            THREAD_LOCAL.DBSession_map = {}
+
+        session = THREAD_LOCAL.DBSession_map.get(app.db_url)
+        if session is None:
+            session = cls(app)
+        return session.conn
+
+    def __init__(self, app: SQLiteAppl):
+        self.uuid = uuid.uuid4()
+        self.app = app
+        self._conn = None
+        # self.__del__ will be called, when thread ends
+        if getattr(THREAD_LOCAL, "DBSession_map", None) is None:
+            THREAD_LOCAL.DBSession_map = {}
+        THREAD_LOCAL.DBSession_map[self.app.db_url] = self
+
+    @property
+    def conn(self) -> sqlite3.Connection:
+        msg = f"[{threading.current_thread().ident}] DBSession: " f"{self.app.__class__.__name__}({self.app.db_url})"
+        if self._conn is None:
+            self._conn = self.app.connect()
+            logger.debug("%s --> created new connection", msg)
+        # else:
+        #     logger.debug("%s --> already connected", msg)
+
+        return self._conn
+
+    def __del__(self):
+        try:
+            if self._conn is not None:
+                # HINT: Don't use Python's logging facility in a destructor, it
+                # will produce error reports when python aborts the process or
+                # thread, because at this point objects that the logging module
+                # needs, do not exist anymore.
+                # msg = f"DBSession: close [{self.uuid}] {self.app.__class__.__name__}({self.app.db_url})"
+                # logger.debug(msg)
+                self._conn.close()
+        except Exception:  # pylint: disable=broad-exception-caught
+            pass
 
 
 
 
 class SQLiteAppl(abc.ABC):
 class SQLiteAppl(abc.ABC):
@@ -51,13 +114,18 @@ class SQLiteAppl(abc.ABC):
     """
     """
 
 
     SQLITE_JOURNAL_MODE = "WAL"
     SQLITE_JOURNAL_MODE = "WAL"
+    """``SQLiteAppl`` applications are optimzed for WAL_ mode, its not recommend
+    to change the journal mode (see :py:obj:`SQLiteAppl.tear_down`).
+
+    .. _WAL: https://sqlite.org/wal.html
+    """
     SQLITE_CONNECT_ARGS = {
     SQLITE_CONNECT_ARGS = {
         # "timeout": 5.0,
         # "timeout": 5.0,
         # "detect_types": 0,
         # "detect_types": 0,
         "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
         "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
         "cached_statements": 0,  # https://github.com/python/cpython/issues/118172
         "cached_statements": 0,  # https://github.com/python/cpython/issues/118172
         # "uri": False,
         # "uri": False,
-        "autocommit": False,
+        "isolation_level": None,
     }  # fmt:skip
     }  # fmt:skip
     """Connection arguments (:py:obj:`sqlite3.connect`)
     """Connection arguments (:py:obj:`sqlite3.connect`)
 
 
@@ -66,10 +134,6 @@ class SQLiteAppl(abc.ABC):
       ``serialized``.  The check is more of a hindrance in this case because it
       ``serialized``.  The check is more of a hindrance in this case because it
       would prevent a DB connector from being used in multiple threads.
       would prevent a DB connector from being used in multiple threads.
 
 
-    ``autocommit``:
-      Is disabled by default.  Note: autocommit option has been added in Python
-      3.12.
-
     ``cached_statements``:
     ``cached_statements``:
       Is set to ``0`` by default.  Note: Python 3.12+ fetch result are not
       Is set to ``0`` by default.  Note: Python 3.12+ fetch result are not
       consistent in multi-threading application and causing an API misuse error.
       consistent in multi-threading application and causing an API misuse error.
@@ -89,9 +153,17 @@ class SQLiteAppl(abc.ABC):
 
 
         self.db_url = db_url
         self.db_url = db_url
         self.properties = SQLiteProperties(db_url)
         self.properties = SQLiteProperties(db_url)
-        self.thread_local = threading.local()
         self._init_done = False
         self._init_done = False
         self._compatibility()
         self._compatibility()
+        # atexit.register(self.tear_down)
+
+    # def tear_down(self):
+    #     """:ref:`Vacuuming the WALs` upon normal interpreter termination
+    #     (:py:obj:`atexit.register`).
+
+    #     .. _SQLite: Vacuuming the WALs: https://www.theunterminatedstring.com/sqlite-vacuuming/
+    #     """
+    #     self.DB.execute("PRAGMA wal_checkpoint(TRUNCATE)")
 
 
     def _compatibility(self):
     def _compatibility(self):
 
 
@@ -113,19 +185,31 @@ class SQLiteAppl(abc.ABC):
                 "SQLite runtime library version %s is not supported (require >= 3.35)", sqlite3.sqlite_version
                 "SQLite runtime library version %s is not supported (require >= 3.35)", sqlite3.sqlite_version
             )
             )
 
 
+    def _connect(self) -> sqlite3.Connection:
+        conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS)  # type: ignore
+        conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
+        self.register_functions(conn)
+        return conn
+
     def connect(self) -> sqlite3.Connection:
     def connect(self) -> sqlite3.Connection:
         """Creates a new DB connection (:py:obj:`SQLITE_CONNECT_ARGS`).  If not
         """Creates a new DB connection (:py:obj:`SQLITE_CONNECT_ARGS`).  If not
-        already done, the DB schema is set up
+        already done, the DB schema is set up.  The caller must take care of
+        closing the resource.  Alternatively, :py:obj:`SQLiteAppl.DB` can also
+        be used (the resource behind `self.DB` is automatically closed when the
+        process or thread is terminated).
         """
         """
         if sys.version_info < (3, 12):
         if sys.version_info < (3, 12):
             # Prior Python 3.12 there is no "autocommit" option
             # Prior Python 3.12 there is no "autocommit" option
             self.SQLITE_CONNECT_ARGS.pop("autocommit", None)
             self.SQLITE_CONNECT_ARGS.pop("autocommit", None)
 
 
-        self.init()
-        logger.debug("%s: connect to DB: %s // %s", self.__class__.__name__, self.db_url, self.SQLITE_CONNECT_ARGS)
-        conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS)  # type: ignore
-        conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
-        self.register_functions(conn)
+        msg = (
+            f"[{threading.current_thread().ident}] {self.__class__.__name__}({self.db_url})"
+            f" {self.SQLITE_CONNECT_ARGS} // {self.SQLITE_JOURNAL_MODE}"
+        )
+        logger.debug(msg)
+
+        with self._connect() as conn:
+            self.init(conn)
         return conn
         return conn
 
 
     def register_functions(self, conn):
     def register_functions(self, conn):
@@ -150,7 +234,7 @@ class SQLiteAppl(abc.ABC):
         .. _re.search: https://docs.python.org/3/library/re.html#re.search
         .. _re.search: https://docs.python.org/3/library/re.html#re.search
         """
         """
 
 
-        conn.create_function('regexp', 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True)
+        conn.create_function("regexp", 2, lambda x, y: 1 if re.search(x, y) else 0, deterministic=True)
 
 
     @property
     @property
     def DB(self) -> sqlite3.Connection:
     def DB(self) -> sqlite3.Connection:
@@ -168,57 +252,66 @@ class SQLiteAppl(abc.ABC):
             https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions
             https://docs.python.org/3/library/sqlite3.html#sqlite3-controlling-transactions
         """
         """
 
 
-        if getattr(self.thread_local, 'DB', None) is None:
-            self.thread_local.DB = self.connect()
-
-        # Theoretically it is possible to reuse the DB cursor across threads as
-        # of Python 3.12, in practice the threading of the cursor seems to me to
-        # be so faulty that I prefer to establish one connection per thread
-
-        self.thread_local.DB.commit()
-        return self.thread_local.DB
-
-        # In "serialized" mode, SQLite can be safely used by multiple threads
-        # with no restriction.
-        #
-        # if self.SQLITE_THREADING_MODE != "serialized":
-        #     if getattr(self.thread_local, 'DB', None) is None:
-        #         self.thread_local.DB = self.connect()
-        #     return self.thread_local.DB
-        #
-        # if self._DB is None:
-        #     self._DB = self.connect()  # pylint: disable=attribute-defined-outside-init
-        # return self._DB
-
-    def init(self):
+        conn = None
+
+        if self.SQLITE_THREADING_MODE == "serialized":
+            # Theoretically it is possible to reuse the DB cursor across threads
+            # as of Python 3.12, in practice the threading of the cursor seems
+            # to me a little faulty that I prefer to establish one connection
+            # per thread.
+            #
+            # may we can activate this code one day ..
+            # if self._DB is None:
+            #     self._DB = self.connect()
+            # conn = self._DB
+            conn = DBSession.get_connect(self)
+        else:
+            conn = DBSession.get_connect(self)
+
+        # Since more than one instance of SQLiteAppl share the same DB
+        # connection, we need to make sure that each SQLiteAppl instance has run
+        # its init method at least once.
+        self.init(conn)
+
+        return conn
+
+    def init(self, conn: sqlite3.Connection) -> bool:
         """Initializes the DB schema and properties, is only executed once even
         """Initializes the DB schema and properties, is only executed once even
-        if called several times."""
+        if called several times.
+
+        If the initialization has not yet taken place, it is carried out and a
+        `True` is returned to the caller at the end.  If the initialization has
+        already been carried out in the past, `False` is returned.
+        """
 
 
         if self._init_done:
         if self._init_done:
-            return
+            return False
         self._init_done = True
         self._init_done = True
 
 
         logger.debug("init DB: %s", self.db_url)
         logger.debug("init DB: %s", self.db_url)
-        self.properties.init()
+        self.properties.init(conn)
+
         ver = self.properties("DB_SCHEMA")
         ver = self.properties("DB_SCHEMA")
         if ver is None:
         if ver is None:
-            with self.properties.DB:
-                self.create_schema(self.properties.DB)
+            with conn:
+                self.create_schema(conn)
         else:
         else:
             ver = int(ver)
             ver = int(ver)
             if ver != self.DB_SCHEMA:
             if ver != self.DB_SCHEMA:
                 raise sqlite3.DatabaseError("Expected DB schema v%s, DB schema is v%s" % (self.DB_SCHEMA, ver))
                 raise sqlite3.DatabaseError("Expected DB schema v%s, DB schema is v%s" % (self.DB_SCHEMA, ver))
             logger.debug("DB_SCHEMA = %s", ver)
             logger.debug("DB_SCHEMA = %s", ver)
 
 
-    def create_schema(self, conn):
+        return True
+
+    def create_schema(self, conn: sqlite3.Connection):
 
 
         logger.debug("create schema ..")
         logger.debug("create schema ..")
+        self.properties.set("DB_SCHEMA", self.DB_SCHEMA)
+        self.properties.set("LAST_MAINTENANCE", "")
         with conn:
         with conn:
             for table_name, sql in self.DDL_CREATE_TABLES.items():
             for table_name, sql in self.DDL_CREATE_TABLES.items():
                 conn.execute(sql)
                 conn.execute(sql)
                 self.properties.set(f"Table {table_name} created", table_name)
                 self.properties.set(f"Table {table_name} created", table_name)
-                self.properties.set("DB_SCHEMA", self.DB_SCHEMA)
-                self.properties.set("LAST_MAINTENANCE", "")
 
 
 
 
 class SQLiteProperties(SQLiteAppl):
 class SQLiteProperties(SQLiteAppl):
@@ -253,33 +346,32 @@ CREATE TABLE IF NOT EXISTS properties (
         "    ON CONFLICT(name) DO UPDATE"
         "    ON CONFLICT(name) DO UPDATE"
         "   SET value=excluded.value, m_time=strftime('%s', 'now')"
         "   SET value=excluded.value, m_time=strftime('%s', 'now')"
     )
     )
+    SQL_DELETE = "DELETE FROM properties WHERE name = ?"
     SQL_TABLE_EXISTS = (
     SQL_TABLE_EXISTS = (
         "SELECT name FROM sqlite_master"
         "SELECT name FROM sqlite_master"
         " WHERE type='table' AND name='properties'"
         " WHERE type='table' AND name='properties'"
     )  # fmt:skip
     )  # fmt:skip
     SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
     SQLITE_CONNECT_ARGS = dict(SQLiteAppl.SQLITE_CONNECT_ARGS)
-    SQLITE_CONNECT_ARGS["autocommit"] = True  # This option has no effect before Python 3.12
 
 
     def __init__(self, db_url: str):  # pylint: disable=super-init-not-called
     def __init__(self, db_url: str):  # pylint: disable=super-init-not-called
 
 
         self.db_url = db_url
         self.db_url = db_url
-        self.thread_local = threading.local()
         self._init_done = False
         self._init_done = False
         self._compatibility()
         self._compatibility()
 
 
-    def init(self):
+    def init(self, conn: sqlite3.Connection) -> bool:
         """Initializes DB schema of the properties in the DB."""
         """Initializes DB schema of the properties in the DB."""
 
 
         if self._init_done:
         if self._init_done:
-            return
+            return False
         self._init_done = True
         self._init_done = True
         logger.debug("init properties of DB: %s", self.db_url)
         logger.debug("init properties of DB: %s", self.db_url)
-        with self.DB as conn:
-            res = conn.execute(self.SQL_TABLE_EXISTS)
-            if res.fetchone() is None:  # DB schema needs to be be created
-                self.create_schema(conn)
+        res = conn.execute(self.SQL_TABLE_EXISTS)
+        if res.fetchone() is None:  # DB schema needs to be be created
+            self.create_schema(conn)
+        return True
 
 
-    def __call__(self, name, default=None):
+    def __call__(self, name: str, default=None):
         """Returns the value of the property ``name`` or ``default`` if property
         """Returns the value of the property ``name`` or ``default`` if property
         not exists in DB."""
         not exists in DB."""
 
 
@@ -288,36 +380,47 @@ CREATE TABLE IF NOT EXISTS properties (
             return default
             return default
         return res[0]
         return res[0]
 
 
-    def set(self, name, value):
+    def set(self, name: str, value: str | int):
         """Set ``value`` of property ``name`` in DB.  If property already
         """Set ``value`` of property ``name`` in DB.  If property already
         exists, update the ``m_time`` (and the value)."""
         exists, update the ``m_time`` (and the value)."""
 
 
-        self.DB.execute(self.SQL_SET, (name, value))
+        with self.DB:
+            self.DB.execute(self.SQL_SET, (name, value))
 
 
-        if sys.version_info <= (3, 12):
-            # Prior Python 3.12 there is no "autocommit" option / lets commit
-            # explicitely.
-            self.DB.commit()
+    def delete(self, name: str) -> int:
+        """Delete of property ``name`` from DB."""
+        with self.DB:
+            cur = self.DB.execute(self.SQL_DELETE, (name,))
+        return cur.rowcount
 
 
-    def row(self, name, default=None):
+    def row(self, name: str, default=None):
         """Returns the DB row of property ``name`` or ``default`` if property
         """Returns the DB row of property ``name`` or ``default`` if property
         not exists in DB."""
         not exists in DB."""
 
 
-        cur = self.DB.cursor()
-        cur.execute("SELECT * FROM properties WHERE name = ?", (name,))
-        res = cur.fetchone()
-        if res is None:
+        res = self.DB.execute("SELECT * FROM properties WHERE name = ?", (name,))
+        row = res.fetchone()
+        if row is None:
             return default
             return default
-        col_names = [column[0] for column in cur.description]
-        return dict(zip(col_names, res))
 
 
-    def m_time(self, name, default: int = 0) -> int:
+        col_names = [column[0] for column in row.description]
+        return dict(zip(col_names, row))
+
+    def m_time(self, name: str, default: int = 0) -> int:
         """Last modification time of this property."""
         """Last modification time of this property."""
-        res = self.DB.execute(self.SQL_M_TIME, (name,)).fetchone()
-        if res is None:
+        res = self.DB.execute(self.SQL_M_TIME, (name,))
+        row = res.fetchone()
+        if row is None:
             return default
             return default
-        return int(res[0])
+        return int(row[0])
 
 
     def create_schema(self, conn):
     def create_schema(self, conn):
         with conn:
         with conn:
             conn.execute(self.DDL_PROPERTIES)
             conn.execute(self.DDL_PROPERTIES)
+
+    def __str__(self) -> str:
+        lines = []
+        for row in self.DB.execute("SELECT name, value, m_time FROM properties"):
+            name, value, m_time = row
+            m_time = datetime.datetime.fromtimestamp(m_time).strftime("%Y-%m-%d %H:%M:%S")
+            lines.append(f"[last modified: {m_time}] {name:20s}: {value}")
+        return "\n".join(lines)