Browse Source

[mod] revision of the settings_loader

The intention of this PR is to modernize the settings_loader implementations.
The concept is old (remember, parts of it date from 2014): back then we only
had one config file, whereas we have now had a folder with config files for a
very long time.  Callers can now load a YAML configuration from this folder as
follows ::

    settings_loader.get_yaml_cfg('my-config.yml')

- BTW this is a fix of #3557.

- Furthermore, the `existing_filename_or_none` construct dates back to a time
  when `pathlib.Path` was not yet available in all the Python versions we
  supported.

- Type hints have been added wherever appropriate.

At the same time, this patch should also be backward compatible and not
introduce a new environment variable. The location of the folder with the
configurations is still derived from:

    SEARXNG_SETTINGS_PATH (which defaults to /etc/searxng/settings.yml)

Which means, the default config folder is `/etc/searxng/`.

ATTENTION: intended functional changes!

 If SEARXNG_SETTINGS_PATH was set and pointed to a non-existent file, the
 previous implementation silently loaded the default configuration.  This
 behavior has been changed: if the file or folder does not exist, an
 EnvironmentError exception is now raised.

Closes: https://github.com/searxng/searxng/issues/3557
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 10 months ago
parent
commit
2039060b64

+ 2 - 0
docs/admin/settings/index.rst

@@ -1,3 +1,5 @@
+.. _searxng settings.yml:
+
 ========
 Settings
 ========

+ 8 - 0
docs/src/searx.settings.rst

@@ -0,0 +1,8 @@
+.. _searx.settings_loader:
+
+===============
+Settings Loader
+===============
+
+.. automodule:: searx.settings_loader
+   :members:

+ 3 - 1
manage

@@ -54,7 +54,9 @@ fi
 
 YAMLLINT_FILES=()
 while IFS= read -r line; do
-   YAMLLINT_FILES+=("$line")
+    if [ "$line" != "tests/unit/settings/syntaxerror_settings.yml" ]; then
+        YAMLLINT_FILES+=("$line")
+    fi
 done <<< "$(git ls-files './tests/*.yml' './searx/*.yml' './utils/templates/etc/searxng/*.yml')"
 
 RST_FILES=(

+ 4 - 4
searx/plugins/hostnames.py

@@ -96,7 +96,7 @@ from flask_babel import gettext
 
 from searx import settings
 from searx.plugins import logger
-from searx.settings_loader import get_yaml_file
+from searx.settings_loader import get_yaml_cfg
 
 name = gettext('Hostnames plugin')
 description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')
@@ -118,7 +118,7 @@ def _load_regular_expressions(settings_key):
 
     # load external file with configuration
     if isinstance(setting_value, str):
-        setting_value = get_yaml_file(setting_value)
+        setting_value = get_yaml_cfg(setting_value)
 
     if isinstance(setting_value, list):
         return {re.compile(r) for r in setting_value}
@@ -163,10 +163,10 @@ def _matches_parsed_url(result, pattern):
 def on_result(_request, _search, result):
     for pattern, replacement in replacements.items():
         if _matches_parsed_url(result, pattern):
-            logger.debug(result['url'])
+            # logger.debug(result['url'])
             result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
             result['url'] = urlunparse(result[parsed])
-            logger.debug(result['url'])
+            # logger.debug(result['url'])
 
         for url_field in _url_fields:
             if not result.get(url_field):

+ 122 - 59
searx/settings_loader.py

@@ -1,68 +1,116 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-# pylint: disable=missing-module-docstring, too-many-branches
+"""Implementations for loading configurations from YAML files.  This essentially
+includes the configuration of the (:ref:`SearXNG appl <searxng settings.yml>`)
+server. The default configuration for the application server is loaded from the
+:origin:`DEFAULT_SETTINGS_FILE <searx/settings.yml>`.  This default
+configuration can be completely replaced or :ref:`customized individually
+<use_default_settings.yml>` and the ``SEARXNG_SETTINGS_PATH`` environment
+variable can be used to set the location from which the local customizations are
+to be loaded. The rules used for this can be found in the
+:py:obj:`get_user_cfg_folder` function.
 
-from typing import Optional
-from os import environ
-from os.path import dirname, join, abspath, isfile
+- By default, local configurations are expected in folder ``/etc/searxng`` from
+  where applications can load them with the :py:obj:`get_yaml_cfg` function.
+
+- By default, customized :ref:`SearXNG appl <searxng settings.yml>` settings are
+  expected in a file named ``settings.yml``.
+
+"""
+
+from __future__ import annotations
+
+import os.path
 from collections.abc import Mapping
 from itertools import filterfalse
+from pathlib import Path
 
 import yaml
 
 from searx.exceptions import SearxSettingsException
 
+searx_dir = os.path.abspath(os.path.dirname(__file__))
 
-searx_dir = abspath(dirname(__file__))
-
-
-def existing_filename_or_none(file_name: str) -> Optional[str]:
-    if isfile(file_name):
-        return file_name
-    return None
+SETTINGS_YAML = Path("settings.yml")
+DEFAULT_SETTINGS_FILE = Path(searx_dir) / SETTINGS_YAML
+"""The :origin:`searx/settings.yml` file with all the default settings."""
 
 
-def load_yaml(file_name):
+def load_yaml(file_name: str | Path):
+    """Load YAML config from a file."""
     try:
         with open(file_name, 'r', encoding='utf-8') as settings_yaml:
-            return yaml.safe_load(settings_yaml)
+            return yaml.safe_load(settings_yaml) or {}
     except IOError as e:
-        raise SearxSettingsException(e, file_name) from e
+        raise SearxSettingsException(e, str(file_name)) from e
     except yaml.YAMLError as e:
-        raise SearxSettingsException(e, file_name) from e
+        raise SearxSettingsException(e, str(file_name)) from e
+
+
+def get_yaml_cfg(file_name: str | Path) -> dict:
+    """Shortcut to load a YAML config from a file, located in the
+
+    - :py:obj:`get_user_cfg_folder` or
+    - in the ``searx`` folder of the SearXNG installation
+    """
+
+    folder = get_user_cfg_folder() or Path(searx_dir)
+    fname = folder / file_name
+    if not fname.is_file():
+        raise FileNotFoundError(f"File {fname} does not exist!")
+
+    return load_yaml(fname)
 
 
-def get_yaml_file(file_name):
-    path = existing_filename_or_none(join(searx_dir, file_name))
-    if path is None:
-        raise FileNotFoundError(f"File {file_name} does not exist!")
+def get_user_cfg_folder() -> Path | None:
+    """Returns folder where the local configurations are located.
 
-    return load_yaml(path)
+    1. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a
+       folder (e.g. ``/etc/mysxng/``), all local configurations are expected in
+       this folder.  The settings of the :ref:`SearXNG appl <searxng
+       settings.yml>` then expected in ``settings.yml``
+       (e.g. ``/etc/mysxng/settings.yml``).
 
+    2. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a file
+       (e.g. ``/etc/mysxng/myinstance.yml``), this file contains the settings of
+       the :ref:`SearXNG appl <searxng settings.yml>` and the folder
+       (e.g. ``/etc/mysxng/``) is used for all other configurations.
 
-def get_default_settings_path():
-    return existing_filename_or_none(join(searx_dir, 'settings.yml'))
+       This type (``SEARXNG_SETTINGS_PATH`` points to a file) is suitable for
+       use cases in which different profiles of the :ref:`SearXNG appl <searxng
+       settings.yml>` are to be managed, such as in test scenarios.
 
+    3. If folder ``/etc/searxng`` exists, it is used.
+
+    In case none of the above path exists, ``None`` is returned.  In case of
+    environment ``SEARXNG_SETTINGS_PATH`` is set, but the (folder or file) does
+    not exists, a :py:obj:`EnvironmentError` is raised.
 
-def get_user_settings_path() -> Optional[str]:
-    """Get an user settings file.
-    By descending priority:
-    1. ``environ['SEARXNG_SETTINGS_PATH']``
-    2. ``/etc/searxng/settings.yml`` except if ``SEARXNG_DISABLE_ETC_SETTINGS`` is ``true`` or ``1``
-    3. ``None``
     """
 
-    # check the environment variable SEARXNG_SETTINGS_PATH
-    # if the environment variable is defined, this is the last check
-    if 'SEARXNG_SETTINGS_PATH' in environ:
-        return existing_filename_or_none(environ['SEARXNG_SETTINGS_PATH'])
+    folder = None
+    settings_path = os.environ.get("SEARXNG_SETTINGS_PATH")
+
+    # Disable default /etc/searxng is intended exclusively for internal testing purposes
+    # and is therefore not documented!
+    disable_etc = os.environ.get('SEARXNG_DISABLE_ETC_SETTINGS', '').lower() in ('1', 'true')
+
+    if settings_path:
+        # rule 1. and 2.
+        settings_path = Path(settings_path)
+        if settings_path.is_dir():
+            folder = settings_path
+        elif settings_path.is_file():
+            folder = settings_path.parent
+        else:
+            raise EnvironmentError(1, f"{settings_path} not exists!", settings_path)
 
-    # if SEARXNG_DISABLE_ETC_SETTINGS don't look any further
-    if environ.get('SEARXNG_DISABLE_ETC_SETTINGS', '').lower() in ('1', 'true'):
-        return None
+    if not folder and not disable_etc:
+        # default: rule 3.
+        folder = Path("/etc/searxng")
+        if not folder.is_dir():
+            folder = None
 
-    # check /etc/searxng/settings.yml
-    # (continue with other locations if the file is not found)
-    return existing_filename_or_none('/etc/searxng/settings.yml')
+    return folder
 
 
 def update_dict(default_dict, user_dict):
@@ -74,7 +122,9 @@ def update_dict(default_dict, user_dict):
     return default_dict
 
 
-def update_settings(default_settings, user_settings):
+def update_settings(default_settings: dict, user_settings: dict):
+    # pylint: disable=too-many-branches
+
     # merge everything except the engines
     for k, v in user_settings.items():
         if k not in ('use_default_settings', 'engines'):
@@ -124,6 +174,7 @@ def update_settings(default_settings, user_settings):
 
 
 def is_use_default_settings(user_settings):
+
     use_default_settings = user_settings.get('use_default_settings')
     if use_default_settings is True:
         return True
@@ -134,25 +185,37 @@ def is_use_default_settings(user_settings):
     raise ValueError('Invalid value for use_default_settings')
 
 
-def load_settings(load_user_settings=True):
-    default_settings_path = get_default_settings_path()
-    user_settings_path = get_user_settings_path()
-    if user_settings_path is None or not load_user_settings:
-        # no user settings
-        return (load_yaml(default_settings_path), 'load the default settings from {}'.format(default_settings_path))
+def load_settings(load_user_settings=True) -> tuple[dict, str]:
+    """Function for loading the settings of the SearXNG application
+    (:ref:`settings.yml <searxng settings.yml>`)."""
 
-    # user settings
-    user_settings = load_yaml(user_settings_path)
-    if is_use_default_settings(user_settings):
+    msg = f"load the default settings from {DEFAULT_SETTINGS_FILE}"
+    cfg = load_yaml(DEFAULT_SETTINGS_FILE)
+    cfg_folder = get_user_cfg_folder()
+
+    if not load_user_settings or not cfg_folder:
+        return cfg, msg
+
+    settings_yml = os.environ.get("SEARXNG_SETTINGS_PATH")
+    if settings_yml and Path(settings_yml).is_file():
+        # see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a file
+        settings_yml = Path(settings_yml).name
+    else:
+        # see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a folder
+        settings_yml = SETTINGS_YAML
+
+    cfg_file = cfg_folder / settings_yml
+    if not cfg_file.exists():
+        return cfg, msg
+
+    msg = f"load the user settings from {cfg_file}"
+    user_cfg = load_yaml(cfg_file)
+
+    if is_use_default_settings(user_cfg):
         # the user settings are merged with the default configuration
-        default_settings = load_yaml(default_settings_path)
-        update_settings(default_settings, user_settings)
-        return (
-            default_settings,
-            'merge the default settings ( {} ) and the user settings ( {} )'.format(
-                default_settings_path, user_settings_path
-            ),
-        )
-
-    # the user settings, fully replace the default configuration
-    return (user_settings, 'load the user settings from {}'.format(user_settings_path))
+        msg = f"merge the default settings ( {DEFAULT_SETTINGS_FILE} ) and the user settings ( {cfg_file} )"
+        update_settings(cfg, user_cfg)
+    else:
+        cfg = user_cfg
+
+    return cfg, msg

+ 2 - 2
searx/webapp.py

@@ -61,7 +61,7 @@ from searx.botdetection import link_token
 from searx.data import ENGINE_DESCRIPTIONS
 from searx.results import Timing
 from searx.settings_defaults import OUTPUT_FORMATS
-from searx.settings_loader import get_default_settings_path
+from searx.settings_loader import DEFAULT_SETTINGS_FILE
 from searx.exceptions import SearxParameterException
 from searx.engines import (
     DEFAULT_CATEGORY,
@@ -1347,7 +1347,7 @@ def run():
         port=settings['server']['port'],
         host=settings['server']['bind_address'],
         threaded=True,
-        extra_files=[get_default_settings_path()],
+        extra_files=[DEFAULT_SETTINGS_FILE],
     )
 
 

+ 1 - 0
tests/unit/settings/syntaxerror_settings.yml

@@ -1,2 +1,3 @@
 Test:
   "**********"
+  xxx

+ 18 - 30
tests/unit/test_settings_loader.py

@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=missing-module-docstring
 
-from os.path import dirname, join, abspath
+from pathlib import Path
+
+import os
 from unittest.mock import patch
 
 from searx.exceptions import SearxSettingsException
@@ -9,7 +11,8 @@ from searx import settings_loader
 from tests import SearxTestCase
 
 
-test_dir = abspath(dirname(__file__))
+def _settings(f_name):
+    return str(Path(__file__).parent.absolute() / "settings" / f_name)
 
 
 class TestLoad(SearxTestCase):  # pylint: disable=missing-class-docstring
@@ -18,16 +21,9 @@ class TestLoad(SearxTestCase):  # pylint: disable=missing-class-docstring
             settings_loader.load_yaml('/dev/zero')
 
         with self.assertRaises(SearxSettingsException):
-            settings_loader.load_yaml(join(test_dir, '/settings/syntaxerror_settings.yml'))
-
-        with self.assertRaises(SearxSettingsException):
-            settings_loader.load_yaml(join(test_dir, '/settings/empty_settings.yml'))
+            settings_loader.load_yaml(_settings("syntaxerror_settings.yml"))
 
-    def test_existing_filename_or_none(self):
-        self.assertIsNone(settings_loader.existing_filename_or_none('/dev/zero'))
-
-        bad_settings_path = join(test_dir, 'settings/syntaxerror_settings.yml')
-        self.assertEqual(settings_loader.existing_filename_or_none(bad_settings_path), bad_settings_path)
+        self.assertEqual(settings_loader.load_yaml(_settings("empty_settings.yml")), {})
 
 
 class TestDefaultSettings(SearxTestCase):  # pylint: disable=missing-class-docstring
@@ -55,24 +51,22 @@ class TestUserSettings(SearxTestCase):  # pylint: disable=missing-class-docstrin
             self.assertFalse(settings_loader.is_use_default_settings({'use_default_settings': 0}))
 
     def test_user_settings_not_found(self):
-        with patch.dict(settings_loader.environ, {'SEARXNG_SETTINGS_PATH': '/dev/null'}):
-            settings, msg = settings_loader.load_settings()
-            self.assertTrue(msg.startswith('load the default settings from'))
-            self.assertEqual(settings['server']['secret_key'], "ultrasecretkey")
+        with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("not_exists.yml")}):
+            with self.assertRaises(EnvironmentError):
+                _s, _m = settings_loader.load_settings()
+        with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': "/folder/not/exists"}):
+            with self.assertRaises(EnvironmentError):
+                _s, _m = settings_loader.load_settings()
 
     def test_user_settings(self):
-        with patch.dict(
-            settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_simple.yml')}
-        ):
+        with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_simple.yml")}):
             settings, msg = settings_loader.load_settings()
             self.assertTrue(msg.startswith('merge the default settings'))
             self.assertEqual(settings['server']['secret_key'], "user_secret_key")
             self.assertEqual(settings['server']['default_http_headers']['Custom-Header'], "Custom-Value")
 
     def test_user_settings_remove(self):
-        with patch.dict(
-            settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove.yml')}
-        ):
+        with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_remove.yml")}):
             settings, msg = settings_loader.load_settings()
             self.assertTrue(msg.startswith('merge the default settings'))
             self.assertEqual(settings['server']['secret_key'], "user_secret_key")
@@ -83,9 +77,7 @@ class TestUserSettings(SearxTestCase):  # pylint: disable=missing-class-docstrin
             self.assertIn('wikipedia', engine_names)
 
     def test_user_settings_remove2(self):
-        with patch.dict(
-            settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove2.yml')}
-        ):
+        with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_remove2.yml")}):
             settings, msg = settings_loader.load_settings()
             self.assertTrue(msg.startswith('merge the default settings'))
             self.assertEqual(settings['server']['secret_key'], "user_secret_key")
@@ -101,9 +93,7 @@ class TestUserSettings(SearxTestCase):  # pylint: disable=missing-class-docstrin
             self.assertEqual(newengine[0]['engine'], 'dummy')
 
     def test_user_settings_keep_only(self):
-        with patch.dict(
-            settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_keep_only.yml')}
-        ):
+        with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_keep_only.yml")}):
             settings, msg = settings_loader.load_settings()
             self.assertTrue(msg.startswith('merge the default settings'))
             engine_names = [engine['name'] for engine in settings['engines']]
@@ -112,9 +102,7 @@ class TestUserSettings(SearxTestCase):  # pylint: disable=missing-class-docstrin
             self.assertEqual(len(settings['engines'][2]), 1)
 
     def test_custom_settings(self):
-        with patch.dict(
-            settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings.yml')}
-        ):
+        with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings.yml")}):
             settings, msg = settings_loader.load_settings()
             self.assertTrue(msg.startswith('load the user settings from'))
             self.assertEqual(settings['server']['port'], 9000)