Browse Source

[mod] limiter: blocklist and passlist (ip_lists)

A blocklist and a passlist can be configured in /etc/searxng/limiter.toml::

    [botdetection.ip_lists]
    pass_ip = [
      '51.15.252.168',  # IPv4 of check.searx.space
    ]

    block_ip = [
      '93.184.216.34',  # IPv4 of example.org
    ]

Closes: https://github.com/searxng/searxng/issues/2127
Closes: https://github.com/searxng/searxng/pull/2129
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
f3763d73ad

+ 3 - 0
docs/src/searx.botdetection.rst

@@ -15,6 +15,9 @@ Bot Detection
 .. automodule:: searx.botdetection.limiter
 .. automodule:: searx.botdetection.limiter
   :members:
   :members:
 
 
+.. automodule:: searx.botdetection.ip_lists
+  :members:
+
 
 
 Rate limit
 Rate limit
 ==========
 ==========

+ 4 - 5
searx/botdetection/_helpers.py

@@ -6,8 +6,8 @@ from __future__ import annotations
 from ipaddress import (
 from ipaddress import (
     IPv4Network,
     IPv4Network,
     IPv6Network,
     IPv6Network,
+    IPv4Address,
     IPv6Address,
     IPv6Address,
-    ip_address,
     ip_network,
     ip_network,
 )
 )
 import flask
 import flask
@@ -46,11 +46,10 @@ def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkz
     return flask.make_response(('Too Many Requests', 429))
     return flask.make_response(('Too Many Requests', 429))
 
 
 
 
-def get_network(real_ip: str, cfg: config.Config) -> IPv4Network | IPv6Network:
+def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
     """Returns the (client) network of whether the real_ip is part of."""
     """Returns the (client) network of whether the real_ip is part of."""
 
 
-    ip = ip_address(real_ip)
-    if isinstance(ip, IPv6Address):
+    if real_ip.version == 6:
         prefix = cfg['real_ip.ipv6_prefix']
         prefix = cfg['real_ip.ipv6_prefix']
     else:
     else:
         prefix = cfg['real_ip.ipv4_prefix']
         prefix = cfg['real_ip.ipv4_prefix']
@@ -99,7 +98,7 @@ def get_real_ip(request: flask.Request) -> str:
         from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import
         from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import
 
 
         forwarded_for = [x.strip() for x in forwarded_for.split(',')]
         forwarded_for = [x.strip() for x in forwarded_for.split(',')]
-        x_for: int = get_cfg()['real_ip.x_for']
+        x_for: int = get_cfg()['real_ip.x_for']  # type: ignore
         forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
         forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
 
 
     if not real_ip:
     if not real_ip:

+ 5 - 3
searx/botdetection/ip_limit.py

@@ -49,14 +49,16 @@ import werkzeug
 from searx.tools import config
 from searx.tools import config
 
 
 from searx import redisdb
 from searx import redisdb
-from searx import logger
 from searx.redislib import incr_sliding_window, drop_counter
 from searx.redislib import incr_sliding_window, drop_counter
 
 
 from . import link_token
 from . import link_token
-from ._helpers import too_many_requests
+from ._helpers import (
+    too_many_requests,
+    logger,
+)
 
 
 
 
-logger = logger.getChild('botdetection.ip_limit')
+logger = logger.getChild('ip_limit')
 
 
 BURST_WINDOW = 20
 BURST_WINDOW = 20
 """Time (sec) before sliding window for *burst* requests expires."""
 """Time (sec) before sliding window for *burst* requests expires."""

+ 85 - 0
searx/botdetection/ip_lists.py

@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+""".. _botdetection.ip_lists:
+
+Method ``ip_lists``
+-------------------
+
+The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
+:py:obj:`pass-lists <pass_ip>`.
+
+.. code:: toml
+
+   [botdetection.ip_lists]
+
+   pass_ip = [
+      '140.238.172.132', # IPv4 of check.searx.space
+      '192.168.0.0/16',  # IPv4 private network
+      'fe80::/10'        # IPv6 link-local
+   ]
+   block_ip = [
+      '93.184.216.34', # IPv4 of example.org
+      '257.1.1.1',     # invalid IP --> will be ignored, logged in ERROR class
+   ]
+
+"""
+# pylint: disable=unused-argument
+
+from __future__ import annotations
+from typing import Tuple
+from ipaddress import (
+    ip_network,
+    IPv4Address,
+    IPv6Address,
+)
+
+from searx.tools import config
+from ._helpers import logger
+
+# Child logger named after this module, so that log records emitted here are
+# attributed to the ip_lists method (and not to ip_limit).
+logger = logger.getChild('ip_lists')
+
+SEARXNG_ORG = [
+    # https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
+    '140.238.172.132',  # IPv4 check.searx.space
+    '2603:c022:0:4900::/56',  # IPv6 check.searx.space
+]
+"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
+
+
+def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
+    """Checks if ``real_ip`` is on the pass-list.
+
+    Unless ``botdetection.ip_lists.pass_searxng_org`` is set to a false value
+    (default is ``True``), the IP is first compared with the networks of the
+    hardcoded :py:obj:`SEARXNG_ORG` list.  If there is no match, the IP is
+    compared with the items of the ``botdetection.ip_lists.pass_ip`` list from
+    the configuration.
+
+    Returns a tuple ``(match, msg)``: ``match`` is ``True`` if the IP is part of
+    one of these networks and ``msg`` is a text describing the result.
+    """
+
+    if cfg.get('botdetection.ip_lists.pass_searxng_org', default=True):
+        for net in SEARXNG_ORG:
+            # strict=False: items with host bits set are accepted as network
+            net = ip_network(net, strict=False)
+            # only compare address and network of the same IP version
+            if real_ip.version == net.version and real_ip in net:
+                return True, f"IP matches {net.compressed} in SEARXNG_ORG list."
+    return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
+
+
+def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
+    """Checks if ``real_ip`` is on the block-list.
+
+    The IP is compared with the items of the ``botdetection.ip_lists.block_ip``
+    list from the configuration.
+
+    Returns a tuple ``(match, msg)``: ``match`` is ``True`` if the IP is part of
+    one of the listed networks.  In this case a hint on how to get the IP
+    removed from the list is appended to ``msg``.
+    """
+
+    block, msg = ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.block_ip', cfg)
+    if block:
+        # the message is handed back to the caller, tell how to get unblocked
+        msg += " To remove IP from list, please contact the maintainer of the service."
+    return block, msg
+
+
+def ip_is_subnet_of_member_in_list(
+    real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
+) -> Tuple[bool, str]:
+    """Checks if ``real_ip`` is part of one of the networks in a configured list.
+
+    :param real_ip: IP address to look up.
+    :param list_name: name of the list in the configuration, e.g.
+      ``botdetection.ip_lists.pass_ip``.  A missing list is treated as empty.
+    :param cfg: configuration the list is read from.
+    :return: tuple ``(match, msg)``; ``match`` is ``True`` on the first item
+      of the list the IP is part of, ``msg`` is a text describing the result.
+
+    Items of the list that are neither a valid IP nor a valid network are
+    logged in the ERROR class and ignored.
+    """
+
+    for item in cfg.get(list_name, default=[]):
+        try:
+            # strict=False: items with host bits set are accepted as network
+            net = ip_network(item, strict=False)
+        except ValueError:
+            logger.error("invalid IP %s in %s", item, list_name)
+            continue
+        # only compare address and network of the same IP version
+        if real_ip.version == net.version and real_ip in net:
+            return True, f"IP matches {net.compressed} in {list_name}."
+    return False, f"IP is not a member of an item in the {list_name} list"

+ 31 - 2
searx/botdetection/limiter.py

@@ -40,6 +40,7 @@ and set the redis-url connection. Check the value, it depends on your redis DB
 from __future__ import annotations
 from __future__ import annotations
 
 
 from pathlib import Path
 from pathlib import Path
+from ipaddress import ip_address
 import flask
 import flask
 import werkzeug
 import werkzeug
 
 
@@ -53,6 +54,7 @@ from . import (
     http_connection,
     http_connection,
     http_user_agent,
     http_user_agent,
     ip_limit,
     ip_limit,
+    ip_lists,
 )
 )
 
 
 from ._helpers import (
 from ._helpers import (
@@ -84,16 +86,41 @@ def get_cfg() -> config.Config:
 
 
 
 
 def filter_request(request: flask.Request) -> werkzeug.Response | None:
 def filter_request(request: flask.Request) -> werkzeug.Response | None:
+    # pylint: disable=too-many-return-statements
 
 
     cfg = get_cfg()
     cfg = get_cfg()
-    real_ip = get_real_ip(request)
+    real_ip = ip_address(get_real_ip(request))
     network = get_network(real_ip, cfg)
     network = get_network(real_ip, cfg)
+
+    if request.path == '/healthz':
+        return None
+
+    # link-local
+
     if network.is_link_local:
     if network.is_link_local:
         return None
         return None
 
 
-    if request.path == '/healthz':
+    # block- & pass- lists
+    #
+    # 1. The IP of the request is first checked against the pass-list; if the IP
+    #    matches an entry in the list, the request is not blocked.
+    # 2. If no matching entry is found in the pass-list, then a check is made against
+    #    the block list; if the IP matches an entry in the list, the request is
+    #    blocked.
+    # 3. If the IP is not in either list, the request is not blocked.
+
+    match, msg = ip_lists.pass_ip(real_ip, cfg)
+    if match:
+        logger.warning("PASS %s: matched PASSLIST - %s", network.compressed, msg)
         return None
         return None
 
 
+    match, msg = ip_lists.block_ip(real_ip, cfg)
+    if match:
+        logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
+        return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
+
+    # methods applied on /
+
     for func in [
     for func in [
         http_user_agent,
         http_user_agent,
     ]:
     ]:
@@ -101,6 +128,8 @@ def filter_request(request: flask.Request) -> werkzeug.Response | None:
         if val is not None:
         if val is not None:
             return val
             return val
 
 
+    # methods applied on /search
+
     if request.path == '/search':
     if request.path == '/search':
 
 
         for func in [
         for func in [

+ 19 - 1
searx/botdetection/limiter.toml

@@ -16,7 +16,25 @@ ipv6_prefix = 48
 # (networks) are not monitored by the ip_limit
 # (networks) are not monitored by the ip_limit
 filter_link_local = false
 filter_link_local = false
 
 
-# acrivate link_token method in the ip_limit method
+# activate link_token method in the ip_limit method
 link_token = false
 link_token = false
 
 
+[botdetection.ip_lists]
 
 
+# In the limiter, the ip_lists method has priority over all other methods -> if
+# an IP is in the pass_ip list, it has unrestricted access and it is also not
+# checked if e.g. the "user agent" suggests a bot (e.g. curl).
+
+block_ip = [
+  # '93.184.216.34',  # IPv4 of example.org
+  # '257.1.1.1',      # invalid IP --> will be ignored, logged in ERROR class
+]
+
+pass_ip = [
+  # '192.168.0.0/16',      # IPv4 private network
+  # 'fe80::/10'            # IPv6 linklocal / wins over botdetection.ip_limit.filter_link_local
+]
+
+# Activate passlist of (hardcoded) IPs from the SearXNG organization,
+# e.g. `check.searx.space`.
+pass_searxng_org = true

+ 2 - 1
searx/botdetection/link_token.py

@@ -39,6 +39,7 @@ from __future__ import annotations
 from ipaddress import (
 from ipaddress import (
     IPv4Network,
     IPv4Network,
     IPv6Network,
     IPv6Network,
+    ip_address,
 )
 )
 
 
 import string
 import string
@@ -107,7 +108,7 @@ def ping(request: flask.Request, token: str):
         return
         return
 
 
     cfg = limiter.get_cfg()
     cfg = limiter.get_cfg()
-    real_ip = get_real_ip(request)
+    real_ip = ip_address(get_real_ip(request))
     network = get_network(real_ip, cfg)
     network = get_network(real_ip, cfg)
 
 
     ping_key = get_ping_key(network, request)
     ping_key = get_ping_key(network, request)