Browse Source

[mod] botdetection: HTTP Fetch Metadata Request Headers

HTTP Fetch Metadata Request Headers [1][2] are used to detect bot requests. Bots
with invalid *Fetch Metadata* will be redirected to the intro (`index`) page.

[1] https://www.w3.org/TR/fetch-metadata/
[2] https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 6 months ago
parent
commit
fe08bb1d90

+ 3 - 0
docs/src/searx.botdetection.rst

@@ -53,6 +53,9 @@ Probe HTTP headers
 .. automodule:: searx.botdetection.http_user_agent
 .. automodule:: searx.botdetection.http_user_agent
   :members:
   :members:
 
 
+.. automodule:: searx.botdetection.http_sec_fetch
+  :members:
+
 .. _botdetection config:
 .. _botdetection config:
 
 
 Config
 Config

+ 3 - 0
searx/botdetection/_helpers.py

@@ -34,6 +34,9 @@ def dump_request(request: SXNG_Request):
         + " || Content-Length: %s" % request.headers.get('Content-Length')
         + " || Content-Length: %s" % request.headers.get('Content-Length')
         + " || Connection: %s" % request.headers.get('Connection')
         + " || Connection: %s" % request.headers.get('Connection')
         + " || User-Agent: %s" % request.headers.get('User-Agent')
         + " || User-Agent: %s" % request.headers.get('User-Agent')
+        + " || Sec-Fetch-Site: %s" % request.headers.get('Sec-Fetch-Site')
+        + " || Sec-Fetch-Mode: %s" % request.headers.get('Sec-Fetch-Mode')
+        + " || Sec-Fetch-Dest: %s" % request.headers.get('Sec-Fetch-Dest')
     )
     )
 
 
 
 

+ 61 - 0
searx/botdetection/http_sec_fetch.py

@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Method ``http_sec_fetch``
+-------------------------
+
+The ``http_sec_fetch`` method protects resources from web attacks with `Fetch
+Metadata`_.  A request is filtered out in case of:
+
+- http header Sec-Fetch-Mode_ is invalid
+- http header Sec-Fetch-Dest_ is invalid
+
+.. _Fetch Metadata:
+   https://developer.mozilla.org/en-US/docs/Glossary/Fetch_metadata_request_header
+
+.. _Sec-Fetch-Dest:
+   https://developer.mozilla.org/en-US/docs/Web/API/Request/destination
+
+.. _Sec-Fetch-Mode:
+   https://developer.mozilla.org/en-US/docs/Web/API/Request/mode
+
+
+"""
+# pylint: disable=unused-argument
+
+from __future__ import annotations
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+)
+
+import flask
+import werkzeug
+
+from searx.extended_types import SXNG_Request
+
+from . import config
+from ._helpers import logger
+
+
+def filter_request(
+    network: IPv4Network | IPv6Network,
+    request: SXNG_Request,
+    cfg: config.Config,
+) -> werkzeug.Response | None:
+    """Filter a request by its *Fetch Metadata* request headers.
+
+    The headers are checked in this order; the first failing check wins:
+
+    - ``Sec-Fetch-Mode`` must be ``navigate``
+    - ``Sec-Fetch-Site`` must be ``same-origin``, ``same-site`` or ``none``
+    - ``Sec-Fetch-Dest`` must be ``document``
+
+    A missing header is read as an empty string and therefore fails its check.
+
+    :param network: not used by this method (kept for the common
+        ``filter_request`` signature of the botdetection methods).
+    :param request: the request whose headers are inspected.
+    :param cfg: not used by this method.
+    :return: a ``302`` redirect to the ``index`` endpoint when one of the
+        checks fails, otherwise ``None``.
+    """
+
+    val = request.headers.get("Sec-Fetch-Mode", "")
+    if val != "navigate":
+        logger.debug("invalid Sec-Fetch-Mode '%s'", val)
+        return flask.redirect(flask.url_for('index'), code=302)
+
+    val = request.headers.get("Sec-Fetch-Site", "")
+    if val not in ('same-origin', 'same-site', 'none'):
+        logger.debug("invalid Sec-Fetch-Site '%s'", val)
+        # the redirect has to be returned, otherwise the request is not filtered
+        return flask.redirect(flask.url_for('index'), code=302)
+
+    val = request.headers.get("Sec-Fetch-Dest", "")
+    if val != "document":
+        logger.debug("invalid Sec-Fetch-Dest '%s'", val)
+        # the redirect has to be returned, otherwise the request is not filtered
+        return flask.redirect(flask.url_for('index'), code=302)
+
+    return None

+ 7 - 2
searx/limiter.py

@@ -112,6 +112,7 @@ from searx.botdetection import (
     http_accept_encoding,
     http_accept_encoding,
     http_accept_language,
     http_accept_language,
     http_user_agent,
     http_user_agent,
+    http_sec_fetch,
     ip_limit,
     ip_limit,
     ip_lists,
     ip_lists,
     get_network,
     get_network,
@@ -179,16 +180,17 @@ def filter_request(request: SXNG_Request) -> werkzeug.Response | None:
         logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
         logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
         return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
         return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
 
 
-    # methods applied on /
+    # methods applied on all requests
 
 
     for func in [
     for func in [
         http_user_agent,
         http_user_agent,
     ]:
     ]:
         val = func.filter_request(network, request, cfg)
         val = func.filter_request(network, request, cfg)
         if val is not None:
         if val is not None:
+            logger.debug(f"NOT OK ({func.__name__}): {network}: %s", dump_request(sxng_request))
             return val
             return val
 
 
-    # methods applied on /search
+    # methods applied on /search requests
 
 
     if request.path == '/search':
     if request.path == '/search':
 
 
@@ -197,11 +199,14 @@ def filter_request(request: SXNG_Request) -> werkzeug.Response | None:
             http_accept_encoding,
             http_accept_encoding,
             http_accept_language,
             http_accept_language,
             http_user_agent,
             http_user_agent,
+            http_sec_fetch,
             ip_limit,
             ip_limit,
         ]:
         ]:
             val = func.filter_request(network, request, cfg)
             val = func.filter_request(network, request, cfg)
             if val is not None:
             if val is not None:
+                logger.debug(f"NOT OK ({func.__name__}): {network}: %s", dump_request(sxng_request))
                 return val
                 return val
+
     logger.debug(f"OK {network}: %s", dump_request(sxng_request))
     logger.debug(f"OK {network}: %s", dump_request(sxng_request))
     return None
     return None