Browse Source

[fix] correct determination of the IP for the request

To correctly determine the IP of a request, the function
botdetection.get_real_ip() is implemented.  This function is used in the
ip_limit and link_token methods of the botdetection and in the
self_info plugin.

Documentation about the X-Forwarded-For header has been added.

[1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566211059

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
38431d2e14

+ 17 - 3
searx/botdetection/__init__.py

@@ -2,11 +2,25 @@
 # lint: pylint
 """.. _botdetection src:
 
-Bot detection methods
----------------------
+X-Forwarded-For
+===============
 
-The methods implemented in this python package are use by the :ref:`limiter src`.
+.. attention::
+
+   A correct setup of the HTTP request headers ``X-Forwarded-For`` and
+   ``X-Real-IP`` is essential to be able to assign a request to an IP correctly:
+
+   - `NGINX RequestHeader`_
+   - `Apache RequestHeader`_
+
+.. _NGINX RequestHeader:
+    https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site
+.. _Apache RequestHeader:
+    https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site
+
+.. autofunction:: searx.botdetection.get_real_ip
 
 """
 
 from ._helpers import dump_request
+from ._helpers import get_real_ip

+ 2 - 4
searx/botdetection/ip_limit.py

@@ -49,7 +49,7 @@ from searx import logger
 from searx.redislib import incr_sliding_window, drop_counter
 
 from . import link_token
-from ._helpers import too_many_requests
+from ._helpers import too_many_requests, get_real_ip
 
 
 logger = logger.getChild('botdetection.ip_limit')
@@ -89,9 +89,7 @@ def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkz
     # pylint: disable=too-many-return-statements
     redis_client = redisdb.client()
 
-    client_ip = request.headers.get('X-Forwarded-For', '')
-    if not client_ip:
-        logger.error("missing HTTP header X-Forwarded-For")
+    client_ip = get_real_ip(request)
 
     if request.args.get('format', 'html') != 'html':
         c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW)

+ 6 - 1
searx/botdetection/limiter.toml

@@ -1,3 +1,8 @@
 [botdetection.ip_limit]
 
-link_token = false
+link_token = false
+
+[real_ip]
+
+# Number of values to trust for X-Forwarded-For.
+x_for = 1

+ 3 - 4
searx/botdetection/link_token.py

@@ -43,6 +43,7 @@ import flask
 from searx import logger
 from searx import redisdb
 from searx.redislib import secret_hash
+from ._helpers import get_real_ip
 
 TOKEN_LIVE_TIME = 600
 """Livetime (sec) of limiter's CSS token."""
@@ -73,7 +74,7 @@ def is_suspicious(request: flask.Request, renew: bool = False):
     if not redis_client.get(ping_key):
         logger.warning(
             "missing ping (IP: %s) / request: %s",
-            request.headers.get('X-Forwarded-For', ''),
+            get_real_ip(request),
             ping_key,
         )
         return True
@@ -111,9 +112,7 @@ def get_ping_key(request: flask.Request):
         PING_KEY
         + "["
         + secret_hash(
-            request.headers.get('X-Forwarded-For', '')
-            + request.headers.get('Accept-Language', '')
-            + request.headers.get('User-Agent', '')
+            get_real_ip(request) + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
         )
         + "]"
     )

+ 7 - 24
searx/plugins/self_info.py

@@ -1,21 +1,11 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pylint: disable=missing-module-docstring,invalid-name
 
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-
-(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
-'''
-from flask_babel import gettext
 import re
+from flask_babel import gettext
+
+from searx.botdetection._helpers import get_real_ip
 
 name = gettext('Self Information')
 description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".')
@@ -28,18 +18,11 @@ query_examples = ''
 p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
 
 
-# attach callback to the post search hook
-#  request: flask request object
-#  ctx: the whole local context of the pre search hook
 def post_search(request, search):
     if search.search_query.pageno > 1:
         return True
     if search.search_query.query == 'ip':
-        x_forwarded_for = request.headers.getlist("X-Forwarded-For")
-        if x_forwarded_for:
-            ip = x_forwarded_for[0]
-        else:
-            ip = request.remote_addr
+        ip = get_real_ip(request)
         search.result_container.answers['ip'] = {'answer': ip}
     elif p.match(search.search_query.query):
         ua = request.user_agent

+ 7 - 5
tests/unit/test_plugins.py

@@ -50,9 +50,13 @@ class SelfIPTest(SearxTestCase):
         self.assertTrue(len(store.plugins) == 1)
 
         # IP test
-        request = Mock(remote_addr='127.0.0.1')
-        request.headers.getlist.return_value = []
-        search = get_search_mock(query='ip', pageno=1)
+        request = Mock()
+        request.remote_addr = '127.0.0.1'
+        request.headers = {'X-Forwarded-For': '1.2.3.4, 127.0.0.1', 'X-Real-IP': '127.0.0.1'}
+        search = get_search_mock(
+            query='ip',
+            pageno=1,
+        )
         store.call(store.plugins, 'post_search', request, search)
         self.assertTrue('127.0.0.1' in search.result_container.answers["ip"]["answer"])
 
@@ -62,7 +66,6 @@ class SelfIPTest(SearxTestCase):
 
         # User agent test
         request = Mock(user_agent='Mock')
-        request.headers.getlist.return_value = []
 
         search = get_search_mock(query='user-agent', pageno=1)
         store.call(store.plugins, 'post_search', request, search)
@@ -98,7 +101,6 @@ class HashPluginTest(SearxTestCase):
         self.assertTrue(len(store.plugins) == 1)
 
         request = Mock(remote_addr='127.0.0.1')
-        request.headers.getlist.return_value = []
 
         # MD5
         search = get_search_mock(query='md5 test', pageno=1)