_helpers.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. # pylint: disable=missing-module-docstring, invalid-name
  4. from typing import Optional
  5. import flask
  6. import werkzeug
  7. from searx import logger
  8. logger = logger.getChild('botdetection')
  9. def dump_request(request: flask.Request):
  10. return (
  11. "%s: %s" % (get_real_ip(request), request.path)
  12. + " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
  13. + " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
  14. + " || form: %s" % request.form
  15. + " || Accept: %s" % request.headers.get('Accept')
  16. + " || Accept-Language: %s" % request.headers.get('Accept-Language')
  17. + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
  18. + " || Content-Type: %s" % request.headers.get('Content-Type')
  19. + " || Content-Length: %s" % request.headers.get('Content-Length')
  20. + " || Connection: %s" % request.headers.get('Connection')
  21. + " || User-Agent: %s" % request.headers.get('User-Agent')
  22. )
  23. def too_many_requests(request: flask.Request, log_msg: str) -> Optional[werkzeug.Response]:
  24. log_prefix = 'BLOCK %s: ' % get_real_ip(request)
  25. logger.debug(log_prefix + log_msg)
  26. return flask.make_response(('Too Many Requests', 429))
  27. def get_real_ip(request: flask.Request) -> str:
  28. """Returns real IP of the request. Since not all proxies set all the HTTP
  29. headers and incoming headers can be faked it may happen that the IP cannot
  30. be determined correctly.
  31. .. sidebar:: :py:obj:`flask.Request.remote_addr`
  32. SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
  33. This function tries to get the remote IP in the order listed below,
  34. additional some tests are done and if inconsistencies or errors are
  35. detected, they are logged.
  36. The remote IP of the request is taken from (first match):
  37. - X-Forwarded-For_ header
  38. - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
  39. - :py:obj:`flask.Request.remote_addr`
  40. .. _ProxyFix:
  41. https://werkzeug.palletsprojects.com/middleware/proxy_fix/
  42. .. _X-Forwarded-For:
  43. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
  44. """
  45. forwarded_for = request.headers.get("X-Forwarded-For")
  46. real_ip = request.headers.get('X-Real-IP')
  47. remote_addr = request.remote_addr
  48. logger.debug("X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr)
  49. if not forwarded_for:
  50. logger.error("X-Forwarded-For header is not set!")
  51. else:
  52. from .limiter import get_cfg # pylint: disable=import-outside-toplevel, cyclic-import
  53. forwarded_for = [x.strip() for x in forwarded_for.split(',')]
  54. x_for: int = get_cfg()['real_ip.x_for']
  55. forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
  56. if not real_ip:
  57. logger.error("X-Real-IP header is not set!")
  58. if forwarded_for and real_ip and forwarded_for != real_ip:
  59. logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
  60. if forwarded_for and remote_addr and forwarded_for != remote_addr:
  61. logger.warning(
  62. "IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
  63. )
  64. if real_ip and remote_addr and real_ip != remote_addr:
  65. logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
  66. request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0'
  67. logger.debug("get_real_ip() -> %s", request_ip)
  68. return request_ip