_helpers.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring, invalid-name
  3. from __future__ import annotations
  4. from ipaddress import (
  5. IPv4Network,
  6. IPv6Network,
  7. IPv4Address,
  8. IPv6Address,
  9. ip_network,
  10. ip_address,
  11. )
  12. import flask
  13. import werkzeug
  14. from searx import logger
  15. from searx.extended_types import SXNG_Request
  16. from . import config
  17. logger = logger.getChild('botdetection')
  18. def dump_request(request: SXNG_Request):
  19. return (
  20. request.path
  21. + " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
  22. + " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
  23. + " || form: %s" % request.form
  24. + " || Accept: %s" % request.headers.get('Accept')
  25. + " || Accept-Language: %s" % request.headers.get('Accept-Language')
  26. + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
  27. + " || Content-Type: %s" % request.headers.get('Content-Type')
  28. + " || Content-Length: %s" % request.headers.get('Content-Length')
  29. + " || Connection: %s" % request.headers.get('Connection')
  30. + " || User-Agent: %s" % request.headers.get('User-Agent')
  31. + " || Sec-Fetch-Site: %s" % request.headers.get('Sec-Fetch-Site')
  32. + " || Sec-Fetch-Mode: %s" % request.headers.get('Sec-Fetch-Mode')
  33. + " || Sec-Fetch-Dest: %s" % request.headers.get('Sec-Fetch-Dest')
  34. )
  35. def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
  36. """Returns a HTTP 429 response object and writes a ERROR message to the
  37. 'botdetection' logger. This function is used in part by the filter methods
  38. to return the default ``Too Many Requests`` response.
  39. """
  40. logger.debug("BLOCK %s: %s", network.compressed, log_msg)
  41. return flask.make_response(('Too Many Requests', 429))
  42. def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
  43. """Returns the (client) network of whether the real_ip is part of."""
  44. if real_ip.version == 6:
  45. prefix = cfg['real_ip.ipv6_prefix']
  46. else:
  47. prefix = cfg['real_ip.ipv4_prefix']
  48. network = ip_network(f"{real_ip}/{prefix}", strict=False)
  49. # logger.debug("get_network(): %s", network.compressed)
  50. return network
  51. _logged_errors = []
  52. def _log_error_only_once(err_msg):
  53. if err_msg not in _logged_errors:
  54. logger.error(err_msg)
  55. _logged_errors.append(err_msg)
  56. def get_real_ip(request: SXNG_Request) -> str:
  57. """Returns real IP of the request. Since not all proxies set all the HTTP
  58. headers and incoming headers can be faked it may happen that the IP cannot
  59. be determined correctly.
  60. .. sidebar:: :py:obj:`flask.Request.remote_addr`
  61. SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
  62. This function tries to get the remote IP in the order listed below,
  63. additional some tests are done and if inconsistencies or errors are
  64. detected, they are logged.
  65. The remote IP of the request is taken from (first match):
  66. - X-Forwarded-For_ header
  67. - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
  68. - :py:obj:`flask.Request.remote_addr`
  69. .. _ProxyFix:
  70. https://werkzeug.palletsprojects.com/middleware/proxy_fix/
  71. .. _X-Forwarded-For:
  72. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
  73. """
  74. forwarded_for = request.headers.get("X-Forwarded-For")
  75. real_ip = request.headers.get('X-Real-IP')
  76. remote_addr = request.remote_addr
  77. # logger.debug(
  78. # "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
  79. # )
  80. if not forwarded_for:
  81. _log_error_only_once("X-Forwarded-For header is not set!")
  82. else:
  83. from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
  84. forwarded_for = [x.strip() for x in forwarded_for.split(',')]
  85. x_for: int = cfg['real_ip.x_for'] # type: ignore
  86. forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
  87. if not real_ip:
  88. _log_error_only_once("X-Real-IP header is not set!")
  89. if forwarded_for and real_ip and forwarded_for != real_ip:
  90. logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
  91. if forwarded_for and remote_addr and forwarded_for != remote_addr:
  92. logger.warning(
  93. "IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
  94. )
  95. if real_ip and remote_addr and real_ip != remote_addr:
  96. logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
  97. request_ip = ip_address(forwarded_for or real_ip or remote_addr or '0.0.0.0')
  98. if request_ip.version == 6 and request_ip.ipv4_mapped:
  99. request_ip = request_ip.ipv4_mapped
  100. # logger.debug("get_real_ip() -> %s", request_ip)
  101. return str(request_ip)