ip_limit.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """.. _botdetection.ip_limit:
  4. Method ``ip_limit``
  5. -------------------
  6. The ``ip_limit`` method counts requests from an IP in *sliding windows*. If
  7. there are too many requests in a sliding window, the request is evaluated as a
  8. bot request. This method requires a redis DB and needs an HTTP X-Forwarded-For_
  9. header. To protect privacy, only the hash value of an IP is stored in the redis
  10. DB, and only for a maximum of 10 minutes.
  11. The :py:obj:`.link_token` method can be used to investigate whether a request is
  12. *suspicious*. To activate the :py:obj:`.link_token` method in the
  13. :py:obj:`.ip_limit` method add the following to your
  14. ``/etc/searxng/limiter.toml``:
  15. .. code:: toml
  16. [botdetection.ip_limit]
  17. link_token = true
  18. If the :py:obj:`.link_token` method is activated and a request is *suspicious*
  19. the request rates are reduced:
  20. - :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
  21. - :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
  22. To intercept bots that get their IPs from a range of IPs, there is a
  23. :py:obj:`SUSPICIOUS_IP_WINDOW`. In this window the suspicious IPs are stored
  24. for a longer time. IPs stored in this sliding window have a maximum of
  25. :py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked. As soon as the IP
  26. makes a request that is not suspicious, the sliding window for this IP is
  27. dropped.
  28. .. _X-Forwarded-For:
  29. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
  30. """
  31. from typing import Optional
  32. import flask
  33. import werkzeug
  34. from searx.tools import config
  35. from searx import redisdb
  36. from searx import logger
  37. from searx.redislib import incr_sliding_window, drop_counter
  38. from . import link_token
  39. from ._helpers import too_many_requests, get_real_ip
  40. logger = logger.getChild('botdetection.ip_limit')
  41. BURST_WINDOW = 20
  42. """Time (sec) before sliding window for *burst* requests expires."""
  43. BURST_MAX = 15
  44. """Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""
  45. BURST_MAX_SUSPICIOUS = 2
  46. """Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""
  47. LONG_WINDOW = 600
  48. """Time (sec) before the longer sliding window expires."""
  49. LONG_MAX = 150
  50. """Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""
  51. LONG_MAX_SUSPICIOUS = 10
  52. """Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
  53. API_WONDOW = 3600
  54. """Time (sec) before sliding window for API requests (format != html) expires."""
  55. API_MAX = 4
  56. """Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
  57. SUSPICIOUS_IP_WINDOW = 3600 * 24
  58. """Time (sec) before sliding window for one suspicious IP expires."""
  59. SUSPICIOUS_IP_MAX = 3
  60. """Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
  61. def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
  62. # pylint: disable=too-many-return-statements
  63. redis_client = redisdb.client()
  64. client_ip = get_real_ip(request)
  65. if request.args.get('format', 'html') != 'html':
  66. c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW)
  67. if c > API_MAX:
  68. return too_many_requests(request, "too many request in API_WINDOW")
  69. if cfg['botdetection.ip_limit.link_token']:
  70. suspicious = link_token.is_suspicious(request, True)
  71. if not suspicious:
  72. # this IP is no longer suspicious: release ip again / delete the counter of this IP
  73. drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip)
  74. return None
  75. # this IP is suspicious: count requests from this IP
  76. c = incr_sliding_window(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip, SUSPICIOUS_IP_WINDOW)
  77. if c > SUSPICIOUS_IP_MAX:
  78. logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", client_ip)
  79. return flask.redirect(flask.url_for('index'), code=302)
  80. c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW)
  81. if c > BURST_MAX_SUSPICIOUS:
  82. return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
  83. c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW)
  84. if c > LONG_MAX_SUSPICIOUS:
  85. return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
  86. return None
  87. # vanilla limiter without extensions counts BURST_MAX and LONG_MAX
  88. c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW)
  89. if c > BURST_MAX:
  90. return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX)")
  91. c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW)
  92. if c > LONG_MAX:
  93. return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX)")
  94. return None