ip_limit.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. """.. _botdetection.ip_limit:
  2. Method ``ip_limit``
  3. -------------------
  4. The ``ip_limit`` method counts requests from an IP in *sliding windows*. If
  5. there are too many requests in a sliding window, the request is evaluated as a
  6. bot request. This method requires a redis DB and needs an HTTP X-Forwarded-For_
  7. header. To protect privacy, only the hash value of an IP is stored in the redis
  8. DB, and only for a maximum of 10 minutes.
  9. The :py:obj:`.link_token` method can be used to investigate whether a request is
  10. *suspicious*. To activate the :py:obj:`.link_token` method in the
  11. :py:obj:`.ip_limit` method add the following to your
  12. ``/etc/searxng/limiter.toml``:
  13. .. code:: toml
  14. [botdetection.ip_limit]
  15. link_token = true
  16. If the :py:obj:`.link_token` method is activated and a request is *suspicious*
  17. the request rates are reduced:
  18. - :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
  19. - :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
  20. To intercept bots that get their IPs from a range of IPs, there is a
  21. :py:obj:`SUSPICIOUS_IP_WINDOW`. In this window the suspicious IPs are stored
  22. for a longer time. IPs stored in this sliding window have a maximum of
  23. :py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked. As soon as the IP
  24. makes a request that is not suspicious, the sliding window for this IP is
  25. dropped.
  26. .. _X-Forwarded-For:
  27. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
  28. """
  29. from typing import Optional, Tuple
  30. import flask
  31. from searx.tools import config
  32. from searx import redisdb
  33. from searx import logger
  34. from searx.redislib import incr_sliding_window, drop_counter
  35. from . import link_token
# Rebind ``logger`` (imported from ``searx`` above) to a child logger that is
# specific to this module.
logger = logger.getChild('botdetection.ip_limit')

# --- Short ("burst") sliding window -----------------------------------------
# filter_request compares the burst counter against BURST_MAX for ordinary
# requests and against BURST_MAX_SUSPICIOUS for suspicious ones.

BURST_WINDOW = 20
"""Time (sec) before sliding window for *burst* requests expires."""

BURST_MAX = 15
"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""

BURST_MAX_SUSPICIOUS = 2
"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""

# --- Long sliding window -----------------------------------------------------
# Same scheme as the burst window: LONG_MAX for ordinary requests,
# LONG_MAX_SUSPICIOUS for suspicious ones.

LONG_WINDOW = 600
"""Time (sec) before the longer sliding window expires."""

LONG_MAX = 150
"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""

LONG_MAX_SUSPICIOUS = 10
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""

# --- API window (requests whose ``format`` argument is not ``html``) ---------
# NOTE: "WONDOW" is a misspelling of "WINDOW".  The name is kept unchanged
# because filter_request references it and also uses it in the name of the
# redis counter.

API_WONDOW = 3600
"""Time (sec) before sliding window for API requests (format != html) expires."""

API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""

# --- Window for IPs flagged as suspicious ------------------------------------
# Only counted when the link_token method is enabled and reports the request
# as suspicious; filter_request drops the counter again on the first
# non-suspicious request.

SUSPICIOUS_IP_WINDOW = 3600 * 24
"""Time (sec) before sliding window for one suspicious IP expires."""

SUSPICIOUS_IP_MAX = 3
"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
  57. def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
  58. redis_client = redisdb.client()
  59. x_forwarded_for = request.headers.get('X-Forwarded-For', '')
  60. if not x_forwarded_for:
  61. logger.error("missing HTTP header X-Forwarded-For")
  62. if request.args.get('format', 'html') != 'html':
  63. c = incr_sliding_window(redis_client, 'IP limit - API_WONDOW:' + x_forwarded_for, API_WONDOW)
  64. if c > API_MAX:
  65. return 429, "BLOCK %s: API limit exceeded"
  66. suspicious = False
  67. suspicious_ip_counter = 'IP limit - SUSPICIOUS_IP_WINDOW:' + x_forwarded_for
  68. if cfg['botdetection.ip_limit.link_token']:
  69. suspicious = link_token.is_suspicious(request)
  70. if suspicious:
  71. # this IP is suspicious: count requests from this IP
  72. c = incr_sliding_window(redis_client, suspicious_ip_counter, SUSPICIOUS_IP_WINDOW)
  73. if c > SUSPICIOUS_IP_MAX:
  74. return 429, f"bot detected, too many request from {x_forwarded_for} in SUSPICIOUS_IP_WINDOW"
  75. c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
  76. if c > BURST_MAX_SUSPICIOUS:
  77. return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS"
  78. c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW)
  79. if c > LONG_MAX_SUSPICIOUS:
  80. return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS"
  81. else:
  82. if cfg['botdetection.ip_limit.link_token']:
  83. # this IP is no longer suspicious: release ip again / delete the counter of this IP
  84. drop_counter(redis_client, suspicious_ip_counter)
  85. c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
  86. if c > BURST_MAX:
  87. return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX"
  88. c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW)
  89. if c > LONG_MAX:
  90. return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX"
  91. return None