Browse Source

Merge pull request #1967 from tiekoetter/suspended_time-settings

settings.yml: add search.suspended_times
Alexandre Flament 2 years ago
parent
commit
52d4155997

+ 15 - 3
searx/exceptions.py

@@ -69,11 +69,19 @@ class SearxEngineAPIException(SearxEngineResponseException):
 class SearxEngineAccessDeniedException(SearxEngineResponseException):
     """The website is blocking the access"""
 
-    def __init__(self, suspended_time=24 * 3600, message='Access denied'):
+    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
+
+    def __init__(self, suspended_time=None, message='Access denied'):
+        suspended_time = suspended_time or self._get_default_suspended_time()
         super().__init__(message + ', suspended_time=' + str(suspended_time))
         self.suspended_time = suspended_time
         self.message = message
 
+    def _get_default_suspended_time(self):
+        from searx import get_setting
+
+        return get_setting(self.SUSPEND_TIME_SETTING)
+
 
 class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
     """The website has returned a CAPTCHA
@@ -81,7 +89,9 @@ class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
     By default, searx stops sending requests to this engine for 1 day.
     """
 
-    def __init__(self, suspended_time=24 * 3600, message='CAPTCHA'):
+    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
+
+    def __init__(self, suspended_time=None, message='CAPTCHA'):
         super().__init__(message=message, suspended_time=suspended_time)
 
 
@@ -91,7 +101,9 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
     By default, searx stops sending requests to this engine for 1 hour.
     """
 
-    def __init__(self, suspended_time=3600, message='Too many request'):
+    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
+
+    def __init__(self, suspended_time=None, message='Too many request'):
         super().__init__(message=message, suspended_time=suspended_time)
 
 

+ 11 - 3
searx/network/raise_for_httperror.py

@@ -9,6 +9,7 @@ from searx.exceptions import (
     SearxEngineTooManyRequestsException,
     SearxEngineAccessDeniedException,
 )
+from searx import get_setting
 
 
 def is_cloudflare_challenge(resp):
@@ -33,15 +34,22 @@ def raise_for_cloudflare_captcha(resp):
         if is_cloudflare_challenge(resp):
             # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
             # suspend for 15 days
-            raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15)
+            raise SearxEngineCaptchaException(
+                message='Cloudflare CAPTCHA', suspended_time=get_setting('search.suspended_times.cf_SearxEngineCaptcha')
+            )
 
         if is_cloudflare_firewall(resp):
-            raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24)
+            raise SearxEngineAccessDeniedException(
+                message='Cloudflare Firewall',
+                suspended_time=get_setting('search.suspended_times.cf_SearxEngineAccessDenied'),
+            )
 
 
 def raise_for_recaptcha(resp):
+    """Raise SearxEngineCaptchaException if ``resp`` is a 503 whose text contains a Google reCAPTCHA URL.
+
+    The suspension time is read from the ``search.suspended_times.recaptcha_SearxEngineCaptcha`` setting.
+    """
     if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text:
-        raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7)
+        raise SearxEngineCaptchaException(
+            message='ReCAPTCHA', suspended_time=get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha')
+        )
 
 
 def raise_for_captcha(resp):

+ 14 - 0
searx/settings.yml

@@ -45,6 +45,20 @@ search:
   ban_time_on_fail: 5
   # max ban time in seconds after engine errors
   max_ban_time_on_fail: 120
+  suspended_times:
+    # Engine suspension time after error (in seconds; set to 0 to disable)
+    # For error "Access denied" and "HTTP error [402, 403]"
+    SearxEngineAccessDenied: 86400
+    # For error "CAPTCHA"
+    SearxEngineCaptcha: 86400
+    # For error "Too many request" and "HTTP error 429"
+    SearxEngineTooManyRequests: 3600
+    # Cloudflare CAPTCHA
+    cf_SearxEngineCaptcha: 1296000
+    # Cloudflare Firewall
+    cf_SearxEngineAccessDenied: 86400
+    # ReCAPTCHA
+    recaptcha_SearxEngineCaptcha: 604800
+
   # remove format to deny access, use lower case.
   # formats: [html, csv, json, rss]
   formats:

+ 8 - 0
searx/settings_defaults.py

@@ -160,6 +160,14 @@ SCHEMA = {
         'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
         'ban_time_on_fail': SettingsValue(numbers.Real, 5),
         'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
+        'suspended_times': {
+            'SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
+            'SearxEngineCaptcha': SettingsValue(numbers.Real, 86400),
+            'SearxEngineTooManyRequests': SettingsValue(numbers.Real, 3600),
+            'cf_SearxEngineCaptcha': SettingsValue(numbers.Real, 1296000),
+            'cf_SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
+            'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800),
+        },
         'formats': SettingsValue(list, OUTPUT_FORMATS),
     },
     'server': {

+ 41 - 0
tests/unit/test_exceptions.py

@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+from tests import SearxTestCase
+import searx.exceptions
+from searx import get_setting
+
+
+class TestExceptions(SearxTestCase):
+    def test_default_suspend_time(self):
+        with self.assertRaises(searx.exceptions.SearxEngineAccessDeniedException) as e:
+            raise searx.exceptions.SearxEngineAccessDeniedException()
+        self.assertEqual(
+            e.exception.suspended_time,
+            get_setting(searx.exceptions.SearxEngineAccessDeniedException.SUSPEND_TIME_SETTING),
+        )
+
+        with self.assertRaises(searx.exceptions.SearxEngineCaptchaException) as e:
+            raise searx.exceptions.SearxEngineCaptchaException()
+        self.assertEqual(
+            e.exception.suspended_time, get_setting(searx.exceptions.SearxEngineCaptchaException.SUSPEND_TIME_SETTING)
+        )
+
+        with self.assertRaises(searx.exceptions.SearxEngineTooManyRequestsException) as e:
+            raise searx.exceptions.SearxEngineTooManyRequestsException()
+        self.assertEqual(
+            e.exception.suspended_time,
+            get_setting(searx.exceptions.SearxEngineTooManyRequestsException.SUSPEND_TIME_SETTING),
+        )
+
+    def test_custom_suspend_time(self):
+        with self.assertRaises(searx.exceptions.SearxEngineAccessDeniedException) as e:
+            raise searx.exceptions.SearxEngineAccessDeniedException(suspended_time=1337)
+        self.assertEqual(e.exception.suspended_time, 1337)
+
+        with self.assertRaises(searx.exceptions.SearxEngineCaptchaException) as e:
+            raise searx.exceptions.SearxEngineCaptchaException(suspended_time=1409)
+        self.assertEqual(e.exception.suspended_time, 1409)
+
+        with self.assertRaises(searx.exceptions.SearxEngineTooManyRequestsException) as e:
+            raise searx.exceptions.SearxEngineTooManyRequestsException(suspended_time=1543)
+        self.assertEqual(e.exception.suspended_time, 1543)