Browse Source

Merge pull request #2132 from dalf/update_pr_1967

search.suspended_time settings: bug fixes
Alexandre Flament 2 years ago
parent
commit
9d102fb08f

+ 27 - 0
docs/admin/engines/settings.rst

@@ -110,6 +110,13 @@ Global Settings
      default_lang: ""
      default_lang: ""
      ban_time_on_fail: 5
      ban_time_on_fail: 5
      max_ban_time_on_fail: 120
      max_ban_time_on_fail: 120
+     suspended_times:
+       SearxEngineAccessDenied: 86400
+       SearxEngineCaptcha: 86400
+       SearxEngineTooManyRequests: 3600
+       cf_SearxEngineCaptcha: 1296000
+       cf_SearxEngineAccessDenied: 86400
+       recaptcha_SearxEngineCaptcha: 604800
      formats:
      formats:
        - html
        - html
 
 
@@ -159,6 +166,25 @@ Global Settings
 ``max_ban_time_on_fail``:
 ``max_ban_time_on_fail``:
   Max ban time in seconds after engine errors.
   Max ban time in seconds after engine errors.
 
 
+``suspended_times``:
+  Engine suspension time after error (in seconds; set to 0 to disable)
+
+  ``SearxEngineAccessDenied``: 86400
+    For error "Access denied" and "HTTP error [402, 403]"
+
+  ``SearxEngineCaptcha``: 86400
+    For error "CAPTCHA"
+
+  ``SearxEngineTooManyRequests``: 3600
+    For error "Too many request" and "HTTP error 429"
+
+  Cloudflare CAPTCHA:
+     - ``cf_SearxEngineCaptcha``: 1296000
+     - ``cf_SearxEngineAccessDenied``: 86400
+
+  Google CAPTCHA:
+    - ``recaptcha_SearxEngineCaptcha``: 604800
+
 ``formats``:
 ``formats``:
   Result formats available from web, remove format to deny access (use lower
   Result formats available from web, remove format to deny access (use lower
   case).
   case).
@@ -168,6 +194,7 @@ Global Settings
   - ``json``
   - ``json``
   - ``rss``
   - ``rss``
 
 
+
 .. _settings server:
 .. _settings server:
 
 
 ``server:``
 ``server:``

+ 8 - 0
docs/src/searx.exceptions.rst

@@ -0,0 +1,8 @@
+.. _searx.exceptions:
+
+==================
+SearXNG Exceptions
+==================
+
+.. automodule:: searx.exceptions
+  :members:

+ 1 - 2
searx/engines/startpage.py

@@ -62,8 +62,7 @@ sc_code = ''
 def raise_captcha(resp):
 def raise_captcha(resp):
 
 
     if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
     if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
-        # suspend CAPTCHA for 7 days
-        raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600)
+        raise SearxEngineCaptchaException()
 
 
 
 
 def get_sc_code(headers):
 def get_sc_code(headers):

+ 26 - 25
searx/exceptions.py

@@ -1,29 +1,19 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-
-(C) 2017- by Alexandre Flament, <alex@al-f.net>
-'''
-
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Exception types raised by SearXNG modules.
+"""
 
 
 from typing import Optional, Union
 from typing import Optional, Union
 
 
 
 
 class SearxException(Exception):
 class SearxException(Exception):
-    pass
+    """Base SearXNG exception."""
 
 
 
 
 class SearxParameterException(SearxException):
 class SearxParameterException(SearxException):
+    """Raised when a query is missing a required parameter."""
+
     def __init__(self, name, value):
     def __init__(self, name, value):
         if value == '' or value is None:
         if value == '' or value is None:
             message = 'Empty ' + name + ' parameter'
             message = 'Empty ' + name + ' parameter'
@@ -70,26 +60,35 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
     """The website is blocking the access"""
     """The website is blocking the access"""
 
 
     SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
     SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
-
-    def __init__(self, suspended_time=None, message='Access denied'):
+    """This setting contains the default suspended time (default 86400 sec / 1
+    day)."""
+
+    def __init__(self, suspended_time: int = None, message: str = 'Access denied'):
+        """Generic exception to raise when an engine denies access to the results.
+
+        :param suspended_time: How long the engine is going to be suspended in
+            seconds. Defaults to None.
+        :type suspended_time: int, None
+        :param message: Internal message.  Defaults to ``Access denied``
+        :type message: str
+        """
         suspended_time = suspended_time or self._get_default_suspended_time()
         suspended_time = suspended_time or self._get_default_suspended_time()
         super().__init__(message + ', suspended_time=' + str(suspended_time))
         super().__init__(message + ', suspended_time=' + str(suspended_time))
         self.suspended_time = suspended_time
         self.suspended_time = suspended_time
         self.message = message
         self.message = message
 
 
     def _get_default_suspended_time(self):
     def _get_default_suspended_time(self):
-        from searx import get_setting
+        from searx import get_setting  # pylint: disable=C0415
 
 
         return get_setting(self.SUSPEND_TIME_SETTING)
         return get_setting(self.SUSPEND_TIME_SETTING)
 
 
 
 
 class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
 class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
-    """The website has returned a CAPTCHA
-
-    By default, searx stops sending requests to this engine for 1 day.
-    """
+    """The website has returned a CAPTCHA."""
 
 
     SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
     SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
+    """This setting contains the default suspended time (default 86400 sec / 1
+    day)."""
 
 
     def __init__(self, suspended_time=None, message='CAPTCHA'):
     def __init__(self, suspended_time=None, message='CAPTCHA'):
         super().__init__(message=message, suspended_time=suspended_time)
         super().__init__(message=message, suspended_time=suspended_time)
@@ -102,6 +101,8 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
     """
     """
 
 
     SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
     SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
+    """This setting contains the default suspended time (default 3600 sec / 1
+    hour)."""
 
 
     def __init__(self, suspended_time=None, message='Too many request'):
     def __init__(self, suspended_time=None, message='Too many request'):
         super().__init__(message=message, suspended_time=suspended_time)
         super().__init__(message=message, suspended_time=suspended_time)

+ 1 - 3
searx/network/raise_for_httperror.py

@@ -72,9 +72,7 @@ def raise_for_httperror(resp):
     if resp.status_code and resp.status_code >= 400:
     if resp.status_code and resp.status_code >= 400:
         raise_for_captcha(resp)
         raise_for_captcha(resp)
         if resp.status_code in (402, 403):
         if resp.status_code in (402, 403):
-            raise SearxEngineAccessDeniedException(
-                message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24
-            )
+            raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code))
         if resp.status_code == 429:
         if resp.status_code == 429:
             raise SearxEngineTooManyRequestsException()
             raise SearxEngineTooManyRequestsException()
         resp.raise_for_status()
         resp.raise_for_status()

+ 1 - 1
searx/settings.yml

@@ -45,7 +45,7 @@ search:
   ban_time_on_fail: 5
   ban_time_on_fail: 5
   # max ban time in seconds after engine errors
   # max ban time in seconds after engine errors
   max_ban_time_on_fail: 120
   max_ban_time_on_fail: 120
-  suspend_times:
+  suspended_times:
     # Engine suspension time after error (in seconds; set to 0 to disable)
     # Engine suspension time after error (in seconds; set to 0 to disable)
     # For error "Access denied" and "HTTP error [402, 403]"
     # For error "Access denied" and "HTTP error [402, 403]"
     SearxEngineAccessDenied: 86400
     SearxEngineAccessDenied: 86400