Browse Source

[mod] add option max_page

Related: https://github.com/searxng/searxng/issues/2982
Closes: https://github.com/searxng/searxng/issues/2972

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
2274d55d5a

+ 1 - 0
searx/engines/__init__.py

@@ -45,6 +45,7 @@ ENGINE_DEFAULT_ARGS = {
     "using_tor_proxy": False,
     "using_tor_proxy": False,
     "send_accept_language_header": False,
     "send_accept_language_header": False,
     "tokens": [],
     "tokens": [],
+    "max_page": 0,
 }
 }
 # set automatically when an engine does not have any tab category
 # set automatically when an engine does not have any tab category
 DEFAULT_CATEGORY = 'other'
 DEFAULT_CATEGORY = 'other'

+ 1 - 0
searx/engines/google.py

@@ -48,6 +48,7 @@ about = {
 # engine dependent config
 # engine dependent config
 categories = ['general', 'web']
 categories = ['general', 'web']
 paging = True
 paging = True
+max_page = 50
 time_range_support = True
 time_range_support = True
 safesearch = True
 safesearch = True
 
 

+ 1 - 0
searx/engines/google_images.py

@@ -47,6 +47,7 @@ about = {
 # engine dependent config
 # engine dependent config
 categories = ['images', 'web']
 categories = ['images', 'web']
 paging = True
 paging = True
+max_page = 50
 time_range_support = True
 time_range_support = True
 safesearch = True
 safesearch = True
 send_accept_language_header = True
 send_accept_language_header = True

+ 1 - 0
searx/engines/google_scholar.py

@@ -51,6 +51,7 @@ about = {
 # engine dependent config
 # engine dependent config
 categories = ['science', 'scientific publications']
 categories = ['science', 'scientific publications']
 paging = True
 paging = True
+max_page = 50
 language_support = True
 language_support = True
 time_range_support = True
 time_range_support = True
 safesearch = False
 safesearch = False

+ 1 - 0
searx/engines/google_videos.py

@@ -57,6 +57,7 @@ about = {
 
 
 categories = ['videos', 'web']
 categories = ['videos', 'web']
 paging = True
 paging = True
+max_page = 50
 language_support = True
 language_support = True
 time_range_support = True
 time_range_support = True
 safesearch = True
 safesearch = True

+ 5 - 0
searx/search/processors/abstract.py

@@ -150,6 +150,11 @@ class EngineProcessor(ABC):
         if search_query.pageno > 1 and not self.engine.paging:
         if search_query.pageno > 1 and not self.engine.paging:
             return None
             return None
 
 
+        # skip if the requested page number exceeds max_page (0 means no limit)
+        max_page = self.engine.max_page or settings['search']['max_page']
+        if max_page and max_page < search_query.pageno:
+            return None
+
         # if time_range is not supported, skip
         # if time_range is not supported, skip
         if search_query.time_range and not self.engine.time_range_support:
         if search_query.time_range and not self.engine.time_range_support:
             return None
             return None

+ 1 - 0
searx/settings.yml

@@ -32,6 +32,7 @@ search:
   # Default search language - leave blank to detect from browser information or
   # Default search language - leave blank to detect from browser information or
   # use codes from 'languages.py'
   # use codes from 'languages.py'
   default_lang: "auto"
   default_lang: "auto"
+  # max_page: 0  # if engine supports paging, 0 means unlimited number of pages
   # Available languages
   # Available languages
   # languages:
   # languages:
   #   - all
   #   - all

+ 1 - 0
searx/settings_defaults.py

@@ -169,6 +169,7 @@ SCHEMA = {
             'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800),
             'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800),
         },
         },
         'formats': SettingsValue(list, OUTPUT_FORMATS),
         'formats': SettingsValue(list, OUTPUT_FORMATS),
+        'max_page': SettingsValue(int, 0),
     },
     },
     'server': {
     'server': {
         'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'),
         'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'),