Browse Source

[mod] yacy engine: pick base_url randomly from a list of instances

Inspired by post [1] in the disscussion we had, while yacy.searchlab.eu was
broken.

[1] https://github.com/searxng/searxng/issues/3428#issuecomment-2101080101

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 11 months ago
parent
commit
fb32425d78
2 changed files with 42 additions and 24 deletions
  1. 37 22
      searx/engines/yacy.py
  2. 5 2
      searx/settings.yml

+ 37 - 22
searx/engines/yacy.py

@@ -22,20 +22,26 @@ The engine has the following (additional) settings:
 - :py:obj:`search_mode`
 - :py:obj:`search_mode`
 - :py:obj:`search_type`
 - :py:obj:`search_type`
 
 
+The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by
+all yacy engines.
+
 .. code:: yaml
 .. code:: yaml
 
 
   - name: yacy
   - name: yacy
     engine: yacy
     engine: yacy
     categories: general
     categories: general
     search_type: text
     search_type: text
-    base_url: https://yacy.searchlab.eu
     shortcut: ya
     shortcut: ya
+    base_url:
+      - https://yacy.searchlab.eu
+      - https://search.lomig.me
+      - https://yacy.ecosys.eu
+      - https://search.webproject.link
 
 
   - name: yacy images
   - name: yacy images
     engine: yacy
     engine: yacy
     categories: images
     categories: images
     search_type: image
     search_type: image
-    base_url: https://yacy.searchlab.eu
     shortcut: yai
     shortcut: yai
     disabled: true
     disabled: true
 
 
@@ -45,6 +51,9 @@ Implementations
 """
 """
 # pylint: disable=fixme
 # pylint: disable=fixme
 
 
+from __future__ import annotations
+
+import random
 from json import loads
 from json import loads
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from dateutil import parser
 from dateutil import parser
@@ -87,15 +96,10 @@ search_type = 'text'
 ``video`` are not yet implemented (Pull-Requests are welcome).
 ``video`` are not yet implemented (Pull-Requests are welcome).
 """
 """
 
 
-# search-url
-base_url = 'https://yacy.searchlab.eu'
-search_url = (
-    '/yacysearch.json?{query}'
-    '&startRecord={offset}'
-    '&maximumRecords={limit}'
-    '&contentdom={search_type}'
-    '&resource={resource}'
-)
+base_url: list | str = 'https://yacy.searchlab.eu'
+"""The value is an URL or a list of URLs.  In the latter case instance will be
+selected randomly.
+"""
 
 
 
 
 def init(_):
 def init(_):
@@ -108,24 +112,35 @@ def init(_):
         raise ValueError('search_type "%s" is  not one of %s' % (search_type, valid_types))
         raise ValueError('search_type "%s" is  not one of %s' % (search_type, valid_types))
 
 
 
 
+def _base_url() -> str:
+    from searx.engines import engines  # pylint: disable=import-outside-toplevel
+
+    url = engines['yacy'].base_url  # type: ignore
+    if isinstance(url, list):
+        url = random.choice(url)
+    return url
+
+
 def request(query, params):
 def request(query, params):
+
     offset = (params['pageno'] - 1) * number_of_results
     offset = (params['pageno'] - 1) * number_of_results
+    args = {
+        'query': query,
+        'startRecord': offset,
+        'maximumRecords': number_of_results,
+        'contentdom': search_type,
+        'resource': search_mode,
+    }
 
 
-    params['url'] = base_url + search_url.format(
-        query=urlencode({'query': query}),
-        offset=offset,
-        limit=number_of_results,
-        search_type=search_type,
-        resource=search_mode,
-    )
+    # add language tag if specified
+    if params['language'] != 'all':
+        args['lr'] = 'lang_' + params['language'].split('-')[0]
+
+    params["url"] = f"{_base_url()}/yacysearch.json?{urlencode(args)}"
 
 
     if http_digest_auth_user and http_digest_auth_pass:
     if http_digest_auth_user and http_digest_auth_pass:
         params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
         params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
 
 
-    # add language tag if specified
-    if params['language'] != 'all':
-        params['url'] += '&lr=lang_' + params['language'].split('-')[0]
-
     return params
     return params
 
 
 
 

+ 5 - 2
searx/settings.yml

@@ -2082,7 +2082,11 @@ engines:
     engine: yacy
     engine: yacy
     categories: general
     categories: general
     search_type: text
     search_type: text
-    base_url: https://yacy.searchlab.eu
+    base_url:
+      - https://yacy.searchlab.eu
+      - https://search.lomig.me
+      - https://yacy.ecosys.eu
+      - https://search.webproject.link
     shortcut: ya
     shortcut: ya
     disabled: true
     disabled: true
     # required if you aren't using HTTPS for your local yacy instance
     # required if you aren't using HTTPS for your local yacy instance
@@ -2095,7 +2099,6 @@ engines:
     engine: yacy
     engine: yacy
     categories: images
     categories: images
     search_type: image
     search_type: image
-    base_url: https://yacy.searchlab.eu
     shortcut: yai
     shortcut: yai
     disabled: true
     disabled: true