Browse Source

[fix] update_engine_traits.py: annas archive, bing-* and zlibrary engines

The GitHub action "Update data - update_engine_traits" [1] had issues with the
annas archive, bing-* and zlibrary engines when running:

    ./manage pyenv.cmd python ./searxng_extra/update/update_engine_traits.py

[1] https://github.com/searxng/searxng/actions/runs/12530827768/job/34953392587

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 4 months ago
parent
commit
af3f272b0b
3 changed files with 34 additions and 9 deletions
  1. 5 3
      searx/engines/annas_archive.py
  2. 14 1
      searx/engines/bing.py
  3. 15 5
      searx/engines/zlibrary.py

+ 5 - 3
searx/engines/annas_archive.py

@@ -169,7 +169,7 @@ def fetch_traits(engine_traits: EngineTraits):
     lang_map = {}
     for x in eval_xpath_list(dom, "//form//input[@name='lang']"):
         eng_lang = x.get("value")
-        if eng_lang in ('', '_empty', 'nl-BE', 'und'):
+        if eng_lang in ('', '_empty', 'nl-BE', 'und') or eng_lang.startswith('anti__'):
             continue
         try:
             locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
@@ -186,10 +186,12 @@ def fetch_traits(engine_traits: EngineTraits):
         engine_traits.languages[sxng_lang] = eng_lang
 
     for x in eval_xpath_list(dom, "//form//input[@name='content']"):
-        engine_traits.custom['content'].append(x.get("value"))
+        if not x.get("value").startswith("anti__"):
+            engine_traits.custom['content'].append(x.get("value"))
 
     for x in eval_xpath_list(dom, "//form//input[@name='ext']"):
-        engine_traits.custom['ext'].append(x.get("value"))
+        if not x.get("value").startswith("anti__"):
+            engine_traits.custom['ext'].append(x.get("value"))
 
     for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
         engine_traits.custom['sort'].append(x.get("value"))

+ 14 - 1
searx/engines/bing.py

@@ -192,8 +192,21 @@ def fetch_traits(engine_traits: EngineTraits):
     # pylint: disable=import-outside-toplevel
 
     from searx.network import get  # see https://github.com/searxng/searxng/issues/762
+    from searx.utils import gen_useragent
+
+    headers = {
+        "User-Agent": gen_useragent(),
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        "Accept-Language": "en-US;q=0.5,en;q=0.3",
+        "Accept-Encoding": "gzip, deflate, br",
+        "DNT": "1",
+        "Connection": "keep-alive",
+        "Upgrade-Insecure-Requests": "1",
+        "Sec-GPC": "1",
+        "Cache-Control": "max-age=0",
+    }
 
-    resp = get("https://www.bing.com/account/general")
+    resp = get("https://www.bing.com/account/general", headers=headers)
     if not resp.ok:  # type: ignore
         print("ERROR: response from bing is not OK.")
 

+ 15 - 5
searx/engines/zlibrary.py

@@ -183,17 +183,27 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
     from searx.network import get  # see https://github.com/searxng/searxng/issues/762
     from searx.locales import language_tag
 
-    resp = get(base_url, verify=False)
+    def _use_old_values():
+        # don't change anything, re-use the existing values
+        engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
+        engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
+        engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
+
+    try:
+        resp = get(base_url, verify=False)
+    except SearxException as exc:
+        print(f"ERROR: zlibrary domain '{base_url}' is seized?")
+        print(f"  --> {exc}")
+        _use_old_values()
+        return
+
     if not resp.ok:  # type: ignore
         raise RuntimeError("Response from zlibrary's search page is not OK.")
     dom = html.fromstring(resp.text)  # type: ignore
 
     if domain_is_seized(dom):
         print(f"ERROR: zlibrary domain is seized: {base_url}")
-        # don't change anything, re-use the existing values
-        engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
-        engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
-        engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
+        _use_old_values()
         return
 
     engine_traits.all_locale = ""