Browse Source

[fix] engine annas archive - fetch traits (modified xpath selectors)

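Anna’s Archive has cleaned up its lists of supported languages and available
file extensions, and has changed the HTML search form: the `lang`, `content`
and `ext` filters are now `<input>` elements instead of `<select>` options.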

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
3a456b1282
2 changed files with 12 additions and 64 deletions
  1. +9 -61
      searx/data/engine_traits.json
  2. +3 -3
      searx/engines/annas_archive.py

+ 9 - 61
searx/data/engine_traits.json

@@ -3,38 +3,24 @@
     "all_locale": "",
     "custom": {
       "content": [
-        "",
-        "journal_article",
-        "book_any",
+        "book_nonfiction",
         "book_fiction",
         "book_unknown",
-        "book_nonfiction",
+        "journal_article",
         "book_comic",
         "magazine",
         "standards_document"
       ],
       "ext": [
-        "",
         "pdf",
         "epub",
         "cbr",
-        "fb2",
         "mobi",
+        "fb2",
         "cbz",
-        "djvu",
         "azw3",
-        "fb2.zip",
-        "txt",
-        "rar",
-        "zip",
-        "doc",
-        "lit",
-        "rtf",
-        "htm",
-        "html",
-        "lrf",
-        "mht",
-        "docx"
+        "djvu",
+        "fb2.zip"
       ],
       "sort": [
         "",
@@ -48,84 +34,46 @@
     "languages": {
       "af": "af",
       "ar": "ar",
-      "az": "az",
       "be": "be",
       "bg": "bg",
       "bn": "bn",
-      "bo": "bo",
-      "bs": "bs",
       "ca": "ca",
       "cs": "cs",
+      "cy": "cy",
       "da": "da",
       "de": "de",
       "el": "el",
       "en": "en",
-      "eo": "eo",
       "es": "es",
-      "et": "et",
-      "eu": "eu",
       "fa": "fa",
-      "fi": "fi",
-      "fil": "tl",
       "fr": "fr",
-      "gl": "gl",
-      "gu": "gu",
       "he": "he",
       "hi": "hi",
       "hr": "hr",
       "hu": "hu",
-      "hy": "hy",
       "id": "id",
-      "is": "is",
       "it": "it",
       "ja": "ja",
-      "ka": "ka",
+      "jv": "jv",
       "kk": "kk",
-      "kn": "kn",
       "ko": "ko",
-      "ku": "ku",
-      "ky": "ky",
-      "lo": "lo",
       "lt": "lt",
       "lv": "lv",
-      "mk": "mk",
-      "ml": "ml",
       "mn": "mn",
-      "mr": "mr",
-      "ms": "ms",
-      "my": "my",
-      "nb": "nb",
-      "ne": "ne",
       "nl": "nl",
       "no": "no",
-      "pa": "pa",
       "pl": "pl",
-      "ps": "ps",
       "pt": "pt",
       "ro": "ro",
       "ru": "ru",
-      "sa": "sa",
-      "sd": "sd",
-      "si": "si",
-      "sk": "sk",
-      "sl": "sl",
-      "so": "so",
-      "sq": "sq",
       "sr": "sr",
       "sv": "sv",
-      "sw": "sw",
       "ta": "ta",
-      "te": "te",
-      "tg": "tg",
       "tr": "tr",
-      "tt": "tt",
-      "ug": "ug",
       "uk": "uk",
-      "ur": "ur",
-      "uz": "uz",
       "vi": "vi",
-      "yi": "yi",
-      "zh": "zh"
+      "zh": "zh",
+      "zh_Hant": "zh-Hant"
     },
     "regions": {}
   },

+ 3 - 3
searx/engines/annas_archive.py

@@ -159,7 +159,7 @@ def fetch_traits(engine_traits: EngineTraits):
     # supported language codes
 
     lang_map = {}
-    for x in eval_xpath_list(dom, "//form//select[@name='lang']//option"):
+    for x in eval_xpath_list(dom, "//form//input[@name='lang']"):
         eng_lang = x.get("value")
         if eng_lang in ('', '_empty', 'nl-BE', 'und'):
             continue
@@ -177,10 +177,10 @@ def fetch_traits(engine_traits: EngineTraits):
             continue
         engine_traits.languages[sxng_lang] = eng_lang
 
-    for x in eval_xpath_list(dom, "//form//select[@name='content']//option"):
+    for x in eval_xpath_list(dom, "//form//input[@name='content']"):
         engine_traits.custom['content'].append(x.get("value"))
 
-    for x in eval_xpath_list(dom, "//form//select[@name='ext']//option"):
+    for x in eval_xpath_list(dom, "//form//input[@name='ext']"):
         engine_traits.custom['ext'].append(x.get("value"))
 
     for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):