Browse Source

[fix] fetch_traits: brave, google, annas_archive & radio_browser

This patch fixes a bug reported by CI "Fetch traits" [1] (brave) and improves
other fetch traits functions (google, annas_archive & radio_browser).

brave:

    File "/home/runner/work/searxng/searxng/searx/engines/brave.py", line 434, in fetch_traits
      sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    File "/home/runner/work/searxng/searxng/searx/locales.py", line 155, in region_tag
    Error:     raise ValueError('%s missed a territory')

google:

  change ERROR message about unknown UI language to INFO message

radio_browser:

  country_list contains duplicates that differ only in upper/lower case

annas_archive:

  for a cleaner diff, sort the persisted traits

[1] https://github.com/searxng/searxng/actions/runs/10606312371/job/29433352518#step:6:41

Signed-off-by: Markus <markus@venom.fritz.box>
Markus 7 months ago
parent
commit
cdb4927b8b

+ 5 - 0
searx/engines/annas_archive.py

@@ -184,3 +184,8 @@ def fetch_traits(engine_traits: EngineTraits):
 
     for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
         engine_traits.custom['sort'].append(x.get("value"))
+
+    # for better diff; sort the persistence of these traits
+    engine_traits.custom['content'].sort()
+    engine_traits.custom['ext'].sort()
+    engine_traits.custom['sort'].sort()

+ 3 - 2
searx/engines/brave.py

@@ -430,7 +430,8 @@ def fetch_traits(engine_traits: EngineTraits):
 
         ui_lang = option.get('value')
         try:
-            if '-' in ui_lang and not ui_lang.startswith("zh-"):
+            l = babel.Locale.parse(ui_lang, sep='-')
+            if l.territory:
                 sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
             else:
                 sxng_tag = language_tag(babel.Locale.parse(ui_lang, sep='-'))
@@ -453,7 +454,7 @@ def fetch_traits(engine_traits: EngineTraits):
     if not resp.ok:  # type: ignore
         print("ERROR: response from Brave is not OK.")
 
-    country_js = resp.text[resp.text.index("options:{all") + len('options:') :]
+    country_js = resp.text[resp.text.index("options:{all") + len('options:') :]  # type: ignore
     country_js = country_js[: country_js.index("},k={default")]
     country_tags = js_variable_to_python(country_js)
 

+ 1 - 1
searx/engines/google.py

@@ -441,7 +441,7 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
         try:
             locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
         except babel.UnknownLocaleError:
-            print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
+            print("INFO:  google UI language %s (%s) is unknown by babel" % (eng_lang, x.text.split("(")[0].strip()))
             continue
         sxng_lang = language_tag(locale)
 

+ 4 - 2
searx/engines/radio_browser.py

@@ -165,10 +165,12 @@ def fetch_traits(engine_traits: EngineTraits):
 
     countrycodes = set()
     for region in country_list:
-        if region['iso_3166_1'] not in babel_reg_list:
+        # country_list contains duplicates that differ only in upper/lower case
+        _reg = region['iso_3166_1'].upper()
+        if _reg not in babel_reg_list:
             print(f"ERROR: region tag {region['iso_3166_1']} is unknown by babel")
             continue
-        countrycodes.add(region['iso_3166_1'])
+        countrycodes.add(_reg)
 
     countrycodes = list(countrycodes)
     countrycodes.sort()

+ 1 - 1
searx/locales.py

@@ -152,7 +152,7 @@ def locales_initialize():
 def region_tag(locale: babel.Locale) -> str:
     """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US)."""
     if not locale.territory:
-        raise ValueError('%s missed a territory')
+        raise ValueError('babel.Locale %s: missed a territory' % locale)
     return locale.language + '-' + locale.territory