Browse Source

[fix] make data.traits - partial revert of commit 30a8204

The entries in the catalog of search languages are built up from the "Engine
Traits" [1], and which entries are included in the catalog is controlled by two
threshold values [2].

If possible, the values should ensure that no languages or regions disappear
from the catalog of search languages [3].

The threshold values should have been adjusted in commit 30a8204:

- ``min_eng_per_region = 18``
- ``min_eng_per_lang = 22``

Because the threshold values were not adjusted, many entries were missing from
the search language catalog.  This patch fixes that bug: the threshold values
have been adjusted and the catalog of search languages is complete again.

[1] https://docs.searxng.org/dev/engines/enginelib.html#module-searx.enginelib.traits
[2] https://github.com/searxng/searxng/blob/96a6e3dcb2283fa7ad9db4172a00582073a166d7/searxng_extra/update/update_engine_traits.py#L104-L105
[3] https://github.com/searxng/searxng/blob/master/searx/sxng_locales.py

Closes: https://github.com/searxng/searxng/issues/4519
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 month ago
parent
commit
5986629c6b

+ 44 - 2
searx/data/engine_traits.json

@@ -6680,6 +6680,7 @@
     "custom": {},
     "data_type": "traits_v1",
     "languages": {
+      "ar": "ar",
       "ca": "ca",
       "cs": "cs",
       "de": "de",
@@ -6688,17 +6689,30 @@
       "eo": "eo",
       "es": "es",
       "eu": "eu",
+      "fa": "fa",
       "fi": "fi",
       "fr": "fr",
       "gd": "gd",
+      "gl": "gl",
+      "hr": "hr",
+      "hu": "hu",
+      "is": "is",
       "it": "it",
       "ja": "ja",
+      "kab": "kab",
       "nl": "nl",
+      "no": "no",
       "pl": "pl",
       "pt": "pt",
       "ru": "ru",
+      "sk": "sk",
+      "sq": "sq",
       "sv": "sv",
-      "zh": "zh",
+      "th": "th",
+      "tok": "tok",
+      "tr": "tr",
+      "uk": "uk",
+      "vi": "vi",
       "zh_Hans": "zh",
       "zh_Hant": "zh"
     },
@@ -6915,6 +6929,7 @@
         "BQ",
         "BR",
         "BS",
+        "BT",
         "BW",
         "BY",
         "BZ",
@@ -6937,6 +6952,7 @@
         "CY",
         "CZ",
         "DE",
+        "DJ",
         "DK",
         "DM",
         "DO",
@@ -6950,8 +6966,10 @@
         "FI",
         "FJ",
         "FK",
+        "FM",
         "FO",
         "FR",
+        "GA",
         "GB",
         "GD",
         "GE",
@@ -6961,6 +6979,7 @@
         "GI",
         "GL",
         "GM",
+        "GN",
         "GP",
         "GQ",
         "GR",
@@ -6989,6 +7008,7 @@
         "KE",
         "KG",
         "KH",
+        "KI",
         "KM",
         "KN",
         "KP",
@@ -7001,6 +7021,7 @@
         "LC",
         "LI",
         "LK",
+        "LR",
         "LS",
         "LT",
         "LU",
@@ -7011,15 +7032,18 @@
         "MD",
         "ME",
         "MG",
+        "MH",
         "MK",
         "ML",
         "MM",
         "MN",
         "MO",
         "MQ",
+        "MR",
         "MS",
         "MT",
         "MU",
+        "MV",
         "MW",
         "MX",
         "MY",
@@ -7032,6 +7056,7 @@
         "NL",
         "NO",
         "NP",
+        "NR",
         "NU",
         "NZ",
         "OM",
@@ -7055,6 +7080,7 @@
         "RU",
         "RW",
         "SA",
+        "SB",
         "SC",
         "SD",
         "SE",
@@ -7082,8 +7108,10 @@
         "TL",
         "TM",
         "TN",
+        "TO",
         "TR",
         "TT",
+        "TV",
         "TW",
         "TZ",
         "UA",
@@ -7239,6 +7267,7 @@
     "custom": {},
     "data_type": "traits_v1",
     "languages": {
+      "ar": "ar",
       "ca": "ca",
       "cs": "cs",
       "de": "de",
@@ -7247,17 +7276,30 @@
       "eo": "eo",
       "es": "es",
       "eu": "eu",
+      "fa": "fa",
       "fi": "fi",
       "fr": "fr",
       "gd": "gd",
+      "gl": "gl",
+      "hr": "hr",
+      "hu": "hu",
+      "is": "is",
       "it": "it",
       "ja": "ja",
+      "kab": "kab",
       "nl": "nl",
+      "no": "no",
       "pl": "pl",
       "pt": "pt",
       "ru": "ru",
+      "sk": "sk",
+      "sq": "sq",
       "sv": "sv",
-      "zh": "zh",
+      "th": "th",
+      "tok": "tok",
+      "tr": "tr",
+      "uk": "uk",
+      "vi": "vi",
       "zh_Hans": "zh",
       "zh_Hant": "zh"
     },

+ 15 - 1
searx/enginelib/traits.py

@@ -10,6 +10,8 @@ used.
 """
 
 from __future__ import annotations
+
+import os
 import json
 import dataclasses
 import types
@@ -219,8 +221,20 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
 
         for engine_name in names:
             engine = engines.engines[engine_name]
+            traits = None
+
+            # pylint: disable=broad-exception-caught
+            try:
+                traits = EngineTraits.fetch_traits(engine)
+            except Exception as exc:
+                log("FATAL: while fetch_traits %s: %s" % (engine_name, exc))
+                if os.environ.get('FORCE', '').lower() not in ['on', 'true', '1']:
+                    raise
+                v = ENGINE_TRAITS.get(engine_name)
+                if v:
+                    log("FORCE: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name)
+                    traits = EngineTraits(**v)
 
-            traits = EngineTraits.fetch_traits(engine)
             if traits is not None:
                 log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
                 log("%-20s: SearXNG regions   --> %s" % (engine_name, len(traits.regions)))

+ 30 - 0
searx/sxng_locales.py

@@ -11,9 +11,14 @@
 sxng_locales = (
     ('af', 'Afrikaans', '', 'Afrikaans', '\U0001f310'),
     ('ar', 'العربية', '', 'Arabic', '\U0001f310'),
+    ('ar-SA', 'العربية', 'المملكة العربية السعودية', 'Arabic', '\U0001f1f8\U0001f1e6'),
+    ('be', 'Беларуская', '', 'Belarusian', '\U0001f310'),
     ('bg', 'Български', '', 'Bulgarian', '\U0001f310'),
+    ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
     ('ca', 'Català', '', 'Catalan', '\U0001f310'),
     ('cs', 'Čeština', '', 'Czech', '\U0001f310'),
+    ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
+    ('cy', 'Cymraeg', '', 'Welsh', '\U0001f310'),
     ('da', 'Dansk', '', 'Danish', '\U0001f310'),
     ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
     ('de', 'Deutsch', '', 'German', '\U0001f310'),
@@ -21,6 +26,7 @@ sxng_locales = (
     ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
     ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
     ('el', 'Ελληνικά', '', 'Greek', '\U0001f310'),
+    ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
     ('en', 'English', '', 'English', '\U0001f310'),
     ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
     ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
@@ -29,13 +35,21 @@ sxng_locales = (
     ('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'),
     ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
     ('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'),
+    ('en-PK', 'English', 'Pakistan', 'English', '\U0001f1f5\U0001f1f0'),
+    ('en-SG', 'English', 'Singapore', 'English', '\U0001f1f8\U0001f1ec'),
     ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
     ('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'),
     ('es', 'Español', '', 'Spanish', '\U0001f310'),
     ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
     ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
+    ('es-CO', 'Español', 'Colombia', 'Spanish', '\U0001f1e8\U0001f1f4'),
     ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
     ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
+    ('es-PE', 'Español', 'Perú', 'Spanish', '\U0001f1f5\U0001f1ea'),
+    ('et', 'Eesti', '', 'Estonian', '\U0001f310'),
+    ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
+    ('eu', 'Euskara', '', 'Basque', '\U0001f310'),
+    ('fa', 'فارسی', '', 'Persian', '\U0001f310'),
     ('fi', 'Suomi', '', 'Finnish', '\U0001f310'),
     ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
     ('fr', 'Français', '', 'French', '\U0001f310'),
@@ -43,20 +57,29 @@ sxng_locales = (
     ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
     ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
     ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
+    ('ga', 'Gaeilge', '', 'Irish', '\U0001f310'),
+    ('gd', 'Gàidhlig', '', 'Scottish Gaelic', '\U0001f310'),
     ('gl', 'Galego', '', 'Galician', '\U0001f310'),
     ('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
+    ('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'),
     ('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'),
     ('hu', 'Magyar', '', 'Hungarian', '\U0001f310'),
+    ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
     ('id', 'Indonesia', '', 'Indonesian', '\U0001f310'),
     ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
+    ('is', 'Íslenska', '', 'Icelandic', '\U0001f310'),
     ('it', 'Italiano', '', 'Italian', '\U0001f310'),
     ('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'),
     ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
     ('ja', '日本語', '', 'Japanese', '\U0001f310'),
     ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
+    ('kn', 'ಕನ್ನಡ', '', 'Kannada', '\U0001f310'),
     ('ko', '한국어', '', 'Korean', '\U0001f310'),
     ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
     ('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'),
+    ('lv', 'Latviešu', '', 'Latvian', '\U0001f310'),
+    ('ml', 'മലയാളം', '', 'Malayalam', '\U0001f310'),
+    ('mr', 'मराठी', '', 'Marathi', '\U0001f310'),
     ('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'),
     ('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'),
     ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
@@ -68,17 +91,24 @@ sxng_locales = (
     ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
     ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
     ('ro', 'Română', '', 'Romanian', '\U0001f310'),
+    ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
     ('ru', 'Русский', '', 'Russian', '\U0001f310'),
     ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
     ('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'),
+    ('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'),
     ('sq', 'Shqip', '', 'Albanian', '\U0001f310'),
     ('sv', 'Svenska', '', 'Swedish', '\U0001f310'),
     ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
+    ('ta', 'தமிழ்', '', 'Tamil', '\U0001f310'),
+    ('te', 'తెలుగు', '', 'Telugu', '\U0001f310'),
     ('th', 'ไทย', '', 'Thai', '\U0001f310'),
+    ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
     ('tr', 'Türkçe', '', 'Turkish', '\U0001f310'),
     ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
     ('uk', 'Українська', '', 'Ukrainian', '\U0001f310'),
+    ('ur', 'اردو', '', 'Urdu', '\U0001f310'),
     ('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'),
+    ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
     ('zh', '中文', '', 'Chinese', '\U0001f310'),
     ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
     ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),

+ 2 - 2
searxng_extra/update/update_engine_traits.py

@@ -101,8 +101,8 @@ def fetch_traits_map():
 def filter_locales(traits_map: EngineTraitsMap):
     """Filter language & region tags by a threshold."""
 
-    min_eng_per_region = 22
-    min_eng_per_lang = 24
+    min_eng_per_region = 18
+    min_eng_per_lang = 22
 
     _ = {}
     for eng in traits_map.values():