Browse Source

[fix] add back missing languages & regions (followup of PR #1071)

In PR #1071 the language catalog of dailymotion was cleaned up; before that,
there had been over 7000 "languages" in the catalog.

As a side effect of this clean-up, the language & region catalog in SearXNG was
reduced [1].

This patch reduces ``min_engines_per_lang`` from 13 to 12 to bring the missing
languages back into the language & region catalog of SearXNG.

[1] https://github.com/searxng/searxng/pull/1071/commits/3bb62823ec3af0e67bd2d959bec20c4791ee3bac#diff-f3f00db0f87f95b882624a192e0aac21525638af0b18c9514e765fcf1991678d

Requested-by: @tiekoetter in a Matrix chat
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
62982c8812
3 changed files with 18 additions and 11 deletions
  1. 3 3
      searx/data/engines_languages.json
  2. 12 6
      searx/languages.py
  3. 3 2
      searxng_extra/update/update_languages.py

+ 3 - 3
searx/data/engines_languages.json

@@ -1714,15 +1714,15 @@
     "mt": {
       "alias": "maltese"
     },
-    "nb": {
-      "alias": "norsk"
-    },
     "ne": {
       "alias": "nepali"
     },
     "nl": {
       "alias": "nederlands"
     },
+    "no": {
+      "alias": "norsk"
+    },
     "oc": {
       "alias": "occitan"
     },

+ 12 - 6
searx/languages.py

@@ -2,7 +2,9 @@
 # list of language codes
 # this file is generated automatically by utils/fetch_languages.py
 language_codes = (
+    ('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
     ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'),
+    ('be-BY', 'Беларуская', 'Беларусь', 'Belarusian', '\U0001f1e7\U0001f1fe'),
     ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
     ('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'),
     ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
@@ -26,28 +28,29 @@ language_codes = (
     ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
     ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
     ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
+    ('fa-IR', 'فارسی', 'ایران', 'Persian', '\U0001f1ee\U0001f1f7'),
     ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
+    ('fil-PH', 'Filipino', 'Pilipinas', 'Filipino', '\U0001f1f5\U0001f1ed'),
     ('fr', 'Français', '', 'French', '\U0001f310'),
     ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
     ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
     ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
     ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
     ('he-IL', 'עברית', 'ישראל', 'Hebrew', '\U0001f1ee\U0001f1f1'),
+    ('hi-IN', 'हिन्दी', 'भारत', 'Hindi', '\U0001f1ee\U0001f1f3'),
     ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
     ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
+    ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
+    ('is-IS', 'Íslenska', 'Ísland', 'Icelandic', '\U0001f1ee\U0001f1f8'),
     ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
     ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
     ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
     ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
     ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'),
-    (   'nb-NO',
-        'Norsk Bokmål',
-        'Norge',
-        'Norwegian Bokmål',
-        '\U0001f1f3\U0001f1f4'),
     ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
     ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
     ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
+    ('no-NO', 'Norsk', '', 'Norwegian (Bokmål)', '\U0001f1f3\U0001f1f4'),
     ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
     ('pt', 'Português', '', 'Portuguese', '\U0001f310'),
     ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
@@ -56,12 +59,15 @@ language_codes = (
     ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
     ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
     ('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'),
+    ('sr-RS', 'Српски', 'Србија', 'Serbian', '\U0001f1f7\U0001f1f8'),
     ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
+    ('sw-TZ', 'Kiswahili', 'Tanzania', 'Swahili', '\U0001f1f9\U0001f1ff'),
     ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
     ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
     ('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
+    ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
     ('zh', '中文', '', 'Chinese', '\U0001f310'),
     ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
-    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
+    ('zh-HK', '中文', '中國香港', 'Chinese', '\U0001f1ed\U0001f1f0'),
     ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
 )

+ 3 - 2
searxng_extra/update/update_languages.py

@@ -117,7 +117,8 @@ def get_territory_name(lang_code):
     country_name = None
     locale = get_locale(lang_code)
     try:
-        country_name = locale.get_territory_name()
+        if locale is not None:
+            country_name = locale.get_territory_name()
     except FileNotFoundError as exc:
         print("ERROR: %s --> %s" % (locale, exc))
     return country_name
@@ -190,7 +191,7 @@ def join_language_lists(engines_languages):
 
 # Filter language list so it only includes the most supported languages and countries
 def filter_language_list(all_languages):
-    min_engines_per_lang = 13
+    min_engines_per_lang = 12
     min_engines_per_country = 7
     # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
     main_engines = [