Browse Source

[mod] add flags to the languages filter

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
2841abaf55

+ 57 - 66
searx/languages.py

@@ -2,70 +2,61 @@
 # list of language codes
 # list of language codes
 # this file is generated automatically by utils/fetch_languages.py
 # this file is generated automatically by utils/fetch_languages.py
 language_codes = (
 language_codes = (
-    ('af-ZA', 'Afrikaans', '', 'Afrikaans'),
-    ('ar-EG', 'العربية', '', 'Arabic'),
-    ('be-BY', 'Беларуская', '', 'Belarusian'),
-    ('bg-BG', 'Български', '', 'Bulgarian'),
-    ('ca-ES', 'Català', '', 'Catalan'),
-    ('cs-CZ', 'Čeština', '', 'Czech'),
-    ('da-DK', 'Dansk', '', 'Danish'),
-    ('de', 'Deutsch', '', 'German'),
-    ('de-AT', 'Deutsch', 'Österreich', 'German'),
-    ('de-CH', 'Deutsch', 'Schweiz', 'German'),
-    ('de-DE', 'Deutsch', 'Deutschland', 'German'),
-    ('el-GR', 'Ελληνικά', '', 'Greek'),
-    ('en', 'English', '', 'English'),
-    ('en-AU', 'English', 'Australia', 'English'),
-    ('en-CA', 'English', 'Canada', 'English'),
-    ('en-GB', 'English', 'United Kingdom', 'English'),
-    ('en-IE', 'English', 'Ireland', 'English'),
-    ('en-MY', 'English', 'Malaysia', 'English'),
-    ('en-NZ', 'English', 'New Zealand', 'English'),
-    ('en-US', 'English', 'United States', 'English'),
-    ('es', 'Español', '', 'Spanish'),
-    ('es-AR', 'Español', 'Argentina', 'Spanish'),
-    ('es-CL', 'Español', 'Chile', 'Spanish'),
-    ('es-ES', 'Español', 'España', 'Spanish'),
-    ('es-MX', 'Español', 'México', 'Spanish'),
-    ('et-EE', 'Eesti', '', 'Estonian'),
-    ('fa-IR', 'فارسی', '', 'Persian'),
-    ('fi-FI', 'Suomi', '', 'Finnish'),
-    ('fr', 'Français', '', 'French'),
-    ('fr-BE', 'Français', 'Belgique', 'French'),
-    ('fr-CA', 'Français', 'Canada', 'French'),
-    ('fr-CH', 'Français', 'Suisse', 'French'),
-    ('fr-FR', 'Français', 'France', 'French'),
-    ('he-IL', 'עברית', '', 'Hebrew'),
-    ('hi-IN', 'हिन्दी', '', 'Hindi'),
-    ('hr-HR', 'Hrvatski', '', 'Croatian'),
-    ('hu-HU', 'Magyar', '', 'Hungarian'),
-    ('id-ID', 'Indonesia', '', 'Indonesian'),
-    ('is-IS', 'Íslenska', '', 'Icelandic'),
-    ('it-IT', 'Italiano', '', 'Italian'),
-    ('ja-JP', '日本語', '', 'Japanese'),
-    ('ko-KR', '한국어', '', 'Korean'),
-    ('lt-LT', 'Lietuvių', '', 'Lithuanian'),
-    ('lv-LV', 'Latviešu', '', 'Latvian'),
-    ('nl', 'Nederlands', '', 'Dutch'),
-    ('nl-BE', 'Nederlands', 'België', 'Dutch'),
-    ('nl-NL', 'Nederlands', 'Nederland', 'Dutch'),
-    ('pl-PL', 'Polski', '', 'Polish'),
-    ('pt', 'Português', '', 'Portuguese'),
-    ('pt-BR', 'Português', 'Brasil', 'Portuguese'),
-    ('pt-PT', 'Português', 'Portugal', 'Portuguese'),
-    ('ro-RO', 'Română', '', 'Romanian'),
-    ('ru-RU', 'Русский', '', 'Russian'),
-    ('sk-SK', 'Slovenčina', '', 'Slovak'),
-    ('sl-SI', 'Slovenščina', '', 'Slovenian'),
-    ('sr-RS', 'Српски', '', 'Serbian'),
-    ('sv-SE', 'Svenska', '', 'Swedish'),
-    ('sw-TZ', 'Kiswahili', '', 'Swahili'),
-    ('th-TH', 'ไทย', '', 'Thai'),
-    ('tr-TR', 'Türkçe', '', 'Turkish'),
-    ('uk-UA', 'Українська', '', 'Ukrainian'),
-    ('vi-VN', 'Tiếng Việt', '', 'Vietnamese'),
-    ('zh', '中文', '', 'Chinese'),
-    ('zh-CN', '中文', '中国', 'Chinese'),
-    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese'),
-    ('zh-TW', '中文', '台灣', 'Chinese'),
+    ('ar-EG', 'العربية', '', 'Arabic', '\U0001f1ea\U0001f1ec'),
+    ('bg-BG', 'Български', '', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
+    ('ca-ES', 'Català', '', 'Catalan', '\U0001f1ea\U0001f1f8'),
+    ('cs-CZ', 'Čeština', '', 'Czech', '\U0001f1e8\U0001f1ff'),
+    ('da-DK', 'Dansk', '', 'Danish', '\U0001f1e9\U0001f1f0'),
+    ('de', 'Deutsch', '', 'German', '\U0001f1e9\U0001f1ea'),
+    ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
+    ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
+    ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
+    ('el-GR', 'Ελληνικά', '', 'Greek', '\U0001f1ec\U0001f1f7'),
+    ('en', 'English', '', 'English', '\U0001f1ec\U0001f1e7'),
+    ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
+    ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
+    ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
+    ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
+    ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
+    ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
+    ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
+    ('es', 'Español', '', 'Spanish', '\U0001f1ea\U0001f1f8'),
+    ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
+    ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
+    ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
+    ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
+    ('et-EE', 'Eesti', '', 'Estonian', '\U0001f1ea\U0001f1ea'),
+    ('fi-FI', 'Suomi', '', 'Finnish', '\U0001f1eb\U0001f1ee'),
+    ('fr', 'Français', '', 'French', '\U0001f1eb\U0001f1f7'),
+    ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
+    ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
+    ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
+    ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
+    ('he-IL', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
+    ('hr-HR', 'Hrvatski', '', 'Croatian', '\U0001f1ed\U0001f1f7'),
+    ('hu-HU', 'Magyar', '', 'Hungarian', '\U0001f1ed\U0001f1fa'),
+    ('it-IT', 'Italiano', '', 'Italian', '\U0001f1ee\U0001f1f9'),
+    ('ja-JP', '日本語', '', 'Japanese', '\U0001f1ef\U0001f1f5'),
+    ('ko-KR', '한국어', '', 'Korean', '\U0001f1f0\U0001f1f7'),
+    ('lt-LT', 'Lietuvių', '', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
+    ('lv-LV', 'Latviešu', '', 'Latvian', '\U0001f1f1\U0001f1fb'),
+    ('nl', 'Nederlands', '', 'Dutch', '\U0001f1f3\U0001f1f1'),
+    ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
+    ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
+    ('pl-PL', 'Polski', '', 'Polish', '\U0001f1f5\U0001f1f1'),
+    ('pt', 'Português', '', 'Portuguese', '\U0001f1f5\U0001f1f9'),
+    ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
+    ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
+    ('ro-RO', 'Română', '', 'Romanian', '\U0001f1f7\U0001f1f4'),
+    ('ru-RU', 'Русский', '', 'Russian', '\U0001f1f7\U0001f1fa'),
+    ('sk-SK', 'Slovenčina', '', 'Slovak', '\U0001f1f8\U0001f1f0'),
+    ('sl-SI', 'Slovenščina', '', 'Slovenian', '\U0001f1f8\U0001f1ee'),
+    ('sv-SE', 'Svenska', '', 'Swedish', '\U0001f1f8\U0001f1ea'),
+    ('th-TH', 'ไทย', '', 'Thai', '\U0001f1f9\U0001f1ed'),
+    ('tr-TR', 'Türkçe', '', 'Turkish', '\U0001f1f9\U0001f1f7'),
+    ('uk-UA', 'Українська', '', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
+    ('zh', '中文', '', 'Chinese', '\U0001f1e8\U0001f1f3'),
+    ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
+    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
+    ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
 )
 )

+ 2 - 2
searx/query.py

@@ -85,7 +85,7 @@ class LanguageParser(QueryPartParser):
         # check if any language-code is equal with
         # check if any language-code is equal with
         # declared language-codes
         # declared language-codes
         for lc in language_codes:
         for lc in language_codes:
-            lang_id, lang_name, country, english_name = map(str.lower, lc)
+            lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
 
 
             # if correct language-code is found
             # if correct language-code is found
             # set it as new search-language
             # set it as new search-language
@@ -128,7 +128,7 @@ class LanguageParser(QueryPartParser):
         for lc in language_codes:
         for lc in language_codes:
             if lc[0] not in settings['search']['languages']:
             if lc[0] not in settings['search']['languages']:
                 continue
                 continue
-            lang_id, lang_name, country, english_name = map(str.lower, lc)
+            lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
 
 
             # check if query starts with language-id
             # check if query starts with language-id
             if lang_id.startswith(value):
             if lang_id.startswith(value):

+ 1 - 1
searx/templates/oscar/languages.html

@@ -2,7 +2,7 @@
 <label class="visually-hidden" for="language">{{ _('Language') }}</label>
 <label class="visually-hidden" for="language">{{ _('Language') }}</label>
 <select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l">
 <select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l">
   <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
   <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
+{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
   <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
   <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
     {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
     {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
   </option>
   </option>

+ 2 - 2
searx/templates/simple/filters/languages.html

@@ -1,8 +1,8 @@
 <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
 <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
 	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
 	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-	{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
+	{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
 	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
 	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
-		{{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
+		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}
 	</option>
 	</option>
 	{%- endfor -%}
 	{%- endfor -%}
 </select>
 </select>

+ 2 - 2
searx/templates/simple/preferences.html

@@ -116,8 +116,8 @@
       <p class="value">{{- '' -}}
       <p class="value">{{- '' -}}
         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
           <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
           <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-          {%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
-          <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
+          {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
+          <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option>
           {%- endfor -%}
           {%- endfor -%}
         </select>{{- '' -}}
         </select>{{- '' -}}
       </p>
       </p>

+ 75 - 5
searxng_extra/update/update_languages.py

@@ -12,12 +12,13 @@ Output files: :origin:`searx/data/engines_languages.json` and
 """
 """
 
 
 # pylint: disable=invalid-name
 # pylint: disable=invalid-name
-
+from unicodedata import lookup
 import json
 import json
 from pathlib import Path
 from pathlib import Path
 from pprint import pformat
 from pprint import pformat
 from babel import Locale, UnknownLocaleError
 from babel import Locale, UnknownLocaleError
 from babel.languages import get_global
 from babel.languages import get_global
+from babel.core import parse_locale
 
 
 from searx import settings, searx_dir
 from searx import settings, searx_dir
 from searx.engines import load_engines, engines
 from searx.engines import load_engines, engines
@@ -61,6 +62,62 @@ def get_locale(lang_code):
         return None
         return None
 
 
 
 
+# Flag overrides, each a pair of Unicode REGIONAL INDICATOR SYMBOL letters.
+# get_unicode_flag() consults this table before it derives a flag from the
+# territory part of a locale code.
+lang2emoji = {
+    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger (NE)
+    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina (BA)
+    'ja': '\U0001F1EF\U0001F1F5',  # Japanese (ISO 639-1 code) / Japan (JP)
+    'jp': '\U0001F1EF\U0001F1F5',  # Japan (territory code, kept for compatibility)
+    'uk': '\U0001F1FA\U0001F1E6',  # Ukrainian (ISO 639-1 code) / Ukraine (UA)
+    'ua': '\U0001F1FA\U0001F1E6',  # Ukraine (territory code, kept for compatibility)
+    'he': '\U0001F1EE\U0001F1F1',  # Hebrew / Israel (IL) -- was I+R, the flag of Iran
+    'zh': '\U0001F1E8\U0001F1F3',  # Chinese / China (CN)
+}
+
+
+def get_unicode_flag(lang_code):
+    """Return a flag emoji for ``lang_code``, or ``None`` if none can be built.
+
+    Resolution order:
+
+    1. An explicit entry for the lower-cased ``lang_code`` in ``lang2emoji``.
+    2. A two-letter code is expanded to ``<lang>-<LANG>`` (``en`` becomes
+       ``en-GB``) and parsed with :py:obj:`babel.core.parse_locale`.
+    3. Without a territory, ``lang2emoji`` is asked for the parsed language;
+       a miss is reported on stdout and ``None`` is returned.
+    4. With a territory, ``lang2emoji`` is asked for the lower-cased territory;
+       otherwise the flag is built from the REGIONAL INDICATOR SYMBOL letters
+       of the territory's first two characters.
+
+    Problems are printed to stdout instead of being raised.
+    """
+
+    flag = lang2emoji.get(lang_code.lower())
+    if flag:
+        return flag
+
+    if len(lang_code) == 2:
+        # Bare language code: guess the territory from the code itself.
+        region_guess = lang_code.upper()
+        if region_guess == 'EN':
+            region_guess = 'GB'
+        lang_code = "%s-%s" % (lang_code.lower(), region_guess)
+
+    language = territory = script = variant = ''
+    try:
+        language, territory, script, variant = parse_locale(lang_code, '-')
+    except ValueError as exc:
+        # Keep going with the empty defaults; the no-territory branch below
+        # reports the code that could not be resolved.
+        print(exc)
+
+    if territory:
+        # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+        flag = lang2emoji.get(territory.lower())
+        if flag:
+            return flag
+
+        try:
+            indicators = [
+                lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[idx])
+                for idx in (0, 1)
+            ]
+        except KeyError as exc:
+            # lookup() knows no regional indicator for this character
+            print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
+            return None
+
+        return "".join(indicators)
+
+    # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
+    flag = lang2emoji.get(language)
+    if not flag:
+        print(
+            "%s --> language: %s / territory: %s / script: %s / variant: %s"
+            % (lang_code, language, territory, script, variant)
+        )
+    return flag
+
+
 # Join all language lists.
 # Join all language lists.
 def join_language_lists(engines_languages):
 def join_language_lists(engines_languages):
     language_list = {}
     language_list = {}
@@ -113,7 +170,10 @@ def join_language_lists(engines_languages):
                         print("ERROR: %s --> %s" % (locale, exc))
                         print("ERROR: %s --> %s" % (locale, exc))
                         locale = None
                         locale = None
 
 
-                language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()}
+                language_list[short_code]['countries'][lang_code] = {
+                    'country_name': country_name,
+                    'counter': set(),
+                }
 
 
             # count engine for both language_country combination and language alone
             # count engine for both language_country combination and language alone
             language_list[short_code]['counter'].add(engine_name)
             language_list[short_code]['counter'].add(engine_name)
@@ -167,7 +227,7 @@ def filter_language_list(all_languages):
 
 
         # add language without countries too if there's more than one country to choose from
         # add language without countries too if there's more than one country to choose from
         if len(filtered_countries) > 1:
         if len(filtered_countries) > 1:
-            filtered_countries[lang] = _copy_lang_data(lang)
+            filtered_countries[lang] = _copy_lang_data(lang, None)
         elif len(filtered_countries) == 1:
         elif len(filtered_countries) == 1:
             # if there's only one country per language, it's not necessary to show country name
             # if there's only one country per language, it's not necessary to show country name
             lang_country = next(iter(filtered_countries))
             lang_country = next(iter(filtered_countries))
@@ -183,15 +243,22 @@ def filter_language_list(all_languages):
                     lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
                     lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
 
 
             if lang_country:
             if lang_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang)
+                filtered_countries[lang_country] = _copy_lang_data(lang, None)
             else:
             else:
-                filtered_countries[lang] = _copy_lang_data(lang)
+                filtered_countries[lang] = _copy_lang_data(lang, None)
 
 
         filtered_languages_with_countries.update(filtered_countries)
         filtered_languages_with_countries.update(filtered_countries)
 
 
     return filtered_languages_with_countries
     return filtered_languages_with_countries
 
 
 
 
+class UnicodeEscape(str):
+    """String whose ``repr`` is an ASCII-only, single-quoted Python literal.
+
+    Used so that :py:obj:`pprint.pformat` writes flag emojis as
+    ``'\\U0001f1e9\\U0001f1ea'`` escapes instead of raw characters.
+    """
+
+    def __repr__(self):
+        # Map every byte of the escaped form to the character with the same
+        # code point.
+        escaped = self.encode('unicode-escape').decode('latin-1')
+        # The 'unicode-escape' codec does not escape quotes; do it here so the
+        # result is a valid literal even when the value contains a "'".
+        return "'" + escaped.replace("'", "\\'") + "'"
+
+
 # Write languages.py.
 # Write languages.py.
 def write_languages_file(languages):
 def write_languages_file(languages):
     file_headers = (
     file_headers = (
@@ -209,11 +276,14 @@ def write_languages_file(languages):
         if name is None:
         if name is None:
             print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
             print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
             continue
             continue
+
+        flag = get_unicode_flag(code) or ''
         item = (
         item = (
             code,
             code,
             languages[code]['name'].split(' (')[0],
             languages[code]['name'].split(' (')[0],
             languages[code].get('country_name') or '',
             languages[code].get('country_name') or '',
             languages[code].get('english_name') or '',
             languages[code].get('english_name') or '',
+            UnicodeEscape(flag),
         )
         )
 
 
         language_codes.append(item)
         language_codes.append(item)