Browse Source

Merge pull request #967 from return42/language-filter

[mod] add flags to the languages filter
Alexandre Flament 3 years ago
parent
commit
0379856712

+ 66 - 66
searx/languages.py

@@ -2,70 +2,70 @@
 # list of language codes
 # this file is generated automatically by utils/fetch_languages.py
 language_codes = (
-    ('af-ZA', 'Afrikaans', '', 'Afrikaans'),
-    ('ar-EG', 'العربية', '', 'Arabic'),
-    ('be-BY', 'Беларуская', '', 'Belarusian'),
-    ('bg-BG', 'Български', '', 'Bulgarian'),
-    ('ca-ES', 'Català', '', 'Catalan'),
-    ('cs-CZ', 'Čeština', '', 'Czech'),
-    ('da-DK', 'Dansk', '', 'Danish'),
-    ('de', 'Deutsch', '', 'German'),
-    ('de-AT', 'Deutsch', 'Österreich', 'German'),
-    ('de-CH', 'Deutsch', 'Schweiz', 'German'),
-    ('de-DE', 'Deutsch', 'Deutschland', 'German'),
-    ('el-GR', 'Ελληνικά', '', 'Greek'),
-    ('en', 'English', '', 'English'),
-    ('en-AU', 'English', 'Australia', 'English'),
-    ('en-CA', 'English', 'Canada', 'English'),
-    ('en-GB', 'English', 'United Kingdom', 'English'),
-    ('en-IE', 'English', 'Ireland', 'English'),
-    ('en-MY', 'English', 'Malaysia', 'English'),
-    ('en-NZ', 'English', 'New Zealand', 'English'),
-    ('en-US', 'English', 'United States', 'English'),
-    ('es', 'Español', '', 'Spanish'),
-    ('es-AR', 'Español', 'Argentina', 'Spanish'),
-    ('es-CL', 'Español', 'Chile', 'Spanish'),
-    ('es-ES', 'Español', 'España', 'Spanish'),
-    ('es-MX', 'Español', 'México', 'Spanish'),
-    ('et-EE', 'Eesti', '', 'Estonian'),
-    ('fa-IR', 'فارسی', '', 'Persian'),
-    ('fi-FI', 'Suomi', '', 'Finnish'),
-    ('fr', 'Français', '', 'French'),
-    ('fr-BE', 'Français', 'Belgique', 'French'),
-    ('fr-CA', 'Français', 'Canada', 'French'),
-    ('fr-CH', 'Français', 'Suisse', 'French'),
-    ('fr-FR', 'Français', 'France', 'French'),
-    ('he-IL', 'עברית', '', 'Hebrew'),
-    ('hi-IN', 'हिन्दी', '', 'Hindi'),
-    ('hr-HR', 'Hrvatski', '', 'Croatian'),
-    ('hu-HU', 'Magyar', '', 'Hungarian'),
-    ('id-ID', 'Indonesia', '', 'Indonesian'),
-    ('is-IS', 'Íslenska', '', 'Icelandic'),
-    ('it-IT', 'Italiano', '', 'Italian'),
-    ('ja-JP', '日本語', '', 'Japanese'),
-    ('ko-KR', '한국어', '', 'Korean'),
-    ('lt-LT', 'Lietuvių', '', 'Lithuanian'),
-    ('lv-LV', 'Latviešu', '', 'Latvian'),
-    ('nl', 'Nederlands', '', 'Dutch'),
-    ('nl-BE', 'Nederlands', 'België', 'Dutch'),
-    ('nl-NL', 'Nederlands', 'Nederland', 'Dutch'),
-    ('pl-PL', 'Polski', '', 'Polish'),
-    ('pt', 'Português', '', 'Portuguese'),
-    ('pt-BR', 'Português', 'Brasil', 'Portuguese'),
-    ('pt-PT', 'Português', 'Portugal', 'Portuguese'),
-    ('ro-RO', 'Română', '', 'Romanian'),
-    ('ru-RU', 'Русский', '', 'Russian'),
-    ('sk-SK', 'Slovenčina', '', 'Slovak'),
-    ('sl-SI', 'Slovenščina', '', 'Slovenian'),
-    ('sr-RS', 'Српски', '', 'Serbian'),
-    ('sv-SE', 'Svenska', '', 'Swedish'),
-    ('sw-TZ', 'Kiswahili', '', 'Swahili'),
-    ('th-TH', 'ไทย', '', 'Thai'),
-    ('tr-TR', 'Türkçe', '', 'Turkish'),
-    ('uk-UA', 'Українська', '', 'Ukrainian'),
-    ('vi-VN', 'Tiếng Việt', '', 'Vietnamese'),
-    ('zh', '中文', '', 'Chinese'),
-    ('zh-CN', '中文', '中国', 'Chinese'),
-    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese'),
-    ('zh-TW', '中文', '台灣', 'Chinese'),
+    ('af-ZA', 'Afrikaans', '', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
+    ('ar-EG', 'العربية', '', 'Arabic', '\U0001f1ea\U0001f1ec'),
+    ('be-BY', 'Беларуская', '', 'Belarusian', '\U0001f1e7\U0001f1fe'),
+    ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
+    ('ca-ES', 'Català', '', 'Catalan', '\U0001f1ea\U0001f1f8'),
+    ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
+    ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
+    ('de', 'Deutsch', '', 'German', '\U0001f310'),
+    ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
+    ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
+    ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
+    ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
+    ('en', 'English', '', 'English', '\U0001f310'),
+    ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
+    ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
+    ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
+    ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
+    ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
+    ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
+    ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
+    ('es', 'Español', '', 'Spanish', '\U0001f310'),
+    ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
+    ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
+    ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
+    ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
+    ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
+    ('fa-IR', 'فارسی', '', 'Persian', '\U0001f1ee\U0001f1f7'),
+    ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
+    ('fr', 'Français', '', 'French', '\U0001f310'),
+    ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
+    ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
+    ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
+    ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
+    ('he-IL', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
+    ('hi-IN', 'हिन्दी', '', 'Hindi', '\U0001f1ee\U0001f1f3'),
+    ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
+    ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
+    ('id-ID', 'Indonesia', '', 'Indonesian', '\U0001f1ee\U0001f1e9'),
+    ('is-IS', 'Íslenska', '', 'Icelandic', '\U0001f1ee\U0001f1f8'),
+    ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
+    ('ja-JP', '日本語', '', 'Japanese', '\U0001f1ef\U0001f1f5'),
+    ('ko-KR', '한국어', '', 'Korean', '\U0001f1f0\U0001f1f7'),
+    ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
+    ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'),
+    ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
+    ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
+    ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
+    ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
+    ('pt', 'Português', '', 'Portuguese', '\U0001f310'),
+    ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
+    ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
+    ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
+    ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
+    ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
+    ('sl-SI', 'Slovenščina', '', 'Slovenian', '\U0001f1f8\U0001f1ee'),
+    ('sr-RS', 'Српски', '', 'Serbian', '\U0001f1f7\U0001f1f8'),
+    ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
+    ('sw-TZ', 'Kiswahili', '', 'Swahili', '\U0001f1f9\U0001f1ff'),
+    ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
+    ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
+    ('uk-UA', 'Українська', '', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
+    ('vi-VN', 'Tiếng Việt', '', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
+    ('zh', '中文', '', 'Chinese', '\U0001f310'),
+    ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
+    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
+    ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
 )

+ 2 - 2
searx/query.py

@@ -85,7 +85,7 @@ class LanguageParser(QueryPartParser):
         # check if any language-code is equal with
         # declared language-codes
         for lc in language_codes:
-            lang_id, lang_name, country, english_name = map(str.lower, lc)
+            lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
 
             # if correct language-code is found
             # set it as new search-language
@@ -128,7 +128,7 @@ class LanguageParser(QueryPartParser):
         for lc in language_codes:
             if lc[0] not in settings['search']['languages']:
                 continue
-            lang_id, lang_name, country, english_name = map(str.lower, lc)
+            lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
 
             # check if query starts with language-id
             if lang_id.startswith(value):

+ 1 - 1
searx/templates/oscar/languages.html

@@ -2,7 +2,7 @@
 <label class="visually-hidden" for="language">{{ _('Language') }}</label>
 <select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l">
   <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
+{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
   <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
     {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
   </option>

+ 2 - 2
searx/templates/simple/filters/languages.html

@@ -1,8 +1,8 @@
 <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
 	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-	{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
+	{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
 	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
-		{{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
+		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}
 	</option>
 	{%- endfor -%}
 </select>

+ 2 - 2
searx/templates/simple/preferences.html

@@ -116,8 +116,8 @@
       <p class="value">{{- '' -}}
         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
           <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-          {%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
-          <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
+          {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
+          <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option>
           {%- endfor -%}
         </select>{{- '' -}}
       </p>

+ 70 - 7
searxng_extra/update/update_languages.py

@@ -12,12 +12,13 @@ Output files: :origin:`searx/data/engines_languages.json` and
 """
 
 # pylint: disable=invalid-name
-
+from unicodedata import lookup
 import json
 from pathlib import Path
 from pprint import pformat
 from babel import Locale, UnknownLocaleError
 from babel.languages import get_global
+from babel.core import parse_locale
 
 from searx import settings, searx_dir
 from searx.engines import load_engines, engines
@@ -61,6 +62,57 @@ def get_locale(lang_code):
         return None
 
 
+lang2emoji = {
+    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger
+    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina
+    'jp': '\U0001F1EF\U0001F1F5',  # Japanese
+    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian
+    'he': '\U0001F1EE\U0001F1F7',  # Hebrew
+}
+
+
+def get_unicode_flag(lang_code):
+    """Determine a unicode flag (emoji) that fits to the ``lang_code``"""
+
+    emoji = lang2emoji.get(lang_code.lower())
+    if emoji:
+        return emoji
+
+    if len(lang_code) == 2:
+        return '\U0001F310'
+
+    language = territory = script = variant = ''
+    try:
+        language, territory, script, variant = parse_locale(lang_code, '-')
+    except ValueError as exc:
+        print(exc)
+
+    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+    if not territory:
+        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
+        emoji = lang2emoji.get(language)
+        if not emoji:
+            print(
+                "%s --> language: %s / territory: %s / script: %s / variant: %s"
+                % (lang_code, language, territory, script, variant)
+            )
+        return emoji
+
+    emoji = lang2emoji.get(territory.lower())
+    if emoji:
+        return emoji
+
+    try:
+        c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
+        c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
+        # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
+    except KeyError as exc:
+        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
+        return None
+
+    return c1 + c2
+
+
 # Join all language lists.
 def join_language_lists(engines_languages):
     language_list = {}
@@ -113,7 +165,10 @@ def join_language_lists(engines_languages):
                         print("ERROR: %s --> %s" % (locale, exc))
                         locale = None
 
-                language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()}
+                language_list[short_code]['countries'][lang_code] = {
+                    'country_name': country_name,
+                    'counter': set(),
+                }
 
             # count engine for both language_country combination and language alone
             language_list[short_code]['counter'].add(engine_name)
@@ -167,11 +222,9 @@ def filter_language_list(all_languages):
 
         # add language without countries too if there's more than one country to choose from
         if len(filtered_countries) > 1:
-            filtered_countries[lang] = _copy_lang_data(lang)
+            filtered_countries[lang] = _copy_lang_data(lang, None)
         elif len(filtered_countries) == 1:
-            # if there's only one country per language, it's not necessary to show country name
             lang_country = next(iter(filtered_countries))
-            filtered_countries[lang_country]['country_name'] = None
 
         # if no country has enough engines try to get most likely country code from babel
         if not filtered_countries:
@@ -183,15 +236,22 @@ def filter_language_list(all_languages):
                     lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
 
             if lang_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang)
+                filtered_countries[lang_country] = _copy_lang_data(lang, None)
             else:
-                filtered_countries[lang] = _copy_lang_data(lang)
+                filtered_countries[lang] = _copy_lang_data(lang, None)
 
         filtered_languages_with_countries.update(filtered_countries)
 
     return filtered_languages_with_countries
 
 
+class UnicodeEscape(str):
+    """Escape unicode string in :py:obj:`pprint.pformat`"""
+
+    def __repr__(self):
+        return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
+
+
 # Write languages.py.
 def write_languages_file(languages):
     file_headers = (
@@ -209,11 +269,14 @@ def write_languages_file(languages):
         if name is None:
             print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
             continue
+
+        flag = get_unicode_flag(code) or ''
         item = (
             code,
             languages[code]['name'].split(' (')[0],
             languages[code].get('country_name') or '',
             languages[code].get('english_name') or '',
+            UnicodeEscape(flag),
         )
 
         language_codes.append(item)