Browse Source

Merge pull request #967 from return42/language-filter

[mod] add flags to the languages filter
Alexandre Flament 3 years ago
parent
commit
0379856712

+ 66 - 66
searx/languages.py

@@ -2,70 +2,70 @@
 # list of language codes
 # list of language codes
 # this file is generated automatically by utils/fetch_languages.py
 # this file is generated automatically by utils/fetch_languages.py
 language_codes = (
 language_codes = (
-    ('af-ZA', 'Afrikaans', '', 'Afrikaans'),
-    ('ar-EG', 'العربية', '', 'Arabic'),
-    ('be-BY', 'Беларуская', '', 'Belarusian'),
-    ('bg-BG', 'Български', '', 'Bulgarian'),
-    ('ca-ES', 'Català', '', 'Catalan'),
-    ('cs-CZ', 'Čeština', '', 'Czech'),
-    ('da-DK', 'Dansk', '', 'Danish'),
-    ('de', 'Deutsch', '', 'German'),
-    ('de-AT', 'Deutsch', 'Österreich', 'German'),
-    ('de-CH', 'Deutsch', 'Schweiz', 'German'),
-    ('de-DE', 'Deutsch', 'Deutschland', 'German'),
-    ('el-GR', 'Ελληνικά', '', 'Greek'),
-    ('en', 'English', '', 'English'),
-    ('en-AU', 'English', 'Australia', 'English'),
-    ('en-CA', 'English', 'Canada', 'English'),
-    ('en-GB', 'English', 'United Kingdom', 'English'),
-    ('en-IE', 'English', 'Ireland', 'English'),
-    ('en-MY', 'English', 'Malaysia', 'English'),
-    ('en-NZ', 'English', 'New Zealand', 'English'),
-    ('en-US', 'English', 'United States', 'English'),
-    ('es', 'Español', '', 'Spanish'),
-    ('es-AR', 'Español', 'Argentina', 'Spanish'),
-    ('es-CL', 'Español', 'Chile', 'Spanish'),
-    ('es-ES', 'Español', 'España', 'Spanish'),
-    ('es-MX', 'Español', 'México', 'Spanish'),
-    ('et-EE', 'Eesti', '', 'Estonian'),
-    ('fa-IR', 'فارسی', '', 'Persian'),
-    ('fi-FI', 'Suomi', '', 'Finnish'),
-    ('fr', 'Français', '', 'French'),
-    ('fr-BE', 'Français', 'Belgique', 'French'),
-    ('fr-CA', 'Français', 'Canada', 'French'),
-    ('fr-CH', 'Français', 'Suisse', 'French'),
-    ('fr-FR', 'Français', 'France', 'French'),
-    ('he-IL', 'עברית', '', 'Hebrew'),
-    ('hi-IN', 'हिन्दी', '', 'Hindi'),
-    ('hr-HR', 'Hrvatski', '', 'Croatian'),
-    ('hu-HU', 'Magyar', '', 'Hungarian'),
-    ('id-ID', 'Indonesia', '', 'Indonesian'),
-    ('is-IS', 'Íslenska', '', 'Icelandic'),
-    ('it-IT', 'Italiano', '', 'Italian'),
-    ('ja-JP', '日本語', '', 'Japanese'),
-    ('ko-KR', '한국어', '', 'Korean'),
-    ('lt-LT', 'Lietuvių', '', 'Lithuanian'),
-    ('lv-LV', 'Latviešu', '', 'Latvian'),
-    ('nl', 'Nederlands', '', 'Dutch'),
-    ('nl-BE', 'Nederlands', 'België', 'Dutch'),
-    ('nl-NL', 'Nederlands', 'Nederland', 'Dutch'),
-    ('pl-PL', 'Polski', '', 'Polish'),
-    ('pt', 'Português', '', 'Portuguese'),
-    ('pt-BR', 'Português', 'Brasil', 'Portuguese'),
-    ('pt-PT', 'Português', 'Portugal', 'Portuguese'),
-    ('ro-RO', 'Română', '', 'Romanian'),
-    ('ru-RU', 'Русский', '', 'Russian'),
-    ('sk-SK', 'Slovenčina', '', 'Slovak'),
-    ('sl-SI', 'Slovenščina', '', 'Slovenian'),
-    ('sr-RS', 'Српски', '', 'Serbian'),
-    ('sv-SE', 'Svenska', '', 'Swedish'),
-    ('sw-TZ', 'Kiswahili', '', 'Swahili'),
-    ('th-TH', 'ไทย', '', 'Thai'),
-    ('tr-TR', 'Türkçe', '', 'Turkish'),
-    ('uk-UA', 'Українська', '', 'Ukrainian'),
-    ('vi-VN', 'Tiếng Việt', '', 'Vietnamese'),
-    ('zh', '中文', '', 'Chinese'),
-    ('zh-CN', '中文', '中国', 'Chinese'),
-    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese'),
-    ('zh-TW', '中文', '台灣', 'Chinese'),
+    ('af-ZA', 'Afrikaans', '', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
+    ('ar-EG', 'العربية', '', 'Arabic', '\U0001f1ea\U0001f1ec'),
+    ('be-BY', 'Беларуская', '', 'Belarusian', '\U0001f1e7\U0001f1fe'),
+    ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
+    ('ca-ES', 'Català', '', 'Catalan', '\U0001f1ea\U0001f1f8'),
+    ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
+    ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
+    ('de', 'Deutsch', '', 'German', '\U0001f310'),
+    ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
+    ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
+    ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
+    ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
+    ('en', 'English', '', 'English', '\U0001f310'),
+    ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
+    ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
+    ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
+    ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
+    ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
+    ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
+    ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
+    ('es', 'Español', '', 'Spanish', '\U0001f310'),
+    ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
+    ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
+    ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
+    ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
+    ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
+    ('fa-IR', 'فارسی', '', 'Persian', '\U0001f1ee\U0001f1f7'),
+    ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
+    ('fr', 'Français', '', 'French', '\U0001f310'),
+    ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
+    ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
+    ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
+    ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
+    ('he-IL', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
+    ('hi-IN', 'हिन्दी', '', 'Hindi', '\U0001f1ee\U0001f1f3'),
+    ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
+    ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
+    ('id-ID', 'Indonesia', '', 'Indonesian', '\U0001f1ee\U0001f1e9'),
+    ('is-IS', 'Íslenska', '', 'Icelandic', '\U0001f1ee\U0001f1f8'),
+    ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
+    ('ja-JP', '日本語', '', 'Japanese', '\U0001f1ef\U0001f1f5'),
+    ('ko-KR', '한국어', '', 'Korean', '\U0001f1f0\U0001f1f7'),
+    ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
+    ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'),
+    ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
+    ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
+    ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
+    ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
+    ('pt', 'Português', '', 'Portuguese', '\U0001f310'),
+    ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
+    ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
+    ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
+    ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
+    ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
+    ('sl-SI', 'Slovenščina', '', 'Slovenian', '\U0001f1f8\U0001f1ee'),
+    ('sr-RS', 'Српски', '', 'Serbian', '\U0001f1f7\U0001f1f8'),
+    ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
+    ('sw-TZ', 'Kiswahili', '', 'Swahili', '\U0001f1f9\U0001f1ff'),
+    ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
+    ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
+    ('uk-UA', 'Українська', '', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
+    ('vi-VN', 'Tiếng Việt', '', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
+    ('zh', '中文', '', 'Chinese', '\U0001f310'),
+    ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
+    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
+    ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
 )
 )

+ 2 - 2
searx/query.py

@@ -85,7 +85,7 @@ class LanguageParser(QueryPartParser):
         # check if any language-code is equal with
         # check if any language-code is equal with
         # declared language-codes
         # declared language-codes
         for lc in language_codes:
         for lc in language_codes:
-            lang_id, lang_name, country, english_name = map(str.lower, lc)
+            lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
 
 
             # if correct language-code is found
             # if correct language-code is found
             # set it as new search-language
             # set it as new search-language
@@ -128,7 +128,7 @@ class LanguageParser(QueryPartParser):
         for lc in language_codes:
         for lc in language_codes:
             if lc[0] not in settings['search']['languages']:
             if lc[0] not in settings['search']['languages']:
                 continue
                 continue
-            lang_id, lang_name, country, english_name = map(str.lower, lc)
+            lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
 
 
             # check if query starts with language-id
             # check if query starts with language-id
             if lang_id.startswith(value):
             if lang_id.startswith(value):

+ 1 - 1
searx/templates/oscar/languages.html

@@ -2,7 +2,7 @@
 <label class="visually-hidden" for="language">{{ _('Language') }}</label>
 <label class="visually-hidden" for="language">{{ _('Language') }}</label>
 <select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l">
 <select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l">
   <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
   <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
+{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
   <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
   <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
     {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
     {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
   </option>
   </option>

+ 2 - 2
searx/templates/simple/filters/languages.html

@@ -1,8 +1,8 @@
 <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
 <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
 	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
 	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-	{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
+	{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
 	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
 	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
-		{{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
+		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}
 	</option>
 	</option>
 	{%- endfor -%}
 	{%- endfor -%}
 </select>
 </select>

+ 2 - 2
searx/templates/simple/preferences.html

@@ -116,8 +116,8 @@
       <p class="value">{{- '' -}}
       <p class="value">{{- '' -}}
         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
         <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
           <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
           <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
-          {%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%}
-          <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
+          {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
+          <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option>
           {%- endfor -%}
           {%- endfor -%}
         </select>{{- '' -}}
         </select>{{- '' -}}
       </p>
       </p>

+ 70 - 7
searxng_extra/update/update_languages.py

@@ -12,12 +12,13 @@ Output files: :origin:`searx/data/engines_languages.json` and
 """
 """
 
 
 # pylint: disable=invalid-name
 # pylint: disable=invalid-name
-
+from unicodedata import lookup
 import json
 import json
 from pathlib import Path
 from pathlib import Path
 from pprint import pformat
 from pprint import pformat
 from babel import Locale, UnknownLocaleError
 from babel import Locale, UnknownLocaleError
 from babel.languages import get_global
 from babel.languages import get_global
+from babel.core import parse_locale
 
 
 from searx import settings, searx_dir
 from searx import settings, searx_dir
 from searx.engines import load_engines, engines
 from searx.engines import load_engines, engines
@@ -61,6 +62,57 @@ def get_locale(lang_code):
         return None
         return None
 
 
 
 
# Hand-maintained flag overrides consulted by ``get_unicode_flag`` before it
# falls back to deriving a flag from the territory part of a locale code.
#
# Keys are lower-case and are matched against three different things: the
# complete (lower-cased) language code, the language part of a parsed locale
# that has no territory, and the lower-cased territory code.  That is why the
# table mixes language codes ('ha', 'bs', 'he') with territory-style codes
# ('jp', 'ua').
#
# Values are pairs of Unicode "regional indicator" symbols; the pair spells
# the ISO 3166-1 alpha-2 code of the country whose flag is rendered.
lang2emoji = {
    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger (NE)
    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina (BA)
    'jp': '\U0001F1EF\U0001F1F5',  # Japanese / Japan (JP)
    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian / Ukraine (UA)
    # Hebrew / Israel (IL).  The previous value spelled "IR", i.e. the flag of
    # Iran, which contradicted the flag generated for 'he-IL'.
    'he': '\U0001F1EE\U0001F1F1',
}
+
+
def get_unicode_flag(lang_code):
    """Pick a unicode flag (emoji) for ``lang_code``.

    Resolution order:

    1. an explicit entry in ``lang2emoji`` for the lower-cased ``lang_code``,
    2. the globe emoji for any other code that is exactly two characters long,
    3. for codes without a territory: the ``lang2emoji`` entry of the parsed
       language (a diagnostic line is printed when there is none),
    4. an explicit entry in ``lang2emoji`` for the lower-cased territory,
    5. the two regional indicator symbols spelling the territory code.

    Returns the emoji string, or ``None`` when no flag could be determined
    (step 3 without an entry, or a territory whose characters have no regional
    indicator symbol).  Problems are reported with ``print`` and never raised.
    """
    override = lang2emoji.get(lang_code.lower())
    if override:
        return override

    # A bare two-character code carries no territory: use the globe.
    if len(lang_code) == 2:
        return '\U0001F310'

    try:
        language, territory, script, variant = parse_locale(lang_code, '-')
    except ValueError as exc:
        print(exc)
        # Continue with empty parts; this ends up in the "no territory" branch.
        language = territory = script = variant = ''

    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    if not territory:
        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
        by_language = lang2emoji.get(language)
        if not by_language:
            print(
                "%s --> language: %s / territory: %s / script: %s / variant: %s"
                % (lang_code, language, territory, script, variant)
            )
        return by_language

    override = lang2emoji.get(territory.lower())
    if override:
        return override

    symbol_name = 'REGIONAL INDICATOR SYMBOL LETTER '
    try:
        # Only the first two characters of the territory are used; a character
        # without a matching symbol (e.g. a digit) makes lookup() raise KeyError.
        flag = ''.join(lookup(symbol_name + territory[pos]) for pos in (0, 1))
    except KeyError as exc:
        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
        return None

    return flag
+
+
 # Join all language lists.
 # Join all language lists.
 def join_language_lists(engines_languages):
 def join_language_lists(engines_languages):
     language_list = {}
     language_list = {}
@@ -113,7 +165,10 @@ def join_language_lists(engines_languages):
                         print("ERROR: %s --> %s" % (locale, exc))
                         print("ERROR: %s --> %s" % (locale, exc))
                         locale = None
                         locale = None
 
 
-                language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()}
+                language_list[short_code]['countries'][lang_code] = {
+                    'country_name': country_name,
+                    'counter': set(),
+                }
 
 
             # count engine for both language_country combination and language alone
             # count engine for both language_country combination and language alone
             language_list[short_code]['counter'].add(engine_name)
             language_list[short_code]['counter'].add(engine_name)
@@ -167,11 +222,9 @@ def filter_language_list(all_languages):
 
 
         # add language without countries too if there's more than one country to choose from
         # add language without countries too if there's more than one country to choose from
         if len(filtered_countries) > 1:
         if len(filtered_countries) > 1:
-            filtered_countries[lang] = _copy_lang_data(lang)
+            filtered_countries[lang] = _copy_lang_data(lang, None)
         elif len(filtered_countries) == 1:
         elif len(filtered_countries) == 1:
-            # if there's only one country per language, it's not necessary to show country name
             lang_country = next(iter(filtered_countries))
             lang_country = next(iter(filtered_countries))
-            filtered_countries[lang_country]['country_name'] = None
 
 
         # if no country has enough engines try to get most likely country code from babel
         # if no country has enough engines try to get most likely country code from babel
         if not filtered_countries:
         if not filtered_countries:
@@ -183,15 +236,22 @@ def filter_language_list(all_languages):
                     lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
                     lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
 
 
             if lang_country:
             if lang_country:
-                filtered_countries[lang_country] = _copy_lang_data(lang)
+                filtered_countries[lang_country] = _copy_lang_data(lang, None)
             else:
             else:
-                filtered_countries[lang] = _copy_lang_data(lang)
+                filtered_countries[lang] = _copy_lang_data(lang, None)
 
 
         filtered_languages_with_countries.update(filtered_countries)
         filtered_languages_with_countries.update(filtered_countries)
 
 
     return filtered_languages_with_countries
     return filtered_languages_with_countries
 
 
 
 
class UnicodeEscape(str):
    """A ``str`` whose ``repr`` is an ASCII-only, escaped Python literal.

    :py:obj:`pprint.pformat` calls ``repr`` on the values it formats, so
    wrapping a string in this class makes non-ASCII characters (such as flag
    emojis) show up in the formatted output as ``\\UXXXXXXXX`` style escapes
    instead of as raw characters.
    """

    def __repr__(self):
        # The 'unicode-escape' codec yields pure ASCII bytes and already
        # doubles backslashes, but it leaves quotes alone.  Escape the single
        # quote explicitly so the result is always a valid single-quoted
        # literal.
        escaped = self.encode('unicode-escape').decode('ascii')
        return "'" + escaped.replace("'", "\\'") + "'"
+
+
 # Write languages.py.
 # Write languages.py.
 def write_languages_file(languages):
 def write_languages_file(languages):
     file_headers = (
     file_headers = (
@@ -209,11 +269,14 @@ def write_languages_file(languages):
         if name is None:
         if name is None:
             print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
             print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
             continue
             continue
+
+        flag = get_unicode_flag(code) or ''
         item = (
         item = (
             code,
             code,
             languages[code]['name'].split(' (')[0],
             languages[code]['name'].split(' (')[0],
             languages[code].get('country_name') or '',
             languages[code].get('country_name') or '',
             languages[code].get('english_name') or '',
             languages[code].get('english_name') or '',
+            UnicodeEscape(flag),
         )
         )
 
 
         language_codes.append(item)
         language_codes.append(item)