Browse Source

Merge pull request #380 from dalf/fix-339

[fix] interface language zh_CN and zh_TW don't work
Alexandre Flament 3 years ago
parent
commit
ee5e9f9e1d

+ 14 - 20
searx/locales.py

@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`UI_LOCALE_CODES` and
-:py:obj:`RTL_LOCALES`."""
+"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
+"""
 
-from typing import List, Set
+from typing import Set
 import os
 import pathlib
 
@@ -12,17 +12,14 @@ from babel import Locale
 
 LOCALE_NAMES = {
     "oc": "Occitan",
-    "nl_BE": "Vlaams (Dutch, Belgium)",
+    "nl-BE": "Vlaams (Dutch, Belgium)",
 }
-"""Mapping of locales and their description.  Locales e.g. 'fr' or 'pt_BR'
-(delimiter is *underline* '_')"""
-
-UI_LOCALE_CODES: List[str] = []
-"""List of locales e.g. 'fr' or 'pt-BR' (delimiter is '-')"""
+"""Mapping of locales and their description.  Locales e.g. 'fr' or 'pt-BR'
+(delimiter is *hyphen* '-')"""
 
 RTL_LOCALES: Set[str] = set()
-"""List of *Right-To-Left* locales e.g. 'he' or 'fa_IR' (delimiter is
-*underline* '_')"""
+"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (delimiter is
+*hyphen* '-')"""
 
 
 def _get_name(locale, language_code):
@@ -37,7 +34,7 @@ def _get_locale_name(locale, locale_name):
     """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
 
     :param locale: instance of :py:class:`Locale`
-    :param locale_name: name e.g. 'fr'  or 'pt_BR'
+    :param locale_name: name e.g. 'fr'  or 'pt_BR' (delimiter is *underscore*)
     """
     native_language, native_territory = _get_name(locale, locale_name)
     english_language, english_territory = _get_name(locale, 'en')
@@ -54,22 +51,19 @@ def _get_locale_name(locale, locale_name):
 
 
 def initialize_locales(directory):
-    """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`UI_LOCALE_CODES` and
-    :py:obj:`RTL_LOCALES`.
+    """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
     """
-    global UI_LOCALE_CODES  # pylint: disable=global-statement
     for dirname in sorted(os.listdir(directory)):
         # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations
         if not os.path.isdir( os.path.join(directory, dirname, 'LC_MESSAGES') ):
             continue
-        info = LOCALE_NAMES.get(dirname)
+        locale_name = dirname.replace('_', '-')
+        info = LOCALE_NAMES.get(locale_name)
         if not info:
             locale = Locale.parse(dirname)
-            LOCALE_NAMES[dirname] = _get_locale_name(locale, dirname)
+            LOCALE_NAMES[locale_name] = _get_locale_name(locale, dirname)
             if locale.text_direction == 'rtl':
-                RTL_LOCALES.add(dirname)
-
-    UI_LOCALE_CODES = [l.replace('_', '-') for l in LOCALE_NAMES]
+                RTL_LOCALES.add(locale_name)
 
 
 initialize_locales(pathlib.Path(__file__).parent / 'translations')

+ 1 - 1
searx/templates/oscar/base.html

@@ -1,6 +1,6 @@
 {% from 'oscar/macros.html' import icon %}
 <!DOCTYPE html>
-<html lang="{{ preferences.get_value('locale') }}" xml:lang="{{ preferences.get_value('locale') }}"{% if rtl %} dir="rtl"{% endif %} class="nojs">
+<html lang="{{ locale_rfc5646 }}" xml:lang="{{ locale_rfc5646 }}"{% if rtl %} dir="rtl"{% endif %} class="nojs">
 <head>
     <meta charset="UTF-8" />
     <meta name="description" content="SearXNG - a privacy-respecting, hackable metasearch engine" />

+ 1 - 1
searx/templates/simple/base.html

@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html class="no-js" lang="en" {% if rtl %} dir="rtl"{% endif %}>
+<html class="no-js" lang="{{ locale_rfc5646 }}" {% if rtl %} dir="rtl"{% endif %}>
 <head>
   <meta charset="UTF-8" />
   <meta name="description" content="SearXNG — a privacy-respecting, hackable metasearch engine">

+ 0 - 0
searx/translations/zh_CN/LC_MESSAGES/messages.mo → searx/translations/zh_Hans_CN/LC_MESSAGES/messages.mo


+ 0 - 0
searx/translations/zh_CN/LC_MESSAGES/messages.po → searx/translations/zh_Hans_CN/LC_MESSAGES/messages.po


+ 0 - 0
searx/translations/zh_TW/LC_MESSAGES/messages.mo → searx/translations/zh_Hant_TW/LC_MESSAGES/messages.mo


+ 0 - 0
searx/translations/zh_TW/LC_MESSAGES/messages.po → searx/translations/zh_Hant_TW/LC_MESSAGES/messages.po


+ 22 - 4
searx/utils.py

@@ -369,6 +369,16 @@ def _get_lang_to_lc_dict(lang_list):
     return value
 
 
+# babel's get_global contains all sorts of miscellaneous locale and territory related data
+# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py
+def _get_from_babel(lang_code, key):
+    match = get_global(key).get(lang_code.replace('-', '_'))
+    # for some keys, such as territory_aliases, match may be a list
+    if isinstance(match, str):
+        return match.replace('_', '-')
+    return match
+
+
 def _match_language(lang_code, lang_list=[], custom_aliases={}):  # pylint: disable=W0102
     """auxiliary function to match lang_code in lang_list"""
     # replace language code with a custom alias if necessary
@@ -379,9 +389,11 @@ def _match_language(lang_code, lang_list=[], custom_aliases={}):  # pylint: disa
         return lang_code
 
     # try to get the most likely country for this language
-    subtags = get_global('likely_subtags').get(lang_code)
+    subtags = _get_from_babel(lang_code, 'likely_subtags')
     if subtags:
-        subtag_parts = subtags.split('_')
+        if subtags in lang_list:
+            return subtags
+        subtag_parts = subtags.split('-')
         new_code = subtag_parts[0] + '-' + subtag_parts[-1]
         if new_code in custom_aliases:
             new_code = custom_aliases[new_code]
@@ -402,16 +414,22 @@ def match_language(locale_code, lang_list=[], custom_aliases={}, fallback='en-US
     locale_parts = locale_code.split('-')
     lang_code = locale_parts[0]
 
+    # if locale_code has script, try matching without it
+    if len(locale_parts) > 2:
+        language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases)
+        if language:
+            return language
+
     # try to get language using an equivalent country code
     if len(locale_parts) > 1:
-        country_alias = get_global('territory_aliases').get(locale_parts[-1])
+        country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases')
         if country_alias:
             language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases)
             if language:
                 return language
 
     # try to get language using an equivalent language code
-    alias = get_global('language_aliases').get(lang_code)
+    alias = _get_from_babel(lang_code, 'language_aliases')
     if alias:
         language = _match_language(alias, lang_list, custom_aliases)
         if language:

+ 20 - 3
searx/webapp.py

@@ -109,7 +109,7 @@ from searx.flaskfix import patch_application
 
 from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
 from searx.languages import language_codes as languages
-from searx.locales import LOCALE_NAMES, UI_LOCALE_CODES, RTL_LOCALES
+from searx.locales import LOCALE_NAMES, RTL_LOCALES
 from searx.search import SearchWithPlugins, initialize as search_initialize
 from searx.network import stream as http_stream, set_context_network_name
 from searx.search.checker import get_result as checker_get_result
@@ -223,6 +223,12 @@ def get_locale():
     if locale == 'oc':
         request.form['use-translation'] = 'oc'
         locale = 'fr_FR'
+    if locale == '':
+        # if there is an error loading the preferences
+        # the locale is going to be ''
+        locale = 'en'
+    # babel uses underscore instead of hyphen.
+    locale = locale.replace('-', '_')
     logger.debug("%s uses locale `%s`", urllib.parse.quote(request.url), locale)
     return locale
 
@@ -240,6 +246,16 @@ def _get_browser_language(req, lang_list):
     return 'en'
 
 
+def _get_locale_rfc5646(locale):
+    """Get locale name for <html lang="...">
+    Chrom* browsers don't detect the language when there is a script subtag (e.g. "Hant").
+    For example "zh-TW" is detected but not "zh-Hant-TW".
+    This function keeps only the first and last subtags, dropping the script subtag.
+    """
+    parts = locale.split('-')
+    return parts[0].lower() + '-' + parts[-1].upper()
+
+
 # code-highlighter
 @app.template_filter('code_highlighter')
 def code_highlighter(codelines, language=None):
@@ -420,6 +436,8 @@ def render(template_name, override_theme=None, **kwargs):
     kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':'))
 
     locale = request.preferences.get_value('locale')
+    kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale)
+
     if locale in RTL_LOCALES and 'rtl' not in kwargs:
         kwargs['rtl'] = True
     if 'current_language' not in kwargs:
@@ -512,8 +530,7 @@ def pre_request():
     # locale is defined neither in settings nor in preferences
     # use browser headers
     if not preferences.get_value("locale"):
-        locale = _get_browser_language(request, UI_LOCALE_CODES)
-        locale = locale.replace('-', '_')
+        locale = _get_browser_language(request, LOCALE_NAMES.keys())
         preferences.parse_dict({"locale": locale})
 
     # request.user_plugins

+ 8 - 0
tests/unit/test_utils.py

@@ -92,6 +92,14 @@ class TestUtils(SearxTestCase):
         self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
         self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')
 
+        # handle script tags
+        self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN')
+        self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW')
+        self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN')
+        self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW')
+        self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN')
+        self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW')
+
         aliases = {'en-GB': 'en-UK', 'he': 'iw'}
 
         # guess country

+ 2 - 2
tests/unit/test_webapp.py

@@ -211,12 +211,12 @@ class ViewsTestCase(SearxTestCase):
         result = self.app.get('/preferences', headers={'Accept-Language': 'zh-tw;q=0.8'})
         self.assertEqual(result.status_code, 200)
         self.assertIn(
-            b'<option value="zh_TW" selected="selected">',
+            b'<option value="zh-Hant-TW" selected="selected">',
             result.data,
             'Interface locale ignored browser preference.'
         )
         self.assertIn(
-            b'<option value="zh-TW" selected="selected">',
+            b'<option value="zh-Hant-TW" selected="selected">',
             result.data,
             'Search language ignored browser preference.'
         )