Browse Source

[mod] reduce memory footprint by not calling babel.Locale.parse at runtime

babel.Locale.parse loads more than 60MB in RAM.  The only purpose is to get:

    LOCALE_NAMES   - searx.data.LOCALES["LOCALE_NAMES"]
    RTL_LOCALES    - searx.data.LOCALES["RTL_LOCALES"]

This commit calls babel.Locale.parse only when the translations are updated from
Weblate and stores the result in::

    searx/data/locales.json

This file can be built by::

    ./manage data.locales

By storing these variables in searx.data when the translations are updated we save
roughly 65MB per worker (usually 4 workers = 260MB of RAM saved).

Suggested-by: https://github.com/searxng/searxng/discussions/2633#discussioncomment-8490494
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Alexandre Flament 1 year ago
parent
commit
ed66ed758d

+ 10 - 0
docs/dev/searxng_extra/update.rst

@@ -78,6 +78,16 @@ Scripts to update static data in :origin:`searx/data/`
 .. automodule:: searxng_extra.update.update_pygments
 .. automodule:: searxng_extra.update.update_pygments
   :members:
   :members:
 
 
+.. _update_locales.py:
+
+``update_locales.py``
+=====================
+
+:origin:`[source] <searxng_extra/update/update_locales.py>`
+
+.. automodule:: searxng_extra.update.update_locales
+  :members:
+
 
 
 ``update_wikidata_units.py``
 ``update_wikidata_units.py``
 ============================
 ============================

+ 1 - 6
docs/src/searx.locales.rst

@@ -10,11 +10,6 @@ Locales
    :backlinks: entry
    :backlinks: entry
 
 
 .. automodule:: searx.locales
 .. automodule:: searx.locales
-  :members:
+   :members:
 
 
 
 
-SearXNG's locale codes
-======================
-
-.. automodule:: searx.sxng_locales
-  :members:

+ 2 - 0
searx/data/__init__.py

@@ -15,6 +15,7 @@ __all__ = [
     'EXTERNAL_BANGS',
     'EXTERNAL_BANGS',
     'OSM_KEYS_TAGS',
     'OSM_KEYS_TAGS',
     'ENGINE_DESCRIPTIONS',
     'ENGINE_DESCRIPTIONS',
+    'LOCALES',
     'ahmia_blacklist_loader',
     'ahmia_blacklist_loader',
 ]
 ]
 
 
@@ -50,3 +51,4 @@ EXTERNAL_BANGS = _load('external_bangs.json')
 OSM_KEYS_TAGS = _load('osm_keys_tags.json')
 OSM_KEYS_TAGS = _load('osm_keys_tags.json')
 ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
 ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
 ENGINE_TRAITS = _load('engine_traits.json')
 ENGINE_TRAITS = _load('engine_traits.json')
+LOCALES = _load('locales.json')

+ 69 - 0
searx/data/locales.json

@@ -0,0 +1,69 @@
+{
+  "LOCALE_NAMES": {
+    "af": "Afrikaans",
+    "ar": "العربية (Arabic)",
+    "bg": "Български (Bulgarian)",
+    "bn": "বাংলা (Bangla)",
+    "bo": "བོད་སྐད་ (Tibetan)",
+    "ca": "Català (Catalan)",
+    "cs": "Čeština (Czech)",
+    "cy": "Cymraeg (Welsh)",
+    "da": "Dansk (Danish)",
+    "de": "Deutsch (German)",
+    "dv": "ދިވެހި (Dhivehi)",
+    "el-GR": "Ελληνικά, Ελλάδα (Greek, Greece)",
+    "en": "English",
+    "eo": "Esperanto",
+    "es": "Español (Spanish)",
+    "et": "Eesti (Estonian)",
+    "eu": "Euskara (Basque)",
+    "fa-IR": "فارسی, ایران (Persian, Iran)",
+    "fi": "Suomi (Finnish)",
+    "fil": "Filipino",
+    "fr": "Français (French)",
+    "gl": "Galego (Galician)",
+    "he": "עברית (Hebrew)",
+    "hr": "Hrvatski (Croatian)",
+    "hu": "Magyar (Hungarian)",
+    "ia": "Interlingua",
+    "id": "Indonesia (Indonesian)",
+    "it": "Italiano (Italian)",
+    "ja": "日本語 (Japanese)",
+    "ko": "한국어 (Korean)",
+    "lt": "Lietuvių (Lithuanian)",
+    "lv": "Latviešu (Latvian)",
+    "ml": "മലയാളം (Malayalam)",
+    "ms": "Melayu (Malay)",
+    "nb-NO": "Norsk bokmål, Norge (Norwegian bokmål, Norway)",
+    "nl": "Nederlands (Dutch)",
+    "nl-BE": "Nederlands, België (Dutch, Belgium)",
+    "oc": "Occitan",
+    "pa": "ਪੰਜਾਬੀ (Punjabi)",
+    "pap": "Papiamento",
+    "pl": "Polski (Polish)",
+    "pt": "Português (Portuguese)",
+    "pt-BR": "Português, Brasil (Portuguese, Brazil)",
+    "ro": "Română (Romanian)",
+    "ru": "Русский (Russian)",
+    "si": "සිංහල (Sinhala)",
+    "sk": "Slovenčina (Slovak)",
+    "sl": "Slovenščina (Slovenian)",
+    "sr": "Српски (Serbian)",
+    "sv": "Svenska (Swedish)",
+    "szl": "Ślōnski (Silesian)",
+    "ta": "தமிழ் (Tamil)",
+    "te": "తెలుగు (Telugu)",
+    "th": "ไทย (Thai)",
+    "tr": "Türkçe (Turkish)",
+    "uk": "Українська (Ukrainian)",
+    "vi": "Tiếng việt (Vietnamese)",
+    "zh-HK": "中文, 中國香港特別行政區 (Chinese, Hong Kong SAR China)",
+    "zh-Hans-CN": "中文, 中国 (Chinese, China)",
+    "zh-Hant-TW": "中文, 台灣 (Chinese, Taiwan)"
+  },
+  "RTL_LOCALES": [
+    "fa-IR",
+    "ar",
+    "he"
+  ]
+}

+ 58 - 67
searx/locales.py

@@ -1,12 +1,36 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
 """
 """
+SearXNG’s locale data
+=====================
 
 
-from typing import Set, Optional, List
-import os
-import pathlib
+The variables :py:obj:`RTL_LOCALES` and :py:obj:`LOCALE_NAMES` are loaded from
+:origin:`searx/data/locales.json` / see :py:obj:`locales_initialize` and
+:ref:`update_locales.py`.
+
+.. hint::
+
+   Whenever the value of :py:obj:`ADDITIONAL_TRANSLATIONS` or
+   :py:obj:`LOCALE_BEST_MATCH` is modified, the
+   :origin:`searx/data/locales.json` needs to be rebuilt::
+
+     ./manage data.locales
+
+SearXNG's locale codes
+======================
+
+.. automodule:: searx.sxng_locales
+   :members:
+
+
+SearXNG’s locale implementations
+================================
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
 
 
 import babel
 import babel
 from babel.support import Translations
 from babel.support import Translations
@@ -15,7 +39,11 @@ import babel.core
 import flask_babel
 import flask_babel
 import flask
 import flask
 from flask.ctx import has_request_context
 from flask.ctx import has_request_context
-from searx import logger
+from searx import (
+    data,
+    logger,
+    searx_dir,
+)
 
 
 logger = logger.getChild('locales')
 logger = logger.getChild('locales')
 
 
@@ -30,7 +58,7 @@ LOCALE_NAMES = {}
 :meta hide-value:
 :meta hide-value:
 """
 """
 
 
-RTL_LOCALES: Set[str] = set()
+RTL_LOCALES: set[str] = set()
 """List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
 """List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
 :py:obj:`locales_initialize`)."""
 :py:obj:`locales_initialize`)."""
 
 
@@ -52,7 +80,7 @@ LOCALE_BEST_MATCH = {
     "pap": "pt-BR",
     "pap": "pt-BR",
 }
 }
 """Map a locale we do not have a translations for to a locale we have a
 """Map a locale we do not have a translations for to a locale we have a
-translation for. By example: use Taiwan version of the translation for Hong
+translation for.  By example: use Taiwan version of the translation for Hong
 Kong."""
 Kong."""
 
 
 
 
@@ -90,74 +118,37 @@ def get_translations():
     return _flask_babel_get_translations()
     return _flask_babel_get_translations()
 
 
 
 
-def get_locale_descr(locale, locale_name):
-    """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
+_TR_LOCALES: list[str] = []
 
 
-    :param locale: instance of :py:class:`Locale`
-    :param locale_name: name e.g. 'fr'  or 'pt_BR' (delimiter is *underscore*)
-    """
-
-    native_language, native_territory = _get_locale_descr(locale, locale_name)
-    english_language, english_territory = _get_locale_descr(locale, 'en')
-
-    if native_territory == english_territory:
-        english_territory = None
 
 
-    if not native_territory and not english_territory:
-        if native_language == english_language:
-            return native_language
-        return native_language + ' (' + english_language + ')'
+def get_translation_locales() -> list[str]:
+    """Returns the list of translation locales (*underscore*).  The list is
+    generated from the translation folders in :origin:`searx/translations`"""
 
 
-    result = native_language + ', ' + native_territory + ' (' + english_language
-    if english_territory:
-        return result + ', ' + english_territory + ')'
-    return result + ')'
+    global _TR_LOCALES  # pylint:disable=global-statement
+    if _TR_LOCALES:
+        return _TR_LOCALES
 
 
-
-def _get_locale_descr(locale, language_code):
-    language_name = locale.get_language_name(language_code).capitalize()
-    if language_name and ('a' <= language_name[0] <= 'z'):
-        language_name = language_name.capitalize()
-    territory_name = locale.get_territory_name(language_code)
-    return language_name, territory_name
+    tr_locales = []
+    for folder in (Path(searx_dir) / 'translations').iterdir():
+        if not folder.is_dir():
+            continue
+        if not (folder / 'LC_MESSAGES').is_dir():
+            continue
+        tr_locales.append(folder.name)
+    _TR_LOCALES = sorted(tr_locales)
+    return _TR_LOCALES
 
 
 
 
-def locales_initialize(directory=None):
+def locales_initialize():
     """Initialize locales environment of the SearXNG session.
     """Initialize locales environment of the SearXNG session.
 
 
     - monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations`
     - monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations`
     - init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`
     - init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`
     """
     """
-
-    directory = directory or pathlib.Path(__file__).parent / 'translations'
-    logger.debug("locales_initialize: %s", directory)
     flask_babel.get_translations = get_translations
     flask_babel.get_translations = get_translations
-
-    for tag, descr in ADDITIONAL_TRANSLATIONS.items():
-        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
-        LOCALE_NAMES[tag] = descr
-        if locale.text_direction == 'rtl':
-            RTL_LOCALES.add(tag)
-
-    for tag in LOCALE_BEST_MATCH:
-        descr = LOCALE_NAMES.get(tag)
-        if not descr:
-            locale = babel.Locale.parse(tag, sep='-')
-            LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
-            if locale.text_direction == 'rtl':
-                RTL_LOCALES.add(tag)
-
-    for dirname in sorted(os.listdir(directory)):
-        # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations
-        if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')):
-            continue
-        tag = dirname.replace('_', '-')
-        descr = LOCALE_NAMES.get(tag)
-        if not descr:
-            locale = babel.Locale.parse(dirname)
-            LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
-            if locale.text_direction == 'rtl':
-                RTL_LOCALES.add(tag)
+    LOCALE_NAMES.update(data.LOCALES["LOCALE_NAMES"])
+    RTL_LOCALES.update(data.LOCALES["RTL_LOCALES"])
 
 
 
 
 def region_tag(locale: babel.Locale) -> str:
 def region_tag(locale: babel.Locale) -> str:
@@ -177,7 +168,7 @@ def language_tag(locale: babel.Locale) -> str:
     return sxng_lang
     return sxng_lang
 
 
 
 
-def get_locale(locale_tag: str) -> Optional[babel.Locale]:
+def get_locale(locale_tag: str) -> babel.Locale | None:
     """Returns a :py:obj:`babel.Locale` object parsed from argument
     """Returns a :py:obj:`babel.Locale` object parsed from argument
     ``locale_tag``"""
     ``locale_tag``"""
     try:
     try:
@@ -190,7 +181,7 @@ def get_locale(locale_tag: str) -> Optional[babel.Locale]:
 
 
 def get_official_locales(
 def get_official_locales(
     territory: str, languages=None, regional: bool = False, de_facto: bool = True
     territory: str, languages=None, regional: bool = False, de_facto: bool = True
-) -> Set[babel.Locale]:
+) -> set[babel.Locale]:
     """Returns a list of :py:obj:`babel.Locale` with languages from
     """Returns a list of :py:obj:`babel.Locale` with languages from
     :py:obj:`babel.languages.get_official_languages`.
     :py:obj:`babel.languages.get_official_languages`.
 
 
@@ -376,7 +367,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
     return default
     return default
 
 
 
 
-def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
+def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str | None = None) -> str | None:
     """Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.
     """Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.
 
 
     :param str searxng_locale: SearXNG's internal representation of locale (de,
     :param str searxng_locale: SearXNG's internal representation of locale (de,
@@ -425,7 +416,7 @@ def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Opti
     return get_engine_locale(searxng_locale, engine_locales, default=fallback)
     return get_engine_locale(searxng_locale, engine_locales, default=fallback)
 
 
 
 
-def build_engine_locales(tag_list: List[str]):
+def build_engine_locales(tag_list: list[str]):
     """From a list of locale tags a dictionary is build that can be passed by
     """From a list of locale tags a dictionary is build that can be passed by
     argument ``engine_locales`` to :py:obj:`get_engine_locale`.  This function
     argument ``engine_locales`` to :py:obj:`get_engine_locale`.  This function
     is mainly used by :py:obj:`match_locale` and is similar to what the
     is mainly used by :py:obj:`match_locale` and is similar to what the

+ 4 - 2
searx/sxng_locales.py

@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 '''List of SearXNG's locale codes.
 '''List of SearXNG's locale codes.
 
 
-This file is generated automatically by::
+.. hint::
 
 
-   ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+   Don't modify this file, this file is generated by::
+
+     ./manage data.traits
 '''
 '''
 
 
 sxng_locales = (
 sxng_locales = (

+ 4 - 2
searxng_extra/update/update_engine_traits.py

@@ -31,9 +31,11 @@ languages_file_header = """\
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 '''List of SearXNG's locale codes.
 '''List of SearXNG's locale codes.
 
 
-This file is generated automatically by::
+.. hint::
 
 
-   ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+   Don't modify this file, this file is generated by::
+
+     ./manage data.traits
 '''
 '''
 
 
 sxng_locales = (
 sxng_locales = (

+ 103 - 0
searxng_extra/update/update_locales.py

@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Update locale names in :origin:`searx/data/locales.json` used by
+:ref:`searx.locales`
+
+- :py:obj:`searx.locales.RTL_LOCALES`
+- :py:obj:`searx.locales.LOCALE_NAMES`
+"""
+from __future__ import annotations
+
+from typing import Set
+import json
+from pathlib import Path
+import os
+
+import babel
+import babel.languages
+import babel.core
+
+from searx import searx_dir
+from searx.locales import (
+    ADDITIONAL_TRANSLATIONS,
+    LOCALE_BEST_MATCH,
+    get_translation_locales,
+)
+
+LOCALE_DATA_FILE = Path(searx_dir) / 'data' / 'locales.json'
+TRANSLATOINS_FOLDER = Path(searx_dir) / 'translations'
+
+
+def main():
+
+    LOCALE_NAMES = {}
+    RTL_LOCALES: Set[str] = set()
+
+    for tag, descr in ADDITIONAL_TRANSLATIONS.items():
+        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
+        LOCALE_NAMES[tag] = descr
+        if locale.text_direction == 'rtl':
+            RTL_LOCALES.add(tag)
+
+    for tag in LOCALE_BEST_MATCH:
+        descr = LOCALE_NAMES.get(tag)
+        if not descr:
+            locale = babel.Locale.parse(tag, sep='-')
+            LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
+            if locale.text_direction == 'rtl':
+                RTL_LOCALES.add(tag)
+
+    for tr_locale in get_translation_locales():
+        sxng_tag = tr_locale.replace('_', '-')
+        descr = LOCALE_NAMES.get(sxng_tag)
+        if not descr:
+            locale = babel.Locale.parse(tr_locale)
+            LOCALE_NAMES[sxng_tag] = get_locale_descr(locale, tr_locale)
+            if locale.text_direction == 'rtl':
+                RTL_LOCALES.add(sxng_tag)
+
+    content = {
+        "LOCALE_NAMES": LOCALE_NAMES,
+        "RTL_LOCALES": list(RTL_LOCALES),
+    }
+
+    with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
+        json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
+
+
+def get_locale_descr(locale: babel.Locale, tr_locale):
+    """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
+
+    :param locale: instance of :py:class:`Locale`
+    :param tr_locale: name e.g. 'fr'  or 'pt_BR' (delimiter is *underscore*)
+    """
+
+    native_language, native_territory = _get_locale_descr(locale, tr_locale)
+    english_language, english_territory = _get_locale_descr(locale, 'en')
+
+    if native_territory == english_territory:
+        english_territory = None
+
+    if not native_territory and not english_territory:
+        # none territory name
+        if native_language == english_language:
+            return native_language
+        return native_language + ' (' + english_language + ')'
+
+    else:
+        result = native_language + ', ' + native_territory + ' (' + english_language
+        if english_territory:
+            return result + ', ' + english_territory + ')'
+        return result + ')'
+
+
+def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:
+    language_name = locale.get_language_name(tr_locale).capitalize()  # type: ignore
+    if language_name and ('a' <= language_name[0] <= 'z'):
+        language_name = language_name.capitalize()
+    territory_name: str = locale.get_territory_name(tr_locale)  # type: ignore
+    return language_name, territory_name
+
+
+if __name__ == "__main__":
+    main()

+ 11 - 0
utils/lib_sxng_data.sh

@@ -7,6 +7,7 @@ data.:
   all       : update searx/sxng_locales.py and searx/data/*
   all       : update searx/sxng_locales.py and searx/data/*
   traits    : update searx/data/engine_traits.json & searx/sxng_locales.py
   traits    : update searx/data/engine_traits.json & searx/sxng_locales.py
   useragents: update searx/data/useragents.json with the most recent versions of Firefox
   useragents: update searx/data/useragents.json with the most recent versions of Firefox
+  locales   : update searx/data/locales.json from babel
 EOF
 EOF
 }
 }
 
 
@@ -16,6 +17,7 @@ data.all() {
         pyenv.activate
         pyenv.activate
         data.traits
         data.traits
         data.useragents
         data.useragents
+	data.locales
 
 
         build_msg DATA "update searx/data/osm_keys_tags.json"
         build_msg DATA "update searx/data/osm_keys_tags.json"
         pyenv.cmd python searxng_extra/update/update_osm_keys_tags.py
         pyenv.cmd python searxng_extra/update/update_osm_keys_tags.py
@@ -49,6 +51,15 @@ data.useragents() {
     dump_return $?
     dump_return $?
 }
 }
 
 
+data.locales() {
+    (   set -e
+        pyenv.activate
+        build_msg DATA "update searx/data/locales.json"
+        python searxng_extra/update/update_locales.py
+    )
+    dump_return $?
+}
+
 docs.prebuild() {
 docs.prebuild() {
     build_msg DOCS "build ${DOCS_BUILD}/includes"
     build_msg DOCS "build ${DOCS_BUILD}/includes"
     (
     (

+ 5 - 0
utils/lib_sxng_weblate.sh

@@ -96,10 +96,15 @@ weblate.translations.commit() {
         build_msg BABEL 'compile translation catalogs into binary MO files'
         build_msg BABEL 'compile translation catalogs into binary MO files'
         pybabel compile --statistics \
         pybabel compile --statistics \
                 -d "searx/translations"
                 -d "searx/translations"
+
+        # update searx/data/locales.json
+        data.locales
+
         # git add/commit (no push)
         # git add/commit (no push)
         commit_body=$(cd "${TRANSLATIONS_WORKTREE}"; git log --pretty=format:'%h - %as - %aN <%ae>' "${existing_commit_hash}..HEAD")
         commit_body=$(cd "${TRANSLATIONS_WORKTREE}"; git log --pretty=format:'%h - %as - %aN <%ae>' "${existing_commit_hash}..HEAD")
         commit_message=$(echo -e "[translations] update from Weblate\n\n${commit_body}")
         commit_message=$(echo -e "[translations] update from Weblate\n\n${commit_body}")
         git add searx/translations
         git add searx/translations
+        git add searx/data/locales.json
         git commit -m "${commit_message}"
         git commit -m "${commit_message}"
     )
     )
     exitcode=$?
     exitcode=$?