|
@@ -29,6 +29,9 @@ from urllib.parse import urlencode
|
|
from lxml import html
|
|
from lxml import html
|
|
from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
|
from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
|
from searx.exceptions import SearxEngineCaptchaException
|
|
from searx.exceptions import SearxEngineCaptchaException
|
|
|
|
+from searx.enginelib.traits import EngineTraits
|
|
|
|
+
|
|
|
|
+traits: EngineTraits
|
|
|
|
|
|
# about
|
|
# about
|
|
about = {
|
|
about = {
|
|
@@ -373,3 +376,87 @@ def _fetch_supported_languages(resp):
|
|
ret_val[code] = {"name": name}
|
|
ret_val[code] = {"name": name}
|
|
|
|
|
|
return ret_val
|
|
return ret_val
|
|
|
|
+
|
|
|
|
+
|
|
|
|
# Google country codes that fetch_traits() ignores when building the region
# table: the official language(s) of the country (in parentheses) are not in
# Google's language list.
skip_countries = [
    'AL',  # Albania (sq)
    'AZ',  # Azerbaijan (az)
    'BD',  # Bangladesh (bn)
    'BN',  # Brunei Darussalam (ms)
    'BT',  # Bhutan (dz)
    'ET',  # Ethiopia (am)
    'GE',  # Georgia (ka, os)
    'GL',  # Greenland (kl)
    'KH',  # Cambodia (km)
    'LA',  # Laos (lo)
    'LK',  # Sri Lanka (si, ta)
    'ME',  # Montenegro (sr)
    'MK',  # North Macedonia (mk, sq)
    'MM',  # Myanmar (my)
    'MN',  # Mongolia (mn)
    'MV',  # Maldives (dv) -- dv_MV is unknown by babel
    'MY',  # Malaysia (ms)
    'NP',  # Nepal (ne)
    'TJ',  # Tajikistan (tg)
    'TM',  # Turkmenistan (tk)
    'UZ',  # Uzbekistan (uz)
]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Google's preferences page.

    Downloads ``https://www.google.com/preferences`` and fills
    ``engine_traits`` in place:

    - ``engine_traits.languages`` maps a language tag (built by
      ``language_tag`` from the babel locale) to ``'lang_<code>'``, where
      ``<code>`` is the part after the last ``_`` of each ``lr`` input value.
    - ``engine_traits.regions`` maps a region tag (built by ``region_tag``)
      to ``'country<CODE>'`` for every ``region`` input value that is not in
      ``skip_countries``.
    - ``engine_traits.all_locale`` is set to ``'ZZ'`` when the page offers
      that region value.

    :param engine_traits: traits object that is updated in place.
    :raises RuntimeError: if the response from Google is not OK; parsing an
        error page would only produce empty or bogus traits.
    """
    # pylint: disable=import-outside-toplevel

    engine_traits.data_type = 'supported_languages'  # deprecated

    import babel
    import babel.languages
    from searx import network
    from searx.locales import language_tag, region_tag, get_offical_locales

    resp = network.get('https://www.google.com/preferences')
    if not resp.ok:
        # Do not go on and parse an error page as if it were the preferences
        # form.
        raise RuntimeError("ERROR: response from Google is not OK.")

    dom = html.fromstring(resp.text)

    # Google's language code --> code that is passed to babel instead.
    lang_map = {'no': 'nb'}

    for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'):

        value = x.get("value")
        if not value:
            # An <input> without a usable value can't be mapped to a language.
            continue

        eng_lang = value.split('_')[-1]
        try:
            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
        except babel.UnknownLocaleError:
            print(f"ERROR: {x.get('data-name')} -> {eng_lang} is unknown by babel")
            continue
        sxng_lang = language_tag(locale)

        eng_value = 'lang_' + eng_lang
        conflict = engine_traits.languages.get(sxng_lang)
        if conflict:
            # Stored values carry the 'lang_' prefix, so compare against the
            # prefixed value; only report a conflict when two *different*
            # Google codes map to the same tag.  The first mapping wins.
            if conflict != eng_value:
                print(f"CONFLICT: babel {sxng_lang} --> {conflict}, {eng_lang}")
            continue
        engine_traits.languages[sxng_lang] = eng_value

    # alias languages
    engine_traits.languages['zh'] = 'lang_zh-CN'

    for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'):
        eng_country = x.get("value")

        if eng_country in skip_countries:
            continue
        if eng_country == 'ZZ':
            engine_traits.all_locale = 'ZZ'
            continue

        sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True)

        if not sxng_locales:
            print(f"ERROR: can't map from google country {x.get('data-name')} ({eng_country}) to a babel region.")
            continue

        for sxng_locale in sxng_locales:
            engine_traits.regions[region_tag(sxng_locale)] = 'country' + eng_country
|