Browse Source

[mod] engine - simplify region & lang handling, make filters configurable

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
fd1422a670
3 changed files with 453 additions and 38 deletions
  1. 13 0
      docs/dev/engines/online/radio_browser.rst
  2. 337 0
      searx/data/engine_traits.json
  3. 103 38
      searx/engines/radio_browser.py

+ 13 - 0
docs/dev/engines/online/radio_browser.rst

@@ -0,0 +1,13 @@
+.. _RadioBrowser engine:
+
+============
+RadioBrowser
+============
+
+.. contents::
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+.. automodule:: searx.engines.radio_browser
+   :members:

+ 337 - 0
searx/data/engine_traits.json

@@ -4932,6 +4932,343 @@
       "zh-HK": "zh_HK"
       "zh-HK": "zh_HK"
     }
     }
   },
   },
+  "radio browser": {
+    "all_locale": null,
+    "custom": {
+      "countrycodes": [
+        "AD",
+        "AE",
+        "AF",
+        "AG",
+        "AL",
+        "AM",
+        "AO",
+        "AQ",
+        "AR",
+        "AS",
+        "AT",
+        "AU",
+        "AW",
+        "AZ",
+        "BA",
+        "BB",
+        "BD",
+        "BE",
+        "BF",
+        "BG",
+        "BH",
+        "BI",
+        "BJ",
+        "BM",
+        "BN",
+        "BO",
+        "BQ",
+        "BR",
+        "BS",
+        "BT",
+        "BW",
+        "BY",
+        "BZ",
+        "CA",
+        "CC",
+        "CD",
+        "CF",
+        "CH",
+        "CI",
+        "CK",
+        "CL",
+        "CM",
+        "CN",
+        "CO",
+        "CR",
+        "CU",
+        "CV",
+        "CW",
+        "CY",
+        "CZ",
+        "DE",
+        "DK",
+        "DM",
+        "DO",
+        "DZ",
+        "EC",
+        "EE",
+        "EG",
+        "ES",
+        "ET",
+        "FI",
+        "FJ",
+        "FK",
+        "FO",
+        "FR",
+        "GA",
+        "GB",
+        "GD",
+        "GE",
+        "GF",
+        "GG",
+        "GH",
+        "GI",
+        "GL",
+        "GN",
+        "GP",
+        "GQ",
+        "GR",
+        "GS",
+        "GT",
+        "GU",
+        "GW",
+        "GY",
+        "HK",
+        "HN",
+        "HR",
+        "HT",
+        "HU",
+        "ID",
+        "IE",
+        "IL",
+        "IM",
+        "IN",
+        "IO",
+        "IQ",
+        "IR",
+        "IS",
+        "IT",
+        "JM",
+        "JO",
+        "JP",
+        "KE",
+        "KG",
+        "KH",
+        "KM",
+        "KN",
+        "KP",
+        "KR",
+        "KW",
+        "KY",
+        "KZ",
+        "LB",
+        "LC",
+        "LK",
+        "LT",
+        "LU",
+        "LV",
+        "LY",
+        "MA",
+        "MC",
+        "MD",
+        "ME",
+        "MG",
+        "MK",
+        "ML",
+        "MM",
+        "MN",
+        "MO",
+        "MQ",
+        "MT",
+        "MU",
+        "MW",
+        "MX",
+        "MY",
+        "MZ",
+        "NA",
+        "NC",
+        "NE",
+        "NF",
+        "NG",
+        "NI",
+        "NL",
+        "NO",
+        "NP",
+        "NZ",
+        "OM",
+        "PA",
+        "PE",
+        "PF",
+        "PH",
+        "PK",
+        "PL",
+        "PM",
+        "PR",
+        "PS",
+        "PT",
+        "PY",
+        "QA",
+        "RE",
+        "RO",
+        "RS",
+        "RU",
+        "RW",
+        "SA",
+        "SC",
+        "SD",
+        "SE",
+        "SG",
+        "SH",
+        "SI",
+        "SJ",
+        "SK",
+        "SL",
+        "SM",
+        "SN",
+        "SO",
+        "SR",
+        "ST",
+        "SV",
+        "SY",
+        "SZ",
+        "TC",
+        "TD",
+        "TF",
+        "TG",
+        "TH",
+        "TJ",
+        "TM",
+        "TN",
+        "TO",
+        "TR",
+        "TT",
+        "TW",
+        "TZ",
+        "UA",
+        "UG",
+        "UM",
+        "US",
+        "UY",
+        "UZ",
+        "VA",
+        "VC",
+        "VE",
+        "VG",
+        "VI",
+        "VN",
+        "VU",
+        "WF",
+        "XK",
+        "YE",
+        "YT",
+        "ZA",
+        "ZM",
+        "ZW"
+      ]
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "afrikaans",
+      "ak": "akan",
+      "am": "amharic",
+      "ar": "arabic",
+      "ast": "asturian",
+      "az": "azerbaijani",
+      "be": "belarusian",
+      "bg": "bulgarian",
+      "bm": "bambara",
+      "bn": "bengali",
+      "bo": "tibetan",
+      "br": "breton",
+      "bs": "bosnian",
+      "ca": "catalan",
+      "cs": "czech",
+      "cv": "chuvash",
+      "cy": "welsh",
+      "da": "danish",
+      "de": "german",
+      "dsb": "lower sorbian",
+      "dz": "dzongkha",
+      "el": "greek",
+      "en": "english",
+      "eo": "esperanto",
+      "es": "spanish",
+      "et": "estonian",
+      "eu": "basque",
+      "fa": "persian",
+      "fi": "finnish",
+      "fil": "tagalog",
+      "fo": "faroese",
+      "fr": "french",
+      "ga": "irish",
+      "gd": "gaelic",
+      "gl": "galician",
+      "gsw": "swiss german",
+      "gu": "gujarati",
+      "gv": "manx",
+      "ha": "hausa",
+      "he": "hebrew",
+      "hi": "hindi",
+      "hr": "croatian",
+      "hsb": "upper sorbian",
+      "hu": "hungarian",
+      "hy": "armenian",
+      "id": "indonesian",
+      "is": "icelandic",
+      "it": "italian",
+      "ja": "japanese",
+      "jv": "javanese",
+      "ka": "georgian",
+      "kk": "kazakh",
+      "kl": "kalaallisut",
+      "km": "khmer",
+      "kn": "kannada",
+      "ko": "korean",
+      "ku": "kurdish",
+      "lb": "luxembourgish",
+      "ln": "lingala",
+      "lt": "lithuanian",
+      "lv": "latvian",
+      "mg": "malagasy",
+      "mk": "macedonian",
+      "ml": "malayalam",
+      "mn": "mongolian",
+      "mr": "marathi",
+      "ms": "malay",
+      "mt": "maltese",
+      "my": "burmese",
+      "nds": "low german",
+      "ne": "nepali",
+      "nl": "dutch",
+      "no": "norwegian",
+      "oc": "occitan",
+      "om": "oromo",
+      "os": "ossetian",
+      "pa": "panjabi",
+      "pl": "polish",
+      "pt": "portuguese",
+      "qu": "quechua",
+      "rm": "romansh",
+      "ro": "romanian",
+      "ru": "russian",
+      "rw": "kinyarwanda",
+      "sa": "sanskrit",
+      "sc": "sardinian",
+      "sd": "sindhi",
+      "si": "sinhala",
+      "sk": "slovak",
+      "sl": "slovenian",
+      "so": "somali",
+      "sq": "albanian",
+      "sr": "serbian",
+      "sv": "swedish",
+      "sw": "swahili",
+      "ta": "tamil",
+      "te": "telugu",
+      "tg": "tajik",
+      "th": "thai",
+      "tk": "turkmen",
+      "tr": "turkish",
+      "tt": "tatar",
+      "uk": "ukrainian",
+      "ur": "urdu",
+      "uz": "uzbek",
+      "vi": "vietnamese",
+      "wo": "wolof",
+      "xh": "xhosa",
+      "yi": "yiddish",
+      "yue": "cantonese",
+      "zh": "chinese",
+      "zh_Hans": "mandarin"
+    },
+    "regions": {}
+  },
   "sepiasearch": {
   "sepiasearch": {
     "all_locale": null,
     "all_locale": null,
     "custom": {},
     "custom": {},

+ 103 - 38
searx/engines/radio_browser.py

@@ -1,30 +1,57 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Radio browser (music)
+"""Search radio stations from RadioBrowser by `Advanced station search API`_.
+
+.. _Advanced station search API:
+   https://de1.api.radio-browser.info/#Advanced_station_search
+
 """
 """
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 import babel
 import babel
+from flask_babel import gettext
 
 
 from searx.network import get
 from searx.network import get
 from searx.enginelib.traits import EngineTraits
 from searx.enginelib.traits import EngineTraits
-from searx.locales import language_tag, region_tag
+from searx.locales import language_tag
 
 
 traits: EngineTraits
 traits: EngineTraits
 
 
 about = {
 about = {
     "website": 'https://www.radio-browser.info/',
     "website": 'https://www.radio-browser.info/',
+    "wikidata_id": 'Q111664849',
     "official_api_documentation": 'https://de1.api.radio-browser.info/',
     "official_api_documentation": 'https://de1.api.radio-browser.info/',
     "use_official_api": True,
     "use_official_api": True,
     "require_api_key": False,
     "require_api_key": False,
     "results": 'JSON',
     "results": 'JSON',
 }
 }
 paging = True
 paging = True
-categories = ['music']
+categories = ['music', 'radio']
 
 
 base_url = "https://de1.api.radio-browser.info"  # see https://api.radio-browser.info/ for all nodes
 base_url = "https://de1.api.radio-browser.info"  # see https://api.radio-browser.info/ for all nodes
 number_of_results = 10
 number_of_results = 10
 
 
+station_filters = []  # ['countrycode', 'language']
+"""A list of filters to be applied to the search of radio stations.  By default
+no filters are applied. Valid filters are:
+
+``language``
+  Filter stations by selected language.  For instance the ``de`` from ``:de-AU``
+  will be translated to `german` and used in the argument ``language=``.
+
+``countrycode``
+  Filter stations by selected country.  The 2-letter countrycode of the station
+  comes from the region the user selected.  For instance ``:de-AU`` will filter
+  out all stations not in ``AU``.
+
+.. note::
+
+   RadioBrowser has registered a lot of languages and countrycodes unknown to
+   :py:obj:`babel`.  Note that when searching for radio stations, users are
+   more likely to search by name than by region or language.
+
+"""
+
 
 
 def request(query, params):
 def request(query, params):
     args = {
     args = {
@@ -35,13 +62,17 @@ def request(query, params):
         'hidebroken': 'true',
         'hidebroken': 'true',
         'reverse': 'true',
         'reverse': 'true',
     }
     }
-    lang = traits.get_language(params['searxng_locale'], None)
-    if lang is not None:
-        args['language'] = lang
 
 
-    region = traits.get_region(params['searxng_locale'], None)
-    if region is not None:
-        args['countrycode'] = region.split('-')[1]
+    if 'language' in station_filters:
+        lang = traits.get_language(params['searxng_locale'])  # type: ignore
+        if lang:
+            args['language'] = lang
+
+    if 'countrycode' in station_filters:
+        if len(params['searxng_locale'].split('-')) > 1:
+            countrycode = params['searxng_locale'].split('-')[-1].upper()
+            if countrycode in traits.custom['countrycodes']:  # type: ignore
+                args['countrycode'] = countrycode
 
 
     params['url'] = f"{base_url}/json/stations/search?{urlencode(args)}"
     params['url'] = f"{base_url}/json/stations/search?{urlencode(args)}"
     return params
     return params
@@ -50,22 +81,43 @@ def request(query, params):
 def response(resp):
 def response(resp):
     results = []
     results = []
 
 
-    for result in resp.json():
+    json_resp = resp.json()
+
+    for result in json_resp:
         url = result['homepage']
         url = result['homepage']
         if not url:
         if not url:
             url = result['url_resolved']
             url = result['url_resolved']
 
 
+        content = []
+        tags = ', '.join(result.get('tags', '').split(','))
+        if tags:
+            content.append(tags)
+        for x in ['state', 'country']:
+            v = result.get(x)
+            if v:
+                v = str(v).strip()
+                content.append(v)
+
+        metadata = []
+        codec = result.get('codec')
+        if codec and codec.lower() != 'unknown':
+            metadata.append(f'{codec} ' + gettext('radio'))
+        for x, y in [
+            (gettext('bitrate'), 'bitrate'),
+            (gettext('votes'), 'votes'),
+            (gettext('clicks'), 'clickcount'),
+        ]:
+            v = result.get(y)
+            if v:
+                v = str(v).strip()
+                metadata.append(f"{x} {v}")
         results.append(
         results.append(
             {
             {
-                'template': 'videos.html',
                 'url': url,
                 'url': url,
                 'title': result['name'],
                 'title': result['name'],
-                'thumbnail': result.get('favicon', '').replace("http://", "https://"),
-                'content': result['country']
-                + " / "
-                + result["tags"]
-                + f" / {result['votes']} votes"
-                + f" / {result['clickcount']} clicks",
+                'img_src': result.get('favicon', '').replace("http://", "https://"),
+                'content': ' | '.join(content),
+                'metadata': ' | '.join(metadata),
                 'iframe_src': result['url_resolved'].replace("http://", "https://"),
                 'iframe_src': result['url_resolved'].replace("http://", "https://"),
             }
             }
         )
         )
@@ -74,38 +126,51 @@ def response(resp):
 
 
 
 
 def fetch_traits(engine_traits: EngineTraits):
 def fetch_traits(engine_traits: EngineTraits):
-    language_list = get(f'{base_url}/json/languages').json()
+    """Fetch languages and countrycodes from RadioBrowser
+
+    - ``traits.languages``: `list of languages API`_
+    - ``traits.custom['countrycodes']``: `list of countries API`_
+
+    .. _list of countries API: https://de1.api.radio-browser.info/#List_of_countries
+    .. _list of languages API: https://de1.api.radio-browser.info/#List_of_languages
+    """
+    # pylint: disable=import-outside-toplevel
+
+    from babel.core import get_global
+
+    babel_reg_list = get_global("territory_languages").keys()
 
 
-    country_list = get(f'{base_url}/json/countrycodes').json()
+    language_list = get(f'{base_url}/json/languages').json()  # type: ignore
+    country_list = get(f'{base_url}/json/countries').json()  # type: ignore
 
 
     for lang in language_list:
     for lang in language_list:
 
 
-        # the language doesn't have any iso code, and hence can't be parsed
-        if not lang['iso_639']:
+        babel_lang = lang.get('iso_639')
+        if not babel_lang:
+            # the language doesn't have any iso code, and hence can't be parsed
+            # print(f"ERROR: lang - no iso code in {lang}")
             continue
             continue
-
         try:
         try:
-            lang_tag = lang['iso_639']
-            sxng_tag = language_tag(babel.Locale.parse(lang_tag, sep="-"))
+            sxng_tag = language_tag(babel.Locale.parse(babel_lang, sep="-"))
         except babel.UnknownLocaleError:
         except babel.UnknownLocaleError:
-            print("ERROR: %s is unknown by babel" % lang_tag)
+            # print(f"ERROR: language tag {babel_lang} is unknown by babel")
             continue
             continue
 
 
+        eng_tag = lang['name']
         conflict = engine_traits.languages.get(sxng_tag)
         conflict = engine_traits.languages.get(sxng_tag)
         if conflict:
         if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
             continue
             continue
+        engine_traits.languages[sxng_tag] = eng_tag
 
 
-        engine_traits.languages[sxng_tag] = lang['name']
-
-        for region in country_list:
-            try:
-                reg_tag = f"{lang['iso_639']}-{region['name']}"
-                sxng_tag = region_tag(babel.Locale.parse(reg_tag, sep="-"))
-            except babel.UnknownLocaleError:
-                continue
-
-            conflict = engine_traits.regions.get(sxng_tag)
-            if conflict:
-                continue
+    countrycodes = set()
+    for region in country_list:
+        if region['iso_3166_1'] not in babel_reg_list:
+            print(f"ERROR: region tag {region['iso_3166_1']} is unknown by babel")
+            continue
+        countrycodes.add(region['iso_3166_1'])
 
 
-            engine_traits.regions[sxng_tag] = reg_tag
+    countrycodes = list(countrycodes)
+    countrycodes.sort()
+    engine_traits.custom['countrycodes'] = countrycodes