"""SoundCloud is a German audio streaming service."""
from __future__ import annotations

import re
import typing
import datetime

from urllib.parse import quote_plus, urlencode

from dateutil import parser
from lxml import html

from searx.network import get as http_get
from searx.enginelib import EngineCache

if typing.TYPE_CHECKING:
    import logging

    logger: logging.Logger

about = {
    "website": "https://soundcloud.com",
    "wikidata_id": "Q568769",
    "official_api_documentation": "https://developers.soundcloud.com/docs/api/guide",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ["music"]
paging = True

search_url = "https://api-v2.soundcloud.com/search"
"""This is not the official (developer) url, it is the API which is used by the
HTML frontend of the common WEB site.
"""

cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
results_per_page = 10

soundcloud_facet = "model"

app_locale_map = {
    "de": "de",
    "en": "en",
    "es": "es",
    "fr": "fr",
    "oc": "fr",
    "it": "it",
    "nl": "nl",
    "pl": "pl",
    "szl": "pl",
    "pt": "pt_BR",
    "pap": "pt_BR",
    "sv": "sv",
}

CACHE: EngineCache
"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
seconds."""


def request(query, params):
    """Fill ``params['url']`` with the search URL for ``query`` and return ``params``.

    The guest ``client_id`` is read from :py:obj:`CACHE`; on a cache miss it is
    fetched with :py:func:`get_client_id` and stored in the cache when one was
    found.  ``params`` must provide ``pageno`` (1-based) and ``language``.
    """
    guest_client_id = CACHE.get("guest_client_id")
    if guest_client_id is None:
        guest_client_id = get_client_id()
        if guest_client_id:
            CACHE.set(key="guest_client_id", value=guest_client_id)

    args = {
        "q": query,
        "offset": (params['pageno'] - 1) * results_per_page,
        "limit": results_per_page,
        "facet": soundcloud_facet,
        "client_id": guest_client_id,
        # only the language part of a tag like "pt-BR" is looked up
        "app_locale": app_locale_map.get(params["language"].split("-")[0], "en"),
    }
    if not guest_client_id:
        # urlencode() would otherwise send the literal text "None" as the id
        del args["client_id"]

    params['url'] = f"{search_url}?{urlencode(args)}"
    return params


def _build_result(item):
    """Convert one entry of the API's ``collection`` list into a result dict.

    Returns ``None`` when the entry has no ``permalink_url``.  Optional fields
    that are missing or ``null`` in the entry are tolerated instead of raising.
    """
    url = item.get("permalink_url")
    if not url:
        return None

    # "user" may be absent or null; normalise to a dict for the lookups below
    user = item.get("user") or {}
    content = [
        item.get("description"),
        item.get("label_name"),
    ]

    res = {
        "url": url,
        "title": item["title"],
        "content": " / ".join(c for c in content if c),
        "publishedDate": parser.parse(item["last_modified"]),
        "thumbnail": item.get("artwork_url") or user.get("avatar_url") or None,
        "views": item.get("playback_count") or None,
        "author": user.get("full_name") or None,
    }

    # the embedded player is addressed by the item's "uri"; without one there
    # is nothing to embed, so the key is left out
    uri = item.get("uri")
    if uri:
        res["iframe_src"] = "https://w.soundcloud.com/player/?url=" + quote_plus(uri)

    # "duration" is divided by 1000 and used as seconds, i.e. it is treated as
    # milliseconds; a missing or null duration counts as 0 and sets no length
    seconds = int((item.get("duration") or 0) / 1000)
    if seconds:
        res["length"] = datetime.timedelta(seconds=seconds)

    return res


def response(resp):
    """Parse the JSON body of ``resp`` and return a list of result dicts.

    Only entries of kind ``track`` or ``playlist`` that carry a
    ``permalink_url`` are turned into results; everything else is skipped.
    """
    results = []
    data = resp.json()

    for item in data.get("collection", []):
        if item.get("kind") not in ("track", "playlist"):
            continue
        res = _build_result(item)
        if res is not None:
            results.append(res)

    return results


def init(engine_settings):
    """Create the engine's :py:obj:`CACHE`, named after ``engine_settings["name"]``."""
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_settings["name"])


def get_client_id() -> str | None:
    """Scrape a guest ``client_id`` from the scripts of https://soundcloud.com.

    The start page is fetched and every ``<script>`` whose ``src`` contains
    ``/assets/`` is downloaded, last one first, until :py:obj:`cid_re` matches.

    Returns the client id, or ``None`` when the start page could not be
    fetched or no script contained a non-empty id.
    """
    url = "https://soundcloud.com"
    resp = http_get(url, timeout=10)

    if not resp.ok:
        logger.error("init: GET %s failed", url)
        return None

    # collect the script URLs referenced by the start page
    tree = html.fromstring(resp.content)
    script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
    app_js_urls = [tag.get("src") for tag in script_tags if tag is not None]

    client_id = None
    # scripts are probed in reverse document order
    for js_url in reversed(app_js_urls):
        resp = http_get(js_url)
        if not resp.ok:
            logger.error("init: app_js GET %s failed", js_url)
            continue

        match = cid_re.search(resp.content.decode())
        if match:
            client_id = match.group(1)
            break

    if client_id:
        logger.info("using client_id '%s' for soundclud queries", client_id)
    else:
        logger.warning("missing valid client_id for soundclud queries")

    return client_id or None