soundcloud.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Soundcloud (Music)
  4. """
  5. import re
  6. from json import loads
  7. from lxml import html
  8. from dateutil import parser
  9. from urllib.parse import quote_plus, urlencode
  10. from searx.network import get as http_get
  11. # about
  12. about = {
  13. "website": 'https://soundcloud.com',
  14. "wikidata_id": 'Q568769',
  15. "official_api_documentation": 'https://developers.soundcloud.com/',
  16. "use_official_api": True,
  17. "require_api_key": False,
  18. "results": 'JSON',
  19. }
  20. # engine dependent config
  21. categories = ['music']
  22. paging = True
  23. # search-url
  24. # missing attribute: user_id, app_version, app_locale
  25. url = 'https://api-v2.soundcloud.com/'
  26. search_url = (
  27. url + 'search?{query}'
  28. '&variant_ids='
  29. '&facet=model'
  30. '&limit=20'
  31. '&offset={offset}'
  32. '&linked_partitioning=1'
  33. '&client_id={client_id}'
  34. ) # noqa
  35. embedded_url = (
  36. '<iframe width="100%" height="166" '
  37. + 'scrolling="no" frameborder="no" '
  38. + 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
  39. )
  40. cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
  41. guest_client_id = ''
  42. def get_client_id():
  43. response = http_get("https://soundcloud.com")
  44. if response.ok:
  45. tree = html.fromstring(response.content)
  46. # script_tags has been moved from /assets/app/ to /assets/ path. I
  47. # found client_id in https://a-v2.sndcdn.com/assets/49-a0c01933-3.js
  48. script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
  49. app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
  50. # extracts valid app_js urls from soundcloud.com content
  51. for app_js_url in app_js_urls[::-1]:
  52. # gets app_js and searches for the clientid
  53. response = http_get(app_js_url)
  54. if response.ok:
  55. cids = cid_re.search(response.content.decode())
  56. if cids is not None and len(cids.groups()):
  57. return cids.groups()[0]
  58. logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
  59. return ""
  60. def init(engine_settings=None):
  61. global guest_client_id
  62. # api-key
  63. guest_client_id = get_client_id()
  64. # do search-request
  65. def request(query, params):
  66. offset = (params['pageno'] - 1) * 20
  67. params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset, client_id=guest_client_id)
  68. return params
  69. # get response from search-request
  70. def response(resp):
  71. results = []
  72. search_res = loads(resp.text)
  73. # parse results
  74. for result in search_res.get('collection', []):
  75. if result['kind'] in ('track', 'playlist'):
  76. title = result['title']
  77. content = result['description'] or ''
  78. publishedDate = parser.parse(result['last_modified'])
  79. uri = quote_plus(result['uri'])
  80. embedded = embedded_url.format(uri=uri)
  81. # append result
  82. results.append(
  83. {
  84. 'url': result['permalink_url'],
  85. 'title': title,
  86. 'publishedDate': publishedDate,
  87. 'embedded': embedded,
  88. 'content': content,
  89. }
  90. )
  91. # return results
  92. return results