# yggtorrent.py
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Yggtorrent (Videos, Music, Files)
  4. """
  5. from lxml import html
  6. from operator import itemgetter
  7. from datetime import datetime
  8. from urllib.parse import quote
  9. from searx.utils import extract_text, get_torrent_size
  10. from searx.network import get as http_get
  11. # about
  12. about = {
  13. "website": 'https://www4.yggtorrent.li/',
  14. "wikidata_id": None,
  15. "official_api_documentation": None,
  16. "use_official_api": False,
  17. "require_api_key": False,
  18. "results": 'HTML',
  19. }
  20. # engine dependent config
  21. categories = ['files']
  22. paging = True
  23. # search-url
  24. url = 'https://www4.yggtorrent.li/'
  25. search_url = url + 'engine/search?name={search_term}&do=search&page={pageno}&category={search_type}'
  26. # yggtorrent specific type-definitions
  27. search_types = {'files': 'all',
  28. 'music': '2139',
  29. 'videos': '2145'}
  30. cookies = dict()
  31. def init(engine_settings=None):
  32. # initial cookies
  33. resp = http_get(url, allow_redirects=False)
  34. if resp.ok:
  35. for r in resp.history:
  36. cookies.update(r.cookies)
  37. cookies.update(resp.cookies)
  38. # do search-request
  39. def request(query, params):
  40. search_type = search_types.get(params['category'], 'all')
  41. pageno = (params['pageno'] - 1) * 50
  42. params['url'] = search_url.format(search_term=quote(query),
  43. search_type=search_type,
  44. pageno=pageno)
  45. params['cookies'] = cookies
  46. return params
  47. # get response from search-request
  48. def response(resp):
  49. results = []
  50. dom = html.fromstring(resp.text)
  51. search_res = dom.xpath('//section[@id="#torrents"]/div/table/tbody/tr')
  52. # return empty array if nothing is found
  53. if not search_res:
  54. return []
  55. # parse results
  56. for result in search_res:
  57. link = result.xpath('.//a[@id="torrent_name"]')[0]
  58. href = link.attrib.get('href')
  59. title = extract_text(link)
  60. seed = result.xpath('.//td[8]/text()')[0]
  61. leech = result.xpath('.//td[9]/text()')[0]
  62. # convert seed to int if possible
  63. if seed.isdigit():
  64. seed = int(seed)
  65. else:
  66. seed = 0
  67. # convert leech to int if possible
  68. if leech.isdigit():
  69. leech = int(leech)
  70. else:
  71. leech = 0
  72. params = {'url': href,
  73. 'title': title,
  74. 'seed': seed,
  75. 'leech': leech,
  76. 'template': 'torrent.html'}
  77. # let's try to calculate the torrent size
  78. try:
  79. filesize_info = result.xpath('.//td[6]/text()')[0]
  80. filesize = filesize_info[:-2]
  81. filesize_multiplier = filesize_info[-2:].lower()
  82. multiplier_french_to_english = {
  83. 'to': 'TiB',
  84. 'go': 'GiB',
  85. 'mo': 'MiB',
  86. 'ko': 'KiB'
  87. }
  88. filesize = get_torrent_size(filesize, multiplier_french_to_english[filesize_multiplier])
  89. params['filesize'] = filesize
  90. except:
  91. pass
  92. # extract and convert creation date
  93. try:
  94. date_ts = result.xpath('.//td[5]/div/text()')[0]
  95. date = datetime.fromtimestamp(float(date_ts))
  96. params['publishedDate'] = date
  97. except:
  98. pass
  99. # append result
  100. results.append(params)
  101. # return results sorted by seeder
  102. return sorted(results, key=itemgetter('seed'), reverse=True)