torrentz.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
"""
 Torrentz.eu (BitTorrent meta-search engine)

 @website      https://torrentz.eu/
 @provide-api  no

 @using-api    no
 @results      HTML
 @stable       no (HTML can change, although unlikely,
               see https://torrentz.eu/torrentz.btsearch)
 @parse        url, title, publishedDate, seed, leech, filesize, magnetlink
"""
  11. import re
  12. from cgi import escape
  13. from urllib import urlencode
  14. from lxml import html
  15. from searx.engines.xpath import extract_text
  16. from datetime import datetime
  17. from searx.engines.nyaa import int_or_zero, get_filesize_mul
  18. # engine dependent config
  19. categories = ['files', 'videos', 'music']
  20. paging = True
  21. # search-url
  22. # https://torrentz.eu/search?f=EXAMPLE&p=6
  23. base_url = 'https://torrentz.eu/'
  24. search_url = base_url + 'search?{query}'
  25. # do search-request
  26. def request(query, params):
  27. page = params['pageno'] - 1
  28. query = urlencode({'q': query, 'p': page})
  29. params['url'] = search_url.format(query=query)
  30. return params
  31. # get response from search-request
  32. def response(resp):
  33. results = []
  34. dom = html.fromstring(resp.text)
  35. for result in dom.xpath('//div[@class="results"]/dl'):
  36. name_cell = result.xpath('./dt')[0]
  37. title = extract_text(name_cell)
  38. # skip rows that do not contain a link to a torrent
  39. links = name_cell.xpath('./a')
  40. if len(links) != 1:
  41. continue
  42. # extract url and remove a slash in the beginning
  43. link = links[0].attrib.get('href').lstrip('/')
  44. seed = result.xpath('./dd/span[@class="u"]/text()')[0].replace(',', '')
  45. leech = result.xpath('./dd/span[@class="d"]/text()')[0].replace(',', '')
  46. params = {
  47. 'url': base_url + link,
  48. 'title': title,
  49. 'seed': int_or_zero(seed),
  50. 'leech': int_or_zero(leech),
  51. 'template': 'torrent.html'
  52. }
  53. # let's try to calculate the torrent size
  54. try:
  55. size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
  56. size, suffix = size_str.split()
  57. params['filesize'] = int(size) * get_filesize_mul(suffix)
  58. except Exception as e:
  59. pass
  60. # does our link contain a valid SHA1 sum?
  61. if re.compile('[0-9a-fA-F]{40}').match(link):
  62. # add a magnet link to the result
  63. params['magnetlink'] = 'magnet:?xt=urn:btih:' + link
  64. # extract and convert creation date
  65. try:
  66. date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title')
  67. # Fri, 25 Mar 2016 16:29:01
  68. date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
  69. params['publishedDate'] = date
  70. except Exception as e:
  71. pass
  72. results.append(params)
  73. return results