torrentz.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. """
  2. Torrentz.eu (BitTorrent meta-search engine)
  3. @website https://torrentz.eu/
  4. @provide-api no
  5. @using-api no
  6. @results HTML
  7. @stable no (HTML can change, although unlikely,
  8. see https://torrentz.eu/torrentz.btsearch)
  9. @parse url, title, publishedDate, seed, leech, filesize, magnetlink
  10. """
  11. import re
  12. from urllib import urlencode
  13. from lxml import html
  14. from searx.engines.xpath import extract_text
  15. from datetime import datetime
  16. from searx.engines.nyaa import int_or_zero, get_filesize_mul
  17. # engine dependent config
  18. categories = ['files', 'videos', 'music']
  19. paging = True
  20. # search-url
  21. # https://torrentz.eu/search?f=EXAMPLE&p=6
  22. base_url = 'https://torrentz.eu/'
  23. search_url = base_url + 'search?{query}'
  24. # do search-request
  25. def request(query, params):
  26. page = params['pageno'] - 1
  27. query = urlencode({'q': query, 'p': page})
  28. params['url'] = search_url.format(query=query)
  29. return params
  30. # get response from search-request
  31. def response(resp):
  32. results = []
  33. dom = html.fromstring(resp.text)
  34. for result in dom.xpath('//div[@class="results"]/dl'):
  35. name_cell = result.xpath('./dt')[0]
  36. title = extract_text(name_cell)
  37. # skip rows that do not contain a link to a torrent
  38. links = name_cell.xpath('./a')
  39. if len(links) != 1:
  40. continue
  41. # extract url and remove a slash in the beginning
  42. link = links[0].attrib.get('href').lstrip('/')
  43. seed = result.xpath('./dd/span[@class="u"]/text()')[0].replace(',', '')
  44. leech = result.xpath('./dd/span[@class="d"]/text()')[0].replace(',', '')
  45. params = {
  46. 'url': base_url + link,
  47. 'title': title,
  48. 'seed': int_or_zero(seed),
  49. 'leech': int_or_zero(leech),
  50. 'template': 'torrent.html'
  51. }
  52. # let's try to calculate the torrent size
  53. try:
  54. size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
  55. size, suffix = size_str.split()
  56. params['filesize'] = int(size) * get_filesize_mul(suffix)
  57. except Exception as e:
  58. pass
  59. # does our link contain a valid SHA1 sum?
  60. if re.compile('[0-9a-fA-F]{40}').match(link):
  61. # add a magnet link to the result
  62. params['magnetlink'] = 'magnet:?xt=urn:btih:' + link
  63. # extract and convert creation date
  64. try:
  65. date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title')
  66. # Fri, 25 Mar 2016 16:29:01
  67. date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
  68. params['publishedDate'] = date
  69. except Exception as e:
  70. pass
  71. results.append(params)
  72. return results