# torrentz.py (2.7 KB)
"""
 Torrentz2.is (BitTorrent meta-search engine)

 @website      https://torrentz2.is/
 @provide-api  no
 @using-api    no
 @results      HTML
 @stable       no (HTML can change, although unlikely,
               see https://torrentz.is/torrentz.btsearch)
 @parse        url, title, publishedDate, seed, leech, filesize, magnetlink
"""
import re
from datetime import datetime
from urllib.parse import urlencode

from lxml import html

from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
  17. # engine dependent config
  18. categories = ['files', 'videos', 'music']
  19. paging = True
  20. # search-url
  21. # https://torrentz2.is/search?f=EXAMPLE&p=6
  22. base_url = 'https://torrentz2.is/'
  23. search_url = base_url + 'search?{query}'
  24. # do search-request
  25. def request(query, params):
  26. page = params['pageno'] - 1
  27. query = urlencode({'f': query, 'p': page})
  28. params['url'] = search_url.format(query=query)
  29. return params
  30. # get response from search-request
  31. def response(resp):
  32. results = []
  33. dom = html.fromstring(resp.text)
  34. for result in dom.xpath('//div[@class="results"]/dl'):
  35. name_cell = result.xpath('./dt')[0]
  36. title = extract_text(name_cell)
  37. # skip rows that do not contain a link to a torrent
  38. links = name_cell.xpath('./a')
  39. if len(links) != 1:
  40. continue
  41. # extract url and remove a slash in the beginning
  42. link = links[0].attrib.get('href').lstrip('/')
  43. seed = 0
  44. leech = 0
  45. try:
  46. seed = int(result.xpath('./dd/span[4]/text()')[0].replace(',', ''))
  47. leech = int(result.xpath('./dd/span[5]/text()')[0].replace(',', ''))
  48. except:
  49. pass
  50. params = {
  51. 'url': base_url + link,
  52. 'title': title,
  53. 'seed': seed,
  54. 'leech': leech,
  55. 'template': 'torrent.html'
  56. }
  57. # let's try to calculate the torrent size
  58. try:
  59. filesize_info = result.xpath('./dd/span[3]/text()')[0]
  60. filesize, filesize_multiplier = filesize_info.split()
  61. filesize = get_torrent_size(filesize, filesize_multiplier)
  62. params['filesize'] = filesize
  63. except:
  64. pass
  65. # does our link contain a valid SHA1 sum?
  66. if re.compile('[0-9a-fA-F]{40}').match(link):
  67. # add a magnet link to the result
  68. params['magnetlink'] = 'magnet:?xt=urn:btih:' + link
  69. # extract and convert creation date
  70. try:
  71. date_ts = result.xpath('./dd/span[2]')[0].attrib.get('title')
  72. date = datetime.fromtimestamp(float(date_ts))
  73. params['publishedDate'] = date
  74. except:
  75. pass
  76. results.append(params)
  77. return results