kickass.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. ## Kickass Torrent (Videos, Music, Files)
  2. #
  3. # @website https://kickass.so
  4. # @provide-api no (nothing found)
  5. #
  6. # @using-api no
  7. # @results HTML (using search portal)
  8. # @stable yes (HTML can change)
  9. # @parse url, title, content, seed, leech, magnetlink
  10. from urlparse import urljoin
  11. from cgi import escape
  12. from urllib import quote
  13. from lxml import html
  14. from operator import itemgetter
  15. from searx.engines.xpath import extract_text
  16. # engine dependent config
  17. categories = ['videos', 'music', 'files']
  18. paging = True
  19. # search-url
  20. url = 'https://kickass.to/'
  21. search_url = url + 'search/{search_term}/{pageno}/'
  22. # specific xpath variables
  23. magnet_xpath = './/a[@title="Torrent magnet link"]'
  24. torrent_xpath = './/a[@title="Download torrent file"]'
  25. content_xpath = './/span[@class="font11px lightgrey block"]'
  26. # do search-request
  27. def request(query, params):
  28. params['url'] = search_url.format(search_term=quote(query),
  29. pageno=params['pageno'])
  30. # FIX: SSLError: hostname 'kickass.so'
  31. # doesn't match either of '*.kickass.to', 'kickass.to'
  32. params['verify'] = False
  33. return params
  34. # get response from search-request
  35. def response(resp):
  36. results = []
  37. # check if redirect comparing to the True value,
  38. # because resp can be a Mock object, and any attribut name returns something.
  39. if resp.is_redirect is True:
  40. return results
  41. dom = html.fromstring(resp.text)
  42. search_res = dom.xpath('//table[@class="data"]//tr')
  43. # return empty array if nothing is found
  44. if not search_res:
  45. return []
  46. # parse results
  47. for result in search_res[1:]:
  48. link = result.xpath('.//a[@class="cellMainLink"]')[0]
  49. href = urljoin(url, link.attrib['href'])
  50. title = extract_text(link)
  51. content = escape(extract_text(result.xpath(content_xpath)))
  52. seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
  53. leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
  54. filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]
  55. filesize_multiplier = result.xpath('.//td[contains(@class, "nobr")]//span/text()')[0]
  56. files = result.xpath('.//td[contains(@class, "center")][2]/text()')[0]
  57. # convert seed to int if possible
  58. if seed.isdigit():
  59. seed = int(seed)
  60. else:
  61. seed = 0
  62. # convert leech to int if possible
  63. if leech.isdigit():
  64. leech = int(leech)
  65. else:
  66. leech = 0
  67. # convert filesize to byte if possible
  68. try:
  69. filesize = float(filesize)
  70. # convert filesize to byte
  71. if filesize_multiplier == 'TB':
  72. filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
  73. elif filesize_multiplier == 'GB':
  74. filesize = int(filesize * 1024 * 1024 * 1024)
  75. elif filesize_multiplier == 'MB':
  76. filesize = int(filesize * 1024 * 1024)
  77. elif filesize_multiplier == 'KB':
  78. filesize = int(filesize * 1024)
  79. except:
  80. filesize = None
  81. # convert files to int if possible
  82. if files.isdigit():
  83. files = int(files)
  84. else:
  85. files = None
  86. magnetlink = result.xpath(magnet_xpath)[0].attrib['href']
  87. torrentfile = result.xpath(torrent_xpath)[0].attrib['href']
  88. torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*")
  89. # append result
  90. results.append({'url': href,
  91. 'title': title,
  92. 'content': content,
  93. 'seed': seed,
  94. 'leech': leech,
  95. 'filesize': filesize,
  96. 'files': files,
  97. 'magnetlink': magnetlink,
  98. 'torrentfile': torrentfileurl,
  99. 'template': 'torrent.html'})
  100. # return results sorted by seeder
  101. return sorted(results, key=itemgetter('seed'), reverse=True)