nyaa.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. """
  2. Nyaa.se (Anime Bittorrent tracker)
  3. @website http://www.nyaa.se/
  4. @provide-api no
  5. @using-api no
  6. @results HTML
  7. @stable no (HTML can change)
  8. @parse url, title, content, seed, leech, torrentfile
  9. """
  10. from cgi import escape
  11. from urllib import urlencode
  12. from lxml import html
  13. from searx.engines.xpath import extract_text
# engine dependent config
categories = ['files', 'images', 'videos', 'music']
# request() forwards params['pageno'] into the search URL
paging = True

# search-url
base_url = 'http://www.nyaa.se/'
# {query} receives the url-encoded "term=..." pair built in request(),
# {offset} receives params['pageno']
search_url = base_url + '?page=search&{query}&offset={offset}'

# xpath queries
# rows of the result table; response() builds one result per matched row
xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]'
# the queries below are evaluated relative to a single result row
# link whose "title" attribute is used as the torrent category
xpath_category = './/td[@class="tlisticon"]/a'
# link whose text is the torrent title and whose href is the result url
xpath_title = './/td[@class="tlistname"]/a'
# link whose href is reported as the torrent file
xpath_torrent_file = './/td[@class="tlistdownload"]/a'
# size text, split by response() into a number and a unit suffix
xpath_filesize = './/td[@class="tlistsize"]/text()'
# counters, converted with int_or_zero() in response()
xpath_seeds = './/td[@class="tlistsn"]/text()'
xpath_leeches = './/td[@class="tlistln"]/text()'
xpath_downloads = './/td[@class="tlistdn"]/text()'
  29. # convert a variable to integer or return 0 if it's not a number
  30. def int_or_zero(num):
  31. if isinstance(num, list):
  32. if len(num) < 1:
  33. return 0
  34. num = num[0]
  35. if num.isdigit():
  36. return int(num)
  37. return 0
  38. # get multiplier to convert torrent size to bytes
  39. def get_filesize_mul(suffix):
  40. return {
  41. 'KB': 1024,
  42. 'MB': 1024 ** 2,
  43. 'GB': 1024 ** 3,
  44. 'TB': 1024 ** 4,
  45. 'KIB': 1024,
  46. 'MIB': 1024 ** 2,
  47. 'GIB': 1024 ** 3,
  48. 'TIB': 1024 ** 4
  49. }[str(suffix).upper()]
  50. # do search-request
  51. def request(query, params):
  52. query = urlencode({'term': query})
  53. params['url'] = search_url.format(query=query, offset=params['pageno'])
  54. return params
  55. # get response from search-request
  56. def response(resp):
  57. results = []
  58. dom = html.fromstring(resp.text)
  59. for result in dom.xpath(xpath_results):
  60. # category in which our torrent belongs
  61. category = result.xpath(xpath_category)[0].attrib.get('title')
  62. # torrent title
  63. page_a = result.xpath(xpath_title)[0]
  64. title = escape(extract_text(page_a))
  65. # link to the page
  66. href = page_a.attrib.get('href')
  67. # link to the torrent file
  68. torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href')
  69. # torrent size
  70. try:
  71. file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
  72. file_size = int(float(file_size) * get_filesize_mul(suffix))
  73. except Exception as e:
  74. file_size = None
  75. # seed count
  76. seed = int_or_zero(result.xpath(xpath_seeds))
  77. # leech count
  78. leech = int_or_zero(result.xpath(xpath_leeches))
  79. # torrent downloads count
  80. downloads = int_or_zero(result.xpath(xpath_downloads))
  81. # content string contains all information not included into template
  82. content = 'Category: "{category}". Downloaded {downloads} times.'
  83. content = content.format(category=category, downloads=downloads)
  84. content = escape(content)
  85. results.append({'url': href,
  86. 'title': title,
  87. 'content': content,
  88. 'seed': seed,
  89. 'leech': leech,
  90. 'filesize': file_size,
  91. 'torrentfile': torrent_link,
  92. 'template': 'torrent.html'})
  93. return results