nyaa.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
"""
 Nyaa.se (Anime Bittorrent tracker)

 @website http://www.nyaa.se/
 @provide-api no
 @using-api no
 @results HTML
 @stable no (HTML can change)
 @parse url, title, content, seed, leech, torrentfile
"""
  10. from cgi import escape
  11. from urllib import urlencode
  12. from lxml import html
  13. from searx.engines.xpath import extract_text
  14. # engine dependent config
  15. categories = ['files', 'images', 'videos', 'music']
  16. paging = True
  17. # search-url
  18. base_url = 'http://www.nyaa.se/'
  19. search_url = base_url + '?page=search&{query}&offset={offset}'
  20. # xpath queries
  21. xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]'
  22. xpath_category = './/td[@class="tlisticon"]/a'
  23. xpath_title = './/td[@class="tlistname"]/a'
  24. xpath_torrent_file = './/td[@class="tlistdownload"]/a'
  25. xpath_filesize = './/td[@class="tlistsize"]/text()'
  26. xpath_seeds = './/td[@class="tlistsn"]/text()'
  27. xpath_leeches = './/td[@class="tlistln"]/text()'
  28. xpath_downloads = './/td[@class="tlistdn"]/text()'
  29. # convert a variable to integer or return 0 if it's not a number
  30. def int_or_zero(num):
  31. if isinstance(num, list):
  32. if len(num) < 1:
  33. return 0
  34. num = num[0]
  35. if num.isdigit():
  36. return int(num)
  37. return 0
  38. # do search-request
  39. def request(query, params):
  40. query = urlencode({'term': query})
  41. params['url'] = search_url.format(query=query, offset=params['pageno'])
  42. return params
  43. # get response from search-request
  44. def response(resp):
  45. results = []
  46. dom = html.fromstring(resp.text)
  47. for result in dom.xpath(xpath_results):
  48. # category in which our torrent belongs
  49. category = result.xpath(xpath_category)[0].attrib.get('title')
  50. # torrent title
  51. page_a = result.xpath(xpath_title)[0]
  52. title = escape(extract_text(page_a))
  53. # link to the page
  54. href = page_a.attrib.get('href')
  55. # link to the torrent file
  56. torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href')
  57. # torrent size
  58. try:
  59. file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
  60. # convert torrent size to bytes.
  61. # if there is no correct index in this dictionary,
  62. # the try block fails as it should
  63. multiplier = {
  64. 'KIB': 1024,
  65. 'MIB': 1024 ** 2,
  66. 'GIB': 1024 ** 3,
  67. 'TIB': 1024 ** 4
  68. }[suffix.upper()]
  69. file_size = int(float(file_size) * multiplier)
  70. except Exception as e:
  71. file_size = None
  72. # seed count
  73. seed = int_or_zero(result.xpath(xpath_seeds))
  74. # leech count
  75. leech = int_or_zero(result.xpath(xpath_leeches))
  76. # torrent downloads count
  77. downloads = int_or_zero(result.xpath(xpath_downloads))
  78. # content string contains all information not included into template
  79. content = 'Category: "{category}". Downloaded {downloads} times.'
  80. content = content.format(category=category, downloads=downloads)
  81. content = escape(content)
  82. results.append({'url': href,
  83. 'title': title,
  84. 'content': content,
  85. 'seed': seed,
  86. 'leech': leech,
  87. 'filesize': file_size,
  88. 'torrentfile': torrent_link,
  89. 'template': 'torrent.html'})
  90. return results