nyaa.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Nyaa.si (Anime Bittorrent tracker)
  3. """
  4. from urllib.parse import urlencode
  5. from lxml import html
  6. from searx.utils import (
  7. eval_xpath_getindex,
  8. extract_text,
  9. get_torrent_size,
  10. int_or_zero,
  11. )
  12. # about
  # Static metadata describing this engine: the site it scrapes, that no
  # official API (and therefore no API key) is used, and that results are
  # extracted from HTML pages.
  about = dict(
      website='https://nyaa.si/',
      wikidata_id=None,
      official_api_documentation=None,
      use_official_api=False,
      require_api_key=False,
      results='HTML',
  )
  # --- engine configuration -------------------------------------------------
  # Result category of this engine.
  categories = ['files']
  # The page number (params['pageno']) is forwarded to the site by request().
  paging = True

  # --- search URL -----------------------------------------------------------
  # Root of the site; request() appends the query string to it.
  base_url = 'https://nyaa.si/'

  # --- XPath queries --------------------------------------------------------
  # One match per data row of the torrent table (rows containing <th> cells
  # are excluded).
  xpath_results = '//table[contains(@class, "torrent-list")]//tr[not(th)]'

  # The remaining expressions are evaluated relative to a single row.
  xpath_category = './/td[1]/a[1]'  # first link of the 1st cell
  xpath_title = './/td[2]/a[last()]'  # last link of the 2nd cell
  xpath_torrent_links = './/td[3]/a'  # every link of the 3rd cell
  xpath_filesize = './/td[4]/text()'
  xpath_seeds = './/td[6]/text()'
  xpath_leeches = './/td[7]/text()'
  xpath_downloads = './/td[8]/text()'
  35. # do search-request
  def request(query, params):
      """Fill ``params['url']`` with the search URL for *query*.

      The query string carries the search terms as ``q`` and the requested
      page number, read from ``params['pageno']``, as ``p``.

      :param query: search terms entered by the user.
      :param params: request parameter dict; must contain ``pageno``.
      :returns: the same ``params`` dict, with ``url`` set.
      """
      query_string = urlencode({'q': query, 'p': params['pageno']})
      search_url = '{}?{}'.format(base_url, query_string)

      params['url'] = search_url
      logger.debug("query_url --> %s", search_url)
      return params
  46. # get response from search-request
  def response(resp):
      """Parse a result page into a list of torrent result dicts.

      Every row matched by ``xpath_results`` yields one dict for the
      ``torrent.html`` template with the keys ``url``, ``title``, ``content``,
      ``seed``, ``leech``, ``filesize``, ``torrentfile`` and ``magnetlink``.
      Rows that have no title link (or a title link without ``href``) are
      skipped instead of aborting the whole page.

      :param resp: HTTP response object; only its ``text`` attribute is read.
      :returns: list of result dicts, empty when no row matches.
      """
      # Local import: keeps this function self-contained without touching the
      # module's import list.
      from urllib.parse import urljoin

      results = []
      dom = html.fromstring(resp.text)

      for row in dom.xpath(xpath_results):
          # torrent title and link to the detail page
          title_link = eval_xpath_getindex(row, xpath_title, 0, None)
          if title_link is None:
              continue
          page_href = title_link.attrib.get('href')
          if not page_href:
              continue
          title = extract_text(title_link)
          # urljoin avoids the double slash that plain concatenation produces
          # when base_url ends with '/' and the href starts with '/'.
          href = urljoin(base_url, page_href)

          # category in which our torrent belongs; compare against None because
          # the truth value of an lxml element reflects its number of children,
          # not whether the element was found.
          category = ''
          category_link = eval_xpath_getindex(row, xpath_category, 0, None)
          if category_link is not None:
              category = category_link.attrib.get('title', '')

          # magnet link and link to the torrent file
          magnet_link = ""
          torrent_link = ""
          for link in row.xpath(xpath_torrent_links):
              link_href = link.attrib.get('href')
              if not link_href:
                  continue
              if link_href.startswith('magnet:'):
                  magnet_link = link_href
              else:
                  # make a site-relative download link absolute
                  torrent_link = urljoin(base_url, link_href)

          # seed, leech and download counts (0 when the cell is missing)
          seed = int_or_zero(row.xpath(xpath_seeds))
          leech = int_or_zero(row.xpath(xpath_leeches))
          downloads = int_or_zero(row.xpath(xpath_downloads))

          # torrent size: only computed when the cell text splits into exactly
          # the two tokens get_torrent_size() is called with; None otherwise.
          filesize = None
          size_text = eval_xpath_getindex(row, xpath_filesize, 0, '')
          size_parts = size_text.split()
          if len(size_parts) == 2:
              filesize = get_torrent_size(*size_parts)

          # content string contains all information not included into template
          content = 'Category: "{category}". Downloaded {downloads} times.'.format(
              category=category, downloads=downloads
          )

          results.append(
              {
                  'url': href,
                  'title': title,
                  'content': content,
                  'seed': seed,
                  'leech': leech,
                  'filesize': filesize,
                  'torrentfile': torrent_link,
                  'magnetlink': magnet_link,
                  'template': 'torrent.html',
              }
          )

      return results