nyaa.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """Nyaa.si (Anime Bittorrent tracker)
  4. """
from urllib.parse import urlencode, urljoin

from lxml import html

from searx.utils import (
    eval_xpath_getindex,
    extract_text,
    get_torrent_size,
    int_or_zero,
)
  13. # about
  14. about = {
  15. "website": 'https://nyaa.si/',
  16. "wikidata_id": None,
  17. "official_api_documentation": None,
  18. "use_official_api": False,
  19. "require_api_key": False,
  20. "results": 'HTML',
  21. }
  22. # engine dependent config
  23. categories = ['files']
  24. paging = True
  25. # search-url
  26. base_url = 'https://nyaa.si/'
  27. # xpath queries
  28. xpath_results = '//table[contains(@class, "torrent-list")]//tr[not(th)]'
  29. xpath_category = './/td[1]/a[1]'
  30. xpath_title = './/td[2]/a[last()]'
  31. xpath_torrent_links = './/td[3]/a'
  32. xpath_filesize = './/td[4]/text()'
  33. xpath_seeds = './/td[6]/text()'
  34. xpath_leeches = './/td[7]/text()'
  35. xpath_downloads = './/td[8]/text()'
  36. # do search-request
  37. def request(query, params):
  38. args = urlencode(
  39. {
  40. 'q': query,
  41. 'p': params['pageno'],
  42. }
  43. )
  44. params['url'] = base_url + '?' + args #
  45. logger.debug("query_url --> %s", params['url'])
  46. return params
  47. # get response from search-request
  48. def response(resp):
  49. results = []
  50. dom = html.fromstring(resp.text)
  51. for result in dom.xpath(xpath_results):
  52. # defaults
  53. filesize = 0
  54. magnet_link = ""
  55. torrent_link = ""
  56. # category in which our torrent belongs
  57. category = eval_xpath_getindex(result, xpath_category, 0, '')
  58. if category:
  59. category = category.attrib.get('title')
  60. # torrent title
  61. page_a = result.xpath(xpath_title)[0]
  62. title = extract_text(page_a)
  63. # link to the page
  64. href = base_url + page_a.attrib.get('href')
  65. for link in result.xpath(xpath_torrent_links):
  66. url = link.attrib.get('href')
  67. if 'magnet' in url:
  68. # link to the magnet
  69. magnet_link = url
  70. else:
  71. # link to the torrent file
  72. torrent_link = url
  73. # seed count
  74. seed = int_or_zero(result.xpath(xpath_seeds))
  75. # leech count
  76. leech = int_or_zero(result.xpath(xpath_leeches))
  77. # torrent downloads count
  78. downloads = int_or_zero(result.xpath(xpath_downloads))
  79. # let's try to calculate the torrent size
  80. filesize = None
  81. filesize_info = eval_xpath_getindex(result, xpath_filesize, 0, '')
  82. if filesize_info:
  83. filesize_info = result.xpath(xpath_filesize)[0]
  84. filesize = get_torrent_size(*filesize_info.split())
  85. # content string contains all information not included into template
  86. content = 'Category: "{category}". Downloaded {downloads} times.'
  87. content = content.format(category=category, downloads=downloads)
  88. results.append(
  89. {
  90. 'url': href,
  91. 'title': title,
  92. 'content': content,
  93. 'seed': seed,
  94. 'leech': leech,
  95. 'filesize': filesize,
  96. 'torrentfile': torrent_link,
  97. 'magnetlink': magnet_link,
  98. 'template': 'torrent.html',
  99. }
  100. )
  101. return results