torznab.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """.. _torznab engine:
  4. ==============
  5. Torznab WebAPI
  6. ==============
  7. .. contents:: Contents
  8. :depth: 2
  9. :local:
  10. :backlinks: entry
  11. Torznab_ is an API specification that provides a standardized way to query
  12. torrent sites for content. It is used by a number of torrent applications,
  13. including Prowlarr_ and Jackett_.
  14. Using this engine together with Prowlarr_ or Jackett_ allows you to search
  15. a huge number of torrent sites which are not directly supported.
  16. Configuration
  17. =============
  18. The engine has the following settings:
  19. ``base_url``:
  20. Torznab endpoint URL.
  21. ``api_key``:
  22. The API key to use for authentication.
  23. ``torznab_categories``:
  24. The categories to use for searching. This is a list of category IDs. See
  25. Prowlarr-categories_ or Jackett-categories_ for more information.
  26. ``show_torrent_files``:
  27. Whether to show the torrent file in the search results. Be careful, as using
  28. this with Prowlarr_ or Jackett_ leaks the API key. This should be used only
  29. if you are querying a Torznab endpoint without authentication or if the
  30. instance is private. Be aware that private trackers may ban you if you share
  31. the torrent file. Defaults to ``false``.
  32. ``show_magnet_links``:
  33. Whether to show the magnet link in the search results. Be aware that private
  34. trackers may ban you if you share the magnet link. Defaults to ``true``.
  35. .. _Torznab:
  36. https://torznab.github.io/spec-1.3-draft/index.html
  37. .. _Prowlarr:
  38. https://github.com/Prowlarr/Prowlarr
  39. .. _Jackett:
  40. https://github.com/Jackett/Jackett
  41. .. _Prowlarr-categories:
  42. https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories
  43. .. _Jackett-categories:
  44. https://github.com/Jackett/Jackett/wiki/Jackett-Categories
  45. Implementations
  46. ===============
  47. """
  48. from __future__ import annotations
  49. from typing import TYPE_CHECKING
  50. from typing import List, Dict, Any
  51. from datetime import datetime
  52. from urllib.parse import quote
  53. from lxml import etree # type: ignore
  54. from searx.exceptions import SearxEngineAPIException
  55. if TYPE_CHECKING:
  56. import httpx
  57. import logging
  58. logger: logging.Logger
  59. # engine settings
  60. about: Dict[str, Any] = {
  61. "website": None,
  62. "wikidata_id": None,
  63. "official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
  64. "use_official_api": True,
  65. "require_api_key": False,
  66. "results": 'XML',
  67. }
  68. categories: List[str] = ['files']
  69. paging: bool = False
  70. time_range_support: bool = False
  71. # defined in settings.yml
  72. # example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
  73. base_url: str = ''
  74. api_key: str = ''
  75. # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
  76. torznab_categories: List[str] = []
  77. show_torrent_files: bool = False
  78. show_magnet_links: bool = True
  79. def init(engine_settings=None): # pylint: disable=unused-argument
  80. """Initialize the engine."""
  81. if len(base_url) < 1:
  82. raise ValueError('missing torznab base_url')
  83. def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
  84. """Build the request params."""
  85. search_url: str = base_url + '?t=search&q={search_query}'
  86. if len(api_key) > 0:
  87. search_url += '&apikey={api_key}'
  88. if len(torznab_categories) > 0:
  89. search_url += '&cat={torznab_categories}'
  90. params['url'] = search_url.format(
  91. search_query=quote(query), api_key=api_key, torznab_categories=",".join([str(x) for x in torznab_categories])
  92. )
  93. return params
  94. def response(resp: httpx.Response) -> List[Dict[str, Any]]:
  95. """Parse the XML response and return a list of results."""
  96. results = []
  97. search_results = etree.XML(resp.content)
  98. # handle errors: https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
  99. if search_results.tag == "error":
  100. raise SearxEngineAPIException(search_results.get("description"))
  101. channel: etree.Element = search_results[0]
  102. item: etree.Element
  103. for item in channel.iterfind('item'):
  104. result: Dict[str, Any] = build_result(item)
  105. results.append(result)
  106. return results
  107. def build_result(item: etree.Element) -> Dict[str, Any]:
  108. """Build a result from a XML item."""
  109. # extract attributes from XML
  110. # see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes
  111. enclosure: etree.Element | None = item.find('enclosure')
  112. enclosure_url: str | None = None
  113. if enclosure is not None:
  114. enclosure_url = enclosure.get('url')
  115. size = get_attribute(item, 'size')
  116. if not size and enclosure:
  117. size = enclosure.get('length')
  118. if size:
  119. size = int(size)
  120. guid = get_attribute(item, 'guid')
  121. comments = get_attribute(item, 'comments')
  122. pubDate = get_attribute(item, 'pubDate')
  123. seeders = get_torznab_attribute(item, 'seeders')
  124. leechers = get_torznab_attribute(item, 'leechers')
  125. peers = get_torznab_attribute(item, 'peers')
  126. # map attributes to searx result
  127. result: Dict[str, Any] = {
  128. 'template': 'torrent.html',
  129. 'title': get_attribute(item, 'title'),
  130. 'filesize': size,
  131. 'files': get_attribute(item, 'files'),
  132. 'seed': seeders,
  133. 'leech': _map_leechers(leechers, seeders, peers),
  134. 'url': _map_result_url(guid, comments),
  135. 'publishedDate': _map_published_date(pubDate),
  136. 'torrentfile': None,
  137. 'magnetlink': None,
  138. }
  139. link = get_attribute(item, 'link')
  140. if show_torrent_files:
  141. result['torrentfile'] = _map_torrent_file(link, enclosure_url)
  142. if show_magnet_links:
  143. magneturl = get_torznab_attribute(item, 'magneturl')
  144. result['magnetlink'] = _map_magnet_link(magneturl, guid, enclosure_url, link)
  145. return result
  146. def _map_result_url(guid: str | None, comments: str | None) -> str | None:
  147. if guid and guid.startswith('http'):
  148. return guid
  149. if comments and comments.startswith('http'):
  150. return comments
  151. return None
  152. def _map_leechers(leechers: str | None, seeders: str | None, peers: str | None) -> str | None:
  153. if leechers:
  154. return leechers
  155. if seeders and peers:
  156. return str(int(peers) - int(seeders))
  157. return None
  158. def _map_published_date(pubDate: str | None) -> datetime | None:
  159. if pubDate is not None:
  160. try:
  161. return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %z')
  162. except (ValueError, TypeError) as e:
  163. logger.debug("ignore exception (publishedDate): %s", e)
  164. return None
  165. def _map_torrent_file(link: str | None, enclosure_url: str | None) -> str | None:
  166. if link and link.startswith('http'):
  167. return link
  168. if enclosure_url and enclosure_url.startswith('http'):
  169. return enclosure_url
  170. return None
  171. def _map_magnet_link(
  172. magneturl: str | None,
  173. guid: str | None,
  174. enclosure_url: str | None,
  175. link: str | None,
  176. ) -> str | None:
  177. if magneturl and magneturl.startswith('magnet'):
  178. return magneturl
  179. if guid and guid.startswith('magnet'):
  180. return guid
  181. if enclosure_url and enclosure_url.startswith('magnet'):
  182. return enclosure_url
  183. if link and link.startswith('magnet'):
  184. return link
  185. return None
  186. def get_attribute(item: etree.Element, property_name: str) -> str | None:
  187. """Get attribute from item."""
  188. property_element: etree.Element | None = item.find(property_name)
  189. if property_element is not None:
  190. return property_element.text
  191. return None
  192. def get_torznab_attribute(item: etree.Element, attribute_name: str) -> str | None:
  193. """Get torznab special attribute from item."""
  194. element: etree.Element | None = item.find(
  195. './/torznab:attr[@name="{attribute_name}"]'.format(attribute_name=attribute_name),
  196. {'torznab': 'http://torznab.com/schemas/2015/feed'},
  197. )
  198. if element is not None:
  199. return element.get("value")
  200. return None