Browse Source

[mod] engine torznab - refactor & option to hide links

- torznab engine using types and clearer code
- torznab option to hide torrent and magnet links.
- document the torznab engine
- add myself to authors

Closes: https://github.com/searxng/searxng/issues/1124
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Paolo Basso 1 year ago
parent
commit
401561cb58
4 changed files with 191 additions and 73 deletions
  1. 1 0
      AUTHORS.rst
  2. 2 0
      docs/src/searx.engines.torznab.rst
  3. 179 67
      searx/engines/torznab.py
  4. 9 6
      searx/settings.yml

+ 1 - 0
AUTHORS.rst

@@ -168,3 +168,4 @@ features or generally made searx better:
 - Milad Laly @Milad-Laly
 - Milad Laly @Milad-Laly
 - @llmII
 - @llmII
 - @blob42 `<https://blob42.xyz>`_
 - @blob42 `<https://blob42.xyz>`_
+- Paolo Basso `<https://github.com/paolobasso99>`_

+ 2 - 0
docs/src/searx.engines.torznab.rst

@@ -0,0 +1,2 @@
+.. automodule:: searx.engines.torznab
+   :members:

+ 179 - 67
searx/engines/torznab.py

@@ -1,21 +1,83 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # lint: pylint
-"""Torznab WebAPI
+""".. _torznab engine:
 
 
-A engine that implements the `torznab WebAPI`_.
+==============
+Torznab WebAPI
+==============
 
 
-.. _torznab WebAPI: https://torznab.github.io/spec-1.3-draft/torznab
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+Torznab_ is an API specification that provides a standardized way to query
+torrent sites for content. It is used by a number of torrent applications,
+including Prowlarr_ and Jackett_.
+
+Using this engine together with Prowlarr_ or Jackett_ allows you to search
+a huge number of torrent sites which are not directly supported.
+
+Configuration
+=============
+
+The engine has the following settings:
+
+``base_url``:
+  Torznab endpoint URL.
+
+``api_key``:
+  The API key to use for authentication.
+
+``torznab_categories``:
+  The categories to use for searching. This is a list of category IDs.  See
+  Prowlarr-categories_ or Jackett-categories_ for more information.
+
+``show_torrent_files``:
+  Whether to show the torrent file in the search results.  Be careful as using
+  this with Prowlarr_ or Jackett_ leaks the API key.  This should be used only
+  if you are querying a Torznab endpoint without authentication or if the
+  instance is private.  Be aware that private trackers may ban you if you share
+  the torrent file.  Defaults to ``false``.
+
+``show_magnet_links``:
+  Whether to show the magnet link in the search results.  Be aware that private
+  trackers may ban you if you share the magnet link.  Defaults to ``true``.
+
+.. _Torznab:
+   https://torznab.github.io/spec-1.3-draft/index.html
+.. _Prowlarr:
+   https://github.com/Prowlarr/Prowlarr
+.. _Jackett:
+   https://github.com/Jackett/Jackett
+.. _Prowlarr-categories:
+   https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories
+.. _Jackett-categories:
+   https://github.com/Jackett/Jackett/wiki/Jackett-Categories
+
+
+Implementations
+===============
 
 
 """
 """
+from __future__ import annotations
+from typing import TYPE_CHECKING
 
 
+from typing import List, Dict, Any
 from datetime import datetime
 from datetime import datetime
 from urllib.parse import quote
 from urllib.parse import quote
-from lxml import etree
+from lxml import etree  # type: ignore
 
 
 from searx.exceptions import SearxEngineAPIException
 from searx.exceptions import SearxEngineAPIException
 
 
-# about
-about = {
+if TYPE_CHECKING:
+    import httpx
+    import logging
+
+    logger: logging.Logger
+
+# engine settings
+about: Dict[str, Any] = {
     "website": None,
     "website": None,
     "wikidata_id": None,
     "wikidata_id": None,
     "official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
     "official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
@@ -23,27 +85,30 @@ about = {
     "require_api_key": False,
     "require_api_key": False,
     "results": 'XML',
     "results": 'XML',
 }
 }
-
-categories = ['files']
-paging = False
-time_range_support = False
+categories: List[str] = ['files']
+paging: bool = False
+time_range_support: bool = False
 
 
 # defined in settings.yml
 # defined in settings.yml
 # example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
 # example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
-base_url = ''
-api_key = ''
+base_url: str = ''
+api_key: str = ''
 # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
 # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
-torznab_categories = []
+torznab_categories: List[str] = []
+show_torrent_files: bool = False
+show_magnet_links: bool = True
 
 
 
 
 def init(engine_settings=None):  # pylint: disable=unused-argument
 def init(engine_settings=None):  # pylint: disable=unused-argument
+    """Initialize the engine."""
     if len(base_url) < 1:
     if len(base_url) < 1:
         raise ValueError('missing torznab base_url')
         raise ValueError('missing torznab base_url')
 
 
 
 
-def request(query, params):
+def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
+    """Build the request params."""
+    search_url: str = base_url + '?t=search&q={search_query}'
 
 
-    search_url = base_url + '?t=search&q={search_query}'
     if len(api_key) > 0:
     if len(api_key) > 0:
         search_url += '&apikey={api_key}'
         search_url += '&apikey={api_key}'
     if len(torznab_categories) > 0:
     if len(torznab_categories) > 0:
@@ -56,88 +121,135 @@ def request(query, params):
     return params
     return params
 
 
 
 
-def response(resp):
+def response(resp: httpx.Response) -> List[Dict[str, Any]]:
+    """Parse the XML response and return a list of results."""
     results = []
     results = []
-
     search_results = etree.XML(resp.content)
     search_results = etree.XML(resp.content)
 
 
-    # handle errors
-    # https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
+    # handle errors:  https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
     if search_results.tag == "error":
     if search_results.tag == "error":
         raise SearxEngineAPIException(search_results.get("description"))
         raise SearxEngineAPIException(search_results.get("description"))
 
 
-    for item in search_results[0].iterfind('item'):
-        result = {'template': 'torrent.html'}
+    channel: etree.Element = search_results[0]
 
 
-        enclosure = item.find('enclosure')
+    item: etree.Element
+    for item in channel.iterfind('item'):
+        result: Dict[str, Any] = build_result(item)
+        results.append(result)
 
 
-        result["filesize"] = int(enclosure.get('length'))
+    return results
 
 
-        link = get_property(item, 'link')
-        guid = get_property(item, 'guid')
-        comments = get_property(item, 'comments')
 
 
-        # define url
-        result["url"] = enclosure.get('url')
-        if comments is not None and comments.startswith('http'):
-            result["url"] = comments
-        elif guid is not None and guid.startswith('http'):
-            result["url"] = guid
+def build_result(item: etree.Element) -> Dict[str, Any]:
+    """Build a result from a XML item."""
+
+    # extract attributes from XML
+    # see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes
+    enclosure: etree.Element | None = item.find('enclosure')
+    enclosure_url: str | None = None
+    if enclosure is not None:
+        enclosure_url = enclosure.get('url')
+
+    size = get_attribute(item, 'size')
+    if not size and enclosure:
+        size = enclosure.get('length')
+    if size:
+        size = int(size)
+
+    guid = get_attribute(item, 'guid')
+    comments = get_attribute(item, 'comments')
+    pubDate = get_attribute(item, 'pubDate')
+    seeders = get_torznab_attribute(item, 'seeders')
+    leechers = get_torznab_attribute(item, 'leechers')
+    peers = get_torznab_attribute(item, 'peers')
+
+    # map attributes to searx result
+    result: Dict[str, Any] = {
+        'template': 'torrent.html',
+        'title': get_attribute(item, 'title'),
+        'filesize': size,
+        'files': get_attribute(item, 'files'),
+        'seed': seeders,
+        'leech': _map_leechers(leechers, seeders, peers),
+        'url': _map_result_url(guid, comments),
+        'publishedDate': _map_published_date(pubDate),
+        'torrentfile': None,
+        'magnetlink': None,
+    }
+
+    link = get_attribute(item, 'link')
+    if show_torrent_files:
+        result['torrentfile'] = _map_torrent_file(link, enclosure_url)
+    if show_magnet_links:
+        magneturl = get_torznab_attribute(item, 'magneturl')
+        result['magnetlink'] = _map_magnet_link(magneturl, guid, enclosure_url, link)
+    return result
+
+
+def _map_result_url(guid: str | None, comments: str | None) -> str | None:
+    if guid and guid.startswith('http'):
+        return guid
+    if comments and comments.startswith('http'):
+        return comments
+    return None
 
 
-        # define torrent file url
-        result["torrentfile"] = None
-        if enclosure.get('url').startswith("http"):
-            result["torrentfile"] = enclosure.get('url')
-        elif link is not None and link.startswith('http'):
-            result["torrentfile"] = link
 
 
-        # define magnet link
-        result["magnetlink"] = get_torznab_attr(item, 'magneturl')
-        if result["magnetlink"] is None:
-            if enclosure.get('url').startswith("magnet"):
-                result["magnetlink"] = enclosure.get('url')
-            elif link is not None and link.startswith('magnet'):
-                result["magnetlink"] = link
+def _map_leechers(leechers: str | None, seeders: str | None, peers: str | None) -> str | None:
+    if leechers:
+        return leechers
+    if seeders and peers:
+        return str(int(peers) - int(seeders))
+    return None
 
 
-        result["title"] = get_property(item, 'title')
-        result["files"] = get_property(item, 'files')
 
 
-        result["publishedDate"] = None
+def _map_published_date(pubDate: str | None) -> datetime | None:
+    if pubDate is not None:
         try:
         try:
-            result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z')
+            return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %z')
         except (ValueError, TypeError) as e:
         except (ValueError, TypeError) as e:
             logger.debug("ignore exception (publishedDate): %s", e)
             logger.debug("ignore exception (publishedDate): %s", e)
+    return None
 
 
-        result["seed"] = get_torznab_attr(item, 'seeders')
-
-        # define leech
-        result["leech"] = get_torznab_attr(item, 'leechers')
-        if result["leech"] is None and result["seed"] is not None:
-            peers = get_torznab_attr(item, 'peers')
-            if peers is not None:
-                result["leech"] = int(peers) - int(result["seed"])
 
 
-        results.append(result)
+def _map_torrent_file(link: str | None, enclosure_url: str | None) -> str | None:
+    if link and link.startswith('http'):
+        return link
+    if enclosure_url and enclosure_url.startswith('http'):
+        return enclosure_url
+    return None
 
 
-    return results
 
 
+def _map_magnet_link(
+    magneturl: str | None,
+    guid: str | None,
+    enclosure_url: str | None,
+    link: str | None,
+) -> str | None:
+    if magneturl and magneturl.startswith('magnet'):
+        return magneturl
+    if guid and guid.startswith('magnet'):
+        return guid
+    if enclosure_url and enclosure_url.startswith('magnet'):
+        return enclosure_url
+    if link and link.startswith('magnet'):
+        return link
+    return None
 
 
-def get_property(item, property_name):
-    property_element = item.find(property_name)
 
 
+def get_attribute(item: etree.Element, property_name: str) -> str | None:
+    """Get attribute from item."""
+    property_element: etree.Element | None = item.find(property_name)
     if property_element is not None:
     if property_element is not None:
         return property_element.text
         return property_element.text
-
     return None
     return None
 
 
 
 
-def get_torznab_attr(item, attr_name):
-    element = item.find(
-        './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name),
+def get_torznab_attribute(item: etree.Element, attribute_name: str) -> str | None:
+    """Get torznab special attribute from item."""
+    element: etree.Element | None = item.find(
+        './/torznab:attr[@name="{attribute_name}"]'.format(attribute_name=attribute_name),
         {'torznab': 'http://torznab.com/schemas/2015/feed'},
         {'torznab': 'http://torznab.com/schemas/2015/feed'},
     )
     )
-
     if element is not None:
     if element is not None:
         return element.get("value")
         return element.get("value")
-
     return None
     return None

+ 9 - 6
searx/settings.yml

@@ -1392,15 +1392,18 @@ engines:
     shortcut: tch
     shortcut: tch
 
 
   # torznab engine lets you query any torznab compatible indexer.  Using this
   # torznab engine lets you query any torznab compatible indexer.  Using this
-  # engine in combination with Jackett (https://github.com/Jackett/Jackett)
-  # opens the possibility to query a lot of public and private indexers directly
-  # from SearXNG.
-  # - name: torznab
+  # engine in combination with Jackett opens the possibility to query a lot of
+  # public and private indexers directly from SearXNG. More details at:
+  # https://docs.searxng.org/src/searx.engines.torznab.html
+  #
+  # - name: Torznab EZTV
   #   engine: torznab
   #   engine: torznab
-  #   shortcut: trz
-  #   base_url: http://localhost:9117/api/v2.0/indexers/all/results/torznab
+  #   shortcut: eztv
+  #   base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab
   #   enable_http: true  # if using localhost
   #   enable_http: true  # if using localhost
   #   api_key: xxxxxxxxxxxxxxx
   #   api_key: xxxxxxxxxxxxxxx
+  #   show_magnet_links: true
+  #   show_torrent_files: false
   #   # https://github.com/Jackett/Jackett/wiki/Jackett-Categories
   #   # https://github.com/Jackett/Jackett/wiki/Jackett-Categories
   #   torznab_categories:  # optional
   #   torznab_categories:  # optional
   #     - 2000
   #     - 2000