Browse Source

[fix] solidtorrents engine: JSON API no longer exists

The API endpoint we were using does not exist anymore.  This patch is a
rewrite that parses the HTML page.

Related: https://github.com/paulgoio/searxng/issues/17
Closes: https://github.com/searxng/searxng/issues/858

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
d92b3d96fd
2 changed files with 65 additions and 26 deletions
  1. 59 25
      searx/engines/solidtorrents.py
  2. 6 1
      searx/settings.yml

+ 59 - 25
searx/engines/solidtorrents.py

@@ -1,51 +1,85 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Solid Torrents
-
+"""SolidTorrents
 """
 
-from json import loads
+from datetime import datetime
 from urllib.parse import urlencode
+import random
+
+from lxml import html
+
+from searx.utils import extract_text, eval_xpath, eval_xpath_getindex
 
 about = {
     "website": 'https://www.solidtorrents.net/',
     "wikidata_id": None,
     "official_api_documentation": None,
-    "use_official_api": True,
+    "use_official_api": False,
     "require_api_key": False,
-    "results": 'JSON',
+    "results": 'HTML',
 }
 
 categories = ['files']
 paging = True
 
-base_url = 'https://www.solidtorrents.net/'
-search_url = base_url + 'api/v1/search?{query}'
+base_url = ''
+base_url_rand = ''
+
+units = {"B": 1, "KB": 2 ** 10, "MB": 2 ** 20, "GB": 2 ** 30, "TB": 2 ** 40}
+
+
+def size2int(size_str):
+    n, u = size_str.split()
+    return int(float(n.strip()) * units[u.strip()])
 
 
 def request(query, params):
-    skip = (params['pageno'] - 1) * 20
-    query = urlencode({'q': query, 'skip': skip})
+    global base_url_rand  # pylint: disable=global-statement
+    if isinstance(base_url, list):
+        base_url_rand = random.choice(base_url)
+    else:
+        base_url_rand = base_url
+    search_url = base_url_rand + '/search?{query}'
+    page = (params['pageno'] - 1) * 20
+    query = urlencode({'q': query, 'page': page})
     params['url'] = search_url.format(query=query)
-    logger.debug("query_url --> %s", params['url'])
     return params
 
 
 def response(resp):
     results = []
-    search_results = loads(resp.text)
-
-    for result in search_results["results"]:
-        results.append(
-            {
-                'infohash': result["infohash"],
-                'seed': result["swarm"]["seeders"],
-                'leech': result["swarm"]["leechers"],
-                'title': result["title"],
-                'url': "https://solidtorrents.net/view/" + result["_id"],
-                'filesize': result["size"],
-                'magnetlink': result["magnet"],
-                'template': "torrent.html",
-            }
-        )
+    dom = html.fromstring(resp.text)
+
+    for result in eval_xpath(dom, '//div[contains(@class, "search-result")]'):
+        a = eval_xpath_getindex(result, './div/h5/a', 0, None)
+        if a is None:
+            continue
+        title = extract_text(a)
+        url = eval_xpath_getindex(a, '@href', 0, None)
+        stats = eval_xpath(result, './div//div[contains(@class, "stats")]/div')
+        filesize = size2int(extract_text(stats[1]))
+        leech = extract_text(stats[2])
+        seed = extract_text(stats[3])
+        magnet = eval_xpath_getindex(result, './div//a[contains(@class, "dl-magnet")]/@href', 0, None)
+
+        params = {
+            'seed': seed,
+            'leech': leech,
+            'title': title,
+            'url': base_url_rand + url,
+            'filesize': filesize,
+            'magnetlink': magnet,
+            'template': "torrent.html",
+        }
+
+        date_str = extract_text(stats[4])
+
+        try:
+            params['publishedDate'] = datetime.strptime(date_str, '%b %d, %Y')
+        except ValueError:
+            pass
+
+        results.append(params)
+
     return results

+ 6 - 1
searx/settings.yml

@@ -1257,8 +1257,13 @@ engines:
   - name: solidtorrents
     engine: solidtorrents
     shortcut: solid
-    timeout: 3.0
+    timeout: 4.0
     disabled: false
+    base_url:
+      - https://solidtorrents.net
+      - https://solidtorrents.eu
+      - https://solidtorrents.to
+      - https://bitsearch.to
 
   # For this demo of the sqlite engine download:
   #   https://liste.mediathekview.de/filmliste-v2.db.bz2