Browse Source

Merge pull request #862 from return42/fix-858

[fix] solidtorrents engine - JSON API no longer exists
Markus Heiser 3 years ago
parent
commit
ae804ddf40
2 changed files with 69 additions and 26 deletions
  1. searx/engines/solidtorrents.py (63 additions, 25 deletions)
  2. searx/settings.yml (6 additions, 1 deletion)

+ 63 - 25
searx/engines/solidtorrents.py

@@ -1,51 +1,89 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Solid Torrents
-
+"""SolidTorrents
 """
 
-from json import loads
+from datetime import datetime
 from urllib.parse import urlencode
+import random
+
+from lxml import html
+
+from searx.utils import (
+    extract_text,
+    eval_xpath,
+    eval_xpath_getindex,
+    eval_xpath_list,
+    get_torrent_size,
+)
 
 about = {
     "website": 'https://www.solidtorrents.net/',
     "wikidata_id": None,
     "official_api_documentation": None,
-    "use_official_api": True,
+    "use_official_api": False,
     "require_api_key": False,
-    "results": 'JSON',
+    "results": 'HTML',
 }
 
 categories = ['files']
 paging = True
 
-base_url = 'https://www.solidtorrents.net/'
-search_url = base_url + 'api/v1/search?{query}'
+# base_url can be overwritten by a list of URLs in the settings.yml
+base_url = 'https://solidtorrents.net'
 
 
 def request(query, params):
-    skip = (params['pageno'] - 1) * 20
-    query = urlencode({'q': query, 'skip': skip})
+    if isinstance(base_url, list):
+        params['base_url'] = random.choice(base_url)
+    else:
+        params['base_url'] = base_url
+    search_url = params['base_url'] + '/search?{query}'
+    page = (params['pageno'] - 1) * 20
+    query = urlencode({'q': query, 'page': page})
     params['url'] = search_url.format(query=query)
-    logger.debug("query_url --> %s", params['url'])
     return params
 
 
 def response(resp):
     results = []
-    search_results = loads(resp.text)
-
-    for result in search_results["results"]:
-        results.append(
-            {
-                'infohash': result["infohash"],
-                'seed': result["swarm"]["seeders"],
-                'leech': result["swarm"]["leechers"],
-                'title': result["title"],
-                'url': "https://solidtorrents.net/view/" + result["_id"],
-                'filesize': result["size"],
-                'magnetlink': result["magnet"],
-                'template': "torrent.html",
-            }
-        )
+    dom = html.fromstring(resp.text)
+
+    for result in eval_xpath(dom, '//div[contains(@class, "search-result")]'):
+        a = eval_xpath_getindex(result, './div/h5/a', 0, None)
+        if a is None:
+            continue
+        title = extract_text(a)
+        url = eval_xpath_getindex(a, '@href', 0, None)
+        categ = eval_xpath(result, './div//a[contains(@class, "category")]')
+        metadata = extract_text(categ)
+        stats = eval_xpath_list(result, './div//div[contains(@class, "stats")]/div', min_len=5)
+        n, u = extract_text(stats[1]).split()
+        filesize = get_torrent_size(n, u)
+        leech = extract_text(stats[2])
+        seed = extract_text(stats[3])
+        torrentfile = eval_xpath_getindex(result, './div//a[contains(@class, "dl-torrent")]/@href', 0, None)
+        magnet = eval_xpath_getindex(result, './div//a[contains(@class, "dl-magnet")]/@href', 0, None)
+
+        params = {
+            'seed': seed,
+            'leech': leech,
+            'title': title,
+            'url': resp.search_params['base_url'] + url,
+            'filesize': filesize,
+            'magnetlink': magnet,
+            'torrentfile': torrentfile,
+            'metadata': metadata,
+            'template': "torrent.html",
+        }
+
+        date_str = extract_text(stats[4])
+
+        try:
+            params['publishedDate'] = datetime.strptime(date_str, '%b %d, %Y')
+        except ValueError:
+            pass
+
+        results.append(params)
+
     return results

+ 6 - 1
searx/settings.yml

@@ -1257,8 +1257,13 @@ engines:
   - name: solidtorrents
     engine: solidtorrents
     shortcut: solid
-    timeout: 3.0
+    timeout: 4.0
     disabled: false
+    base_url:
+      - https://solidtorrents.net
+      - https://solidtorrents.eu
+      - https://solidtorrents.to
+      - https://bitsearch.to
 
   # For this demo of the sqlite engine download:
   #   https://liste.mediathekview.de/filmliste-v2.db.bz2