Browse Source

[feat] new engine: bt4g added & enabled; btdigg disabled by default

Disable btdigg because most SearXNG instances are blocked by btdigg:
Cloudflare rejects their requests with "too many requests".

This implementation does not parse the HTML page because there is an API in
XML (RSS).  The RSS feed provides less data: it lacks, for example, the number
of seeders/leechers and the list of files in the torrent. It's a tradeoff for a
"stable" engine, as the XML of the RSS feed will change far less often than the
HTML page.

Closes: https://github.com/searxng/searxng/issues/2553
Emilien Devos 1 year ago
parent
commit
0fc8f99ecc
2 changed files with 85 additions and 0 deletions
  1. 80 0
      searx/engines/bt4g.py
  2. 5 0
      searx/settings.yml

+ 80 - 0
searx/engines/bt4g.py

@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+ BT4G (Videos, Music, Files)
+"""
+
+import re
+from datetime import datetime
+from urllib.parse import quote
+
+from lxml import etree
+
+from searx.utils import get_torrent_size
+
+# about: static metadata describing the engine
+about = {
+    "website": 'https://bt4gprx.com',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'XML',  # response() parses the answer with etree.XML
+}
+
+# engine dependent config
+categories = ['files']
+paging = True  # request() forwards params['pageno'] as the ``p`` URL argument
+time_range_support = True  # request() only switches the ordering to 'time' when a range is set
+
+# search-url
+url = 'https://bt4gprx.com'
+# placeholders are filled in by request(); ``page=rss`` presumably selects the
+# RSS (XML) rendering of the result list -- response() expects ./channel/item
+search_url = url + '/search?q={search_term}&orderby={order_by}&category={category}&p={pageno}&page=rss'
+bt4g_order_by = 'relevance'  # relevance, size, seeders, time
+bt4g_category = 'all'  # all, audio, movie, doc, app, other
+
+
+def request(query, params):
+
+    order_by = bt4g_order_by
+    if params['time_range']:
+        order_by = 'time'
+
+    params['url'] = search_url.format(
+        search_term=quote(query),
+        order_by=order_by,
+        category=bt4g_category,
+        pageno=params['pageno'],
+    )
+    return params
+
+
+def response(resp):
+    results = []
+
+    search_results = etree.XML(resp.content)
+
+    # return empty array if nothing is found
+    if len(search_results) == 0:
+        return []
+
+    for entry in search_results.xpath('./channel/item'):
+        title = entry.find("title").text
+        link = entry.find("guid").text
+        fullDescription = entry.find("description").text.split('<br>')
+        filesize = fullDescription[1]
+        filesizeParsed = re.split(r"([A-Z]+)", filesize)
+        magnetlink = entry.find("link").text
+        pubDate = entry.find("pubDate").text
+        results.append(
+            {
+                'url': link,
+                'title': title,
+                'magnetlink': magnetlink,
+                'seed': 'N/A',
+                'leech': 'N/A',
+                'filesize': get_torrent_size(filesizeParsed[0], filesizeParsed[1]),
+                'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'),
+                'template': 'torrent.html',
+            }
+        )
+
+    return results

+ 5 - 0
searx/settings.yml

@@ -415,6 +415,7 @@ engines:
   - name: btdigg
   - name: btdigg
     engine: btdigg
     engine: btdigg
     shortcut: bt
     shortcut: bt
+    disabled: true
 
 
   - name: ccc-tv
   - name: ccc-tv
     engine: xpath
     engine: xpath
@@ -1926,6 +1927,10 @@ engines:
       results: HTML
       results: HTML
       language: ja
       language: ja
 
 
+  - name: bt4g
+    engine: bt4g
+    shortcut: bt4g
+
# Doku engine lets you access any Doku wiki instance:
# Doku engine lets you access any Doku wiki instance:
# A public one or a private/corporate one.
# A public one or a private/corporate one.
 #  - name: ubuntuwiki
 #  - name: ubuntuwiki