Browse Source

add digbt engine

Unfortunately, the engine is quite slow, so it is disabled.
Furthermore, the number of files reported by digbt.org is wrong,
so it is not displayed in searx.
Noemi Vanyi 8 years ago
parent
commit
3a1c5876b1
4 changed files with 84 additions and 15 deletions
  1. 2 14
      searx/engines/btdigg.py
  2. 58 0
      searx/engines/digbt.py
  3. 6 1
      searx/settings.yml
  4. 18 0
      searx/utils.py

+ 2 - 14
searx/engines/btdigg.py

@@ -16,6 +16,7 @@ from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size
 
 # engine dependent config
 categories = ['videos', 'music', 'files']
@@ -68,20 +69,7 @@ def response(resp):
         leech = 0
 
         # convert filesize to byte if possible
-        try:
-            filesize = float(filesize)
-
-            # convert filesize to byte
-            if filesize_multiplier == 'TB':
-                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
-            elif filesize_multiplier == 'GB':
-                filesize = int(filesize * 1024 * 1024 * 1024)
-            elif filesize_multiplier == 'MB':
-                filesize = int(filesize * 1024 * 1024)
-            elif filesize_multiplier == 'KB':
-                filesize = int(filesize * 1024)
-        except:
-            filesize = None
+        filesize = get_torrent_size(filesize, filesize_multiplier)
 
         # convert files to int if possible
         if files.isdigit():

+ 58 - 0
searx/engines/digbt.py

@@ -0,0 +1,58 @@
+"""
+ DigBT (Videos, Music, Files)
+
+ @website     https://digbt.org
+ @provide-api no
+
+ @using-api   no
+ @results     HTML (using search portal)
+ @stable      no (HTML can change)
+ @parse       url, title, content, magnetlink
+"""
+
+from urlparse import urljoin
+from lxml import html
+from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size
+
+# engine dependent config
+categories = ['videos', 'music', 'files']
+paging = True
+
+# search URL template; {query} and {pageno} are filled in by request()
+# NOTE(review): "-time-" presumably selects ordering by date -- confirm on digbt.org
+URL = 'https://digbt.org'
+SEARCH_URL = URL + '/search/{query}-time-{pageno}'
+# indexes into the whitespace-split text of a result's "tail" div (see response())
+FILESIZE = 3
+FILESIZE_MULTIPLIER = 4
+
+
+def request(query, params):
+    """Build the search request for DigBT.
+
+    Sets ``params['url']`` from ``SEARCH_URL`` using the search terms and
+    ``params['pageno']`` and returns ``params``.
+    """
+    # imported locally so this function does not depend on the module's
+    # import block (Python 2 location of ``quote``)
+    from urllib import quote
+
+    # the query becomes part of the URL path, so characters such as '#' or
+    # '?' must be percent-encoded or they would cut the URL short
+    params['url'] = SEARCH_URL.format(query=quote(query), pageno=params['pageno'])
+
+    return params
+
+
+def response(resp):
+    """Parse a DigBT result page into a list of torrent results.
+
+    Every ``td`` element with class ``x-item`` is turned into one result
+    dict for the ``torrent.html`` template. Items that lack a link, a title
+    or a magnet link are skipped. Returns an empty list when the page holds
+    no usable items.
+    """
+    dom = html.fromstring(resp.content)
+
+    results = []
+    for result in dom.xpath('.//td[@class="x-item"]'):
+        links = result.xpath('.//a[@title]/@href')
+        titles = result.xpath('.//a[@title]/text()')
+        magnetlinks = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')
+
+        # skip items with unexpected markup instead of letting an
+        # IndexError discard every result of the page
+        if not (links and titles and magnetlinks):
+            continue
+
+        content = extract_text(result.xpath('.//div[@class="files"]'))
+
+        # the size value and its unit are read from fixed positions of the
+        # whitespace-split "tail" text; report no size if they are absent
+        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
+        if len(files_data) > max(FILESIZE, FILESIZE_MULTIPLIER):
+            filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
+        else:
+            filesize = None
+
+        results.append({'url': urljoin(URL, links[0]),
+                        'title': titles[0],
+                        'content': content,
+                        'filesize': filesize,
+                        'magnetlink': magnetlinks[0],
+                        'seed': 'N/A',
+                        'leech': 'N/A',
+                        'template': 'torrent.html'})
+
+    return results

+ 6 - 1
searx/settings.yml

@@ -87,7 +87,7 @@ engines:
   - name : btdigg
     engine : btdigg
     shortcut : bt
-    
+
   - name : crossref
     engine : json_engine
     paging : True
@@ -118,6 +118,11 @@ engines:
     weight : 2
     disabled : True
 
+  - name : digbt
+    engine : digbt
+    shortcut : dbt
+    timeout : 6.0
+    disabled : True
+
   - name : digg
     engine : digg
     shortcut : dg

+ 18 - 0
searx/utils.py

@@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
         return a_list[index]
     else:
         return default
+
+
+def get_torrent_size(filesize, filesize_multiplier):
+    """Convert a size value and its unit into a number of bytes.
+
+    :param filesize: size value, anything ``float()`` accepts (e.g. ``'1.5'``)
+    :param filesize_multiplier: unit of the value: ``'TB'``, ``'GB'``,
+        ``'MB'`` or ``'KB'`` (multiples of 1024)
+    :return: the size in bytes as ``int``; the value as ``float`` when the
+        unit is none of the known ones; ``None`` when the value cannot be
+        converted to a number
+    """
+    try:
+        filesize = float(filesize)
+
+        if filesize_multiplier == 'TB':
+            filesize = int(filesize * 1024 ** 4)
+        elif filesize_multiplier == 'GB':
+            filesize = int(filesize * 1024 ** 3)
+        elif filesize_multiplier == 'MB':
+            filesize = int(filesize * 1024 ** 2)
+        elif filesize_multiplier == 'KB':
+            filesize = int(filesize * 1024)
+    except (TypeError, ValueError, OverflowError):
+        # only conversion failures (non-numeric, NaN or infinite values) are
+        # swallowed; KeyboardInterrupt and other errors still propagate
+        filesize = None
+
+    return filesize