Browse Source

Merge pull request #639 from kvch/digbt-engine

add digbt engine - fixes #638
Adam Tauber 8 years ago
parent
commit
13bed1f872
5 changed files with 144 additions and 15 deletions
  1. 2 14
      searx/engines/btdigg.py
  2. 58 0
      searx/engines/digbt.py
  3. 7 1
      searx/settings.yml
  4. 18 0
      searx/utils.py
  5. 59 0
      tests/unit/engines/test_digbt.py

+ 2 - 14
searx/engines/btdigg.py

@@ -16,6 +16,7 @@ from urllib import quote
 from lxml import html
 from lxml import html
 from operator import itemgetter
 from operator import itemgetter
 from searx.engines.xpath import extract_text
 from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size
 
 
 # engine dependent config
 # engine dependent config
 categories = ['videos', 'music', 'files']
 categories = ['videos', 'music', 'files']
@@ -68,20 +69,7 @@ def response(resp):
         leech = 0
         leech = 0
 
 
         # convert filesize to byte if possible
         # convert filesize to byte if possible
-        try:
-            filesize = float(filesize)
-
-            # convert filesize to byte
-            if filesize_multiplier == 'TB':
-                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
-            elif filesize_multiplier == 'GB':
-                filesize = int(filesize * 1024 * 1024 * 1024)
-            elif filesize_multiplier == 'MB':
-                filesize = int(filesize * 1024 * 1024)
-            elif filesize_multiplier == 'KB':
-                filesize = int(filesize * 1024)
-        except:
-            filesize = None
+        filesize = get_torrent_size(filesize, filesize_multiplier)
 
 
         # convert files to int if possible
         # convert files to int if possible
         if files.isdigit():
         if files.isdigit():

+ 58 - 0
searx/engines/digbt.py

@@ -0,0 +1,58 @@
+"""
+ DigBT (Videos, Music, Files)
+
+ @website     https://digbt.org
+ @provide-api no
+
+ @using-api   no
+ @results     HTML (using search portal)
+ @stable      no (HTML can change)
+ @parse       url, title, content, magnetlink
+"""
+
+from urlparse import urljoin
+from lxml import html
+from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size
+
# engine settings
categories = ['videos', 'music', 'files']
paging = True

# base address of the site; relative result links are resolved against it
URL = 'https://digbt.org'
# search address template, filled with the query and the page number
SEARCH_URL = URL + '/search/{query}-time-{pageno}'

# positions of the size value and of its unit inside the
# whitespace-split text of a result's "tail" block
FILESIZE = 3
FILESIZE_MULTIPLIER = 4
+
+
def request(query, params):
    """Store the DigBT search URL for ``query`` in ``params['url']``.

    The page number is taken from ``params['pageno']``; the same ``params``
    mapping is returned after being updated in place.
    """
    search_url = SEARCH_URL.format(pageno=params['pageno'], query=query)
    params['url'] = search_url
    return params
+
+
def response(resp):
    """Parse a DigBT result page into a list of torrent results.

    :param resp: response object whose ``content`` attribute holds the HTML
    :returns: list of result dicts rendered with ``torrent.html``; empty when
        the page has no ``td`` cells of class ``x-item``

    Items lacking a link, a title or a magnet link are skipped instead of
    aborting the whole page with an ``IndexError``.
    """
    dom = html.fromstring(resp.content)

    results = []
    for result in dom.xpath('.//td[@class="x-item"]'):
        links = result.xpath('.//a[@title]/@href')
        titles = result.xpath('.//a[@title]/text()')
        magnetlinks = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')

        # a result is only usable when all mandatory parts are present
        if not (links and titles and magnetlinks):
            continue

        content = extract_text(result.xpath('.//div[@class="files"]'))

        # the size value and its unit are picked by position from the
        # whitespace-split tail text (e.g. "Files: 1 Size: 2.9 GB ...")
        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
        if len(files_data) > max(FILESIZE, FILESIZE_MULTIPLIER):
            filesize = get_torrent_size(files_data[FILESIZE],
                                        files_data[FILESIZE_MULTIPLIER])
        else:
            # tail too short to contain a size: report it as unknown
            filesize = None

        results.append({'url': urljoin(URL, links[0]),
                        'title': titles[0],
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnetlinks[0],
                        'seed': 'N/A',
                        'leech': 'N/A',
                        'template': 'torrent.html'})

    return results

+ 7 - 1
searx/settings.yml

@@ -87,7 +87,7 @@ engines:
   - name : btdigg
   - name : btdigg
     engine : btdigg
     engine : btdigg
     shortcut : bt
     shortcut : bt
-    
+
   - name : crossref
   - name : crossref
     engine : json_engine
     engine : json_engine
     paging : True
     paging : True
@@ -118,6 +118,12 @@ engines:
     weight : 2
     weight : 2
     disabled : True
     disabled : True
 
 
+  - name : digbt
+    engine : digbt
+    shortcut : dbt
+    timeout : 6.0
+    disabled : True
+
   - name : digg
   - name : digg
     engine : digg
     engine : digg
     shortcut : dg
     shortcut : dg

+ 18 - 0
searx/utils.py

@@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
         return a_list[index]
         return a_list[index]
     else:
     else:
         return default
         return default
+
+
def get_torrent_size(filesize, filesize_multiplier):
    """Convert a human-readable torrent size into a number of bytes.

    :param filesize: size value as a number or numeric string (e.g. ``'2.9'``)
    :param filesize_multiplier: unit of the value, one of ``'KB'``, ``'MB'``,
        ``'GB'`` or ``'TB'`` (case-sensitive)
    :returns: size in bytes as ``int`` for a known unit; the plain ``float``
        value when the unit is not recognised; ``None`` when the input cannot
        be converted (non-numeric, ``None``, infinite or NaN with a known unit)
    """
    # binary multiples, as used by the original conversion
    multipliers = {
        'KB': 1024,
        'MB': 1024 ** 2,
        'GB': 1024 ** 3,
        'TB': 1024 ** 4,
    }

    try:
        filesize = float(filesize)
        multiplier = multipliers.get(filesize_multiplier)
        if multiplier is not None:
            filesize = int(filesize * multiplier)
    except (TypeError, ValueError, OverflowError):
        # only conversion failures mean "size unknown"; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed here
        return None

    return filesize

+ 59 - 0
tests/unit/engines/test_digbt.py

@@ -0,0 +1,59 @@
+from collections import defaultdict
+import mock
+from searx.engines import digbt
+from searx.testing import SearxTestCase
+
+
class TestDigBTEngine(SearxTestCase):
    """Unit tests for the DigBT engine's request builder and result parser."""

    def test_request(self):
        """The generated URL points at digbt.org and contains the query."""
        query = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 0

        built = digbt.request(query, request_params)

        self.assertIn('url', built)
        built_url = built['url']
        self.assertIn(query, built_url)
        self.assertIn('digbt.org', built_url)

    def test_response(self):
        """Invalid input raises, an empty page yields [], a hit is parsed."""
        # objects without a ``content`` attribute are rejected
        for invalid in (None, [], '', '[]'):
            self.assertRaises(AttributeError, digbt.response, invalid)

        # a page without result cells produces no results
        empty_page = mock.Mock(content='<html></html>')
        self.assertEqual(digbt.response(empty_page), [])

        # a page holding exactly one result item
        html = """
        <table class="table">
            <tr><td class="x-item">
            <div>
                <a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a>
                <span class="ctime"><span style="color:red;">4 hours ago</span></span>
            </div>
            <div class="files">
                <ul>
                    <li>The Big Bang Theory  2.9 GB</li>
                    <li>....</li>
                </ul>
            </div>
            <div class="tail">
                Files: 1 Size: 2.9 GB  Downloads: 1 Updated: <span style="color:red;">4 hours ago</span>
                &nbsp; &nbsp;
                <a class="title" href="magnet:?xt=urn:btih:a&amp;dn=The+Big+Bang+Theory">
                    <span class="glyphicon glyphicon-magnet"></span> magnet-link
                </a>
                &nbsp; &nbsp;
            </div>
            </td></tr>
        </table>
        """
        single_hit_page = mock.Mock(content=html)
        results = digbt.response(single_hit_page)

        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)

        first = results[0]
        self.assertEqual(first['title'], 'The Big Bang Theory')
        self.assertEqual(first['url'], 'https://digbt.org/The-Big-Bang-Theory-d2.html')
        self.assertEqual(first['content'], 'The Big Bang Theory 2.9 GB ....')
        self.assertEqual(first['filesize'], 3113851289)
        self.assertEqual(first['magnetlink'], 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory')