Browse Source

PirateBay unit test + reactivation in Settings

Cqoicebordel 10 years ago
parent
commit
5a16077455

+ 8 - 4
searx/engines/piratebay.py

@@ -13,6 +13,7 @@ from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['videos', 'music', 'files']
@@ -29,7 +30,8 @@ search_types = {'files': '0',
 
 # specific xpath variables
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
-content_xpath = './/font[@class="detDesc"]//text()'
+torrent_xpath = './/a[@title="Download this torrent"]'
+content_xpath = './/font[@class="detDesc"]'
 
 
 # do search-request
@@ -59,8 +61,8 @@ def response(resp):
     for result in search_res[1:]:
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = ' '.join(link.xpath('.//text()'))
-        content = escape(' '.join(result.xpath(content_xpath)))
+        title = extract_text(link)
+        content = escape(extract_text(result.xpath(content_xpath)))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
 
         # convert seed to int if possible
@@ -76,6 +78,7 @@ def response(resp):
             leech = 0
 
         magnetlink = result.xpath(magnet_xpath)[0]
+        torrentfile = result.xpath(torrent_xpath)[0]
 
         # append result
         results.append({'url': href,
@@ -83,7 +86,8 @@ def response(resp):
                         'content': content,
                         'seed': seed,
                         'leech': leech,
-                        'magnetlink': magnetlink.attrib['href'],
+                        'magnetlink': magnetlink.attrib.get('href'),
+                        'torrentfile': torrentfile.attrib.get('href'),
                         'template': 'torrent.html'})
 
     # return results sorted by seeder

+ 3 - 3
searx/settings.yml

@@ -152,9 +152,9 @@ engines:
     engine : photon
     shortcut : ph
 
-#  - name : piratebay
-#    engine : piratebay
-#    shortcut : tpb
+  - name : piratebay
+    engine : piratebay
+    shortcut : tpb
 
   - name : kickass
     engine : kickass

+ 137 - 0
searx/tests/engines/test_piratebay.py

@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import piratebay
+from searx.testing import SearxTestCase
+
+
+class TestPiratebayEngine(SearxTestCase):  # covers piratebay.request() and piratebay.response()
+
+    def test_request(self):  # URL built by request() carries query, host and type code
+        query = 'test_query'
+        dicto = defaultdict(dict)  # stand-in for the params dict handed to request()
+        dicto['pageno'] = 1
+        dicto['category'] = 'Toto'  # presumably an unlisted category — TODO confirm it maps to '0'
+        params = piratebay.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertIn(query, params['url'])
+        self.assertIn('piratebay.cr', params['url'])
+        self.assertIn('0', params['url'])  # NOTE(review): loose — matches any '0' in the URL
+        # switching to the 'music' category is expected to put '100' in the URL
+        dicto['category'] = 'music'
+        params = piratebay.request(query, dicto)
+        self.assertIn('100', params['url'])
+
+    def test_response(self):  # response() parsing: bad input, empty page, result rows
+        self.assertRaises(AttributeError, piratebay.response, None)  # non-response inputs raise
+        self.assertRaises(AttributeError, piratebay.response, [])
+        self.assertRaises(AttributeError, piratebay.response, '')
+        self.assertRaises(AttributeError, piratebay.response, '[]')
+        # a page without the result table produces an empty result list
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(piratebay.response(response), [])
+        # one result row with numeric seed/leech cells; the leading empty <tr> is not a result
+        html = """
+        <table id="searchResult">
+            <tr>
+            </tr>
+            <tr>
+                <td class="vertTh">
+                    <center>
+                        <a href="#" title="More from this category">Anime</a><br/>
+                        (<a href="#" title="More from this category">Anime</a>)
+                    </center>
+                </td>
+                <td>
+                    <div class="detName">
+                        <a href="/this.is.the.link" class="detLink" title="Title">
+                            This is the title
+                        </a>
+                    </div>
+                    <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
+                        <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
+                    </a>
+                    <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
+                        <img src="/static/img/dl.gif" class="dl" alt="Download"/>
+                    </a>
+                    <a href="/user/HorribleSubs">
+                        <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
+                    </a>
+                    <img src="/static/img/11x11p.png"/>
+                    <font class="detDesc">
+                        This is the content <span>and should be</span> OK
+                    </font>
+                </td>
+                <td align="right">13</td>
+                <td align="right">334</td>
+            </tr>
+        </table>
+        """
+        response = mock.Mock(text=html)
+        results = piratebay.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'This is the title')
+        self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
+        self.assertEqual(results[0]['content'], 'This is the content and should be OK')
+        self.assertEqual(results[0]['seed'], 13)
+        self.assertEqual(results[0]['leech'], 334)
+        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
+        self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
+        # same row, but seed/leech cells hold non-numeric text and must come back as 0
+        html = """
+        <table id="searchResult">
+            <tr>
+            </tr>
+            <tr>
+                <td class="vertTh">
+                    <center>
+                        <a href="#" title="More from this category">Anime</a><br/>
+                        (<a href="#" title="More from this category">Anime</a>)
+                    </center>
+                </td>
+                <td>
+                    <div class="detName">
+                        <a href="/this.is.the.link" class="detLink" title="Title">
+                            This is the title
+                        </a>
+                    </div>
+                    <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
+                        <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
+                    </a>
+                    <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
+                        <img src="/static/img/dl.gif" class="dl" alt="Download"/>
+                    </a>
+                    <a href="/user/HorribleSubs">
+                        <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
+                    </a>
+                    <img src="/static/img/11x11p.png"/>
+                    <font class="detDesc">
+                        This is the content <span>and should be</span> OK
+                    </font>
+                </td>
+                <td align="right">s</td>
+                <td align="right">d</td>
+            </tr>
+        </table>
+        """
+        response = mock.Mock(text=html)
+        results = piratebay.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'This is the title')
+        self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
+        self.assertEqual(results[0]['content'], 'This is the content and should be OK')
+        self.assertEqual(results[0]['seed'], 0)  # 's' is not a number
+        self.assertEqual(results[0]['leech'], 0)  # 'd' is not a number
+        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
+        self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
+        # a result table with no rows yields an empty list
+        html = """
+        <table id="searchResult">
+        </table>
+        """
+        response = mock.Mock(text=html)
+        results = piratebay.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)

+ 1 - 0
searx/tests/test_engines.py

@@ -14,6 +14,7 @@ from searx.tests.engines.test_google_images import *  # noqa
 from searx.tests.engines.test_google_news import *  # noqa
 from searx.tests.engines.test_kickass import *  # noqa
 from searx.tests.engines.test_mixcloud import *  # noqa
+from searx.tests.engines.test_piratebay import *  # noqa
 from searx.tests.engines.test_searchcode_code import *  # noqa
 from searx.tests.engines.test_searchcode_doc import *  # noqa
 from searx.tests.engines.test_soundcloud import *  # noqa