Browse Source

update piratebay engine and add comments

Thomas Pointhuber 10 years ago
parent
commit
dae88d862b
2 changed files with 34 additions and 8 deletions
  1. 34 7
      searx/engines/piratebay.py
  2. 0 1
      searx/settings.yml

+ 34 - 7
searx/engines/piratebay.py

@@ -1,39 +1,61 @@
+## Piratebay (Videos, Music, Files)
+# 
+# @website     https://thepiratebay.se
+# @provide-api no (nothing found)
+# 
+# @using-api   no
+# @results     HTML (using search portal)
+# @stable      yes (HTML can change)
+# @parse       url, title, content, seed, leech, magnetlink
+
 from urlparse import urljoin
 from urlparse import urljoin
 from cgi import escape
 from cgi import escape
 from urllib import quote
 from urllib import quote
 from lxml import html
 from lxml import html
 from operator import itemgetter
 from operator import itemgetter
 
 
-categories = ['videos', 'music']
+# engine dependent config
+categories = ['videos', 'music', 'files']
+paging = True
 
 
+# search-url
 url = 'https://thepiratebay.se/'
 url = 'https://thepiratebay.se/'
 search_url = url + 'search/{search_term}/{pageno}/99/{search_type}'
 search_url = url + 'search/{search_term}/{pageno}/99/{search_type}'
-search_types = {'videos': '200',
+
+# piratebay specific type-definitions
+search_types = {'files': '0',                
                 'music': '100',
                 'music': '100',
-                'files': '0'}
+                'videos': '200'}
 
 
+# specific xpath variables
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
 magnet_xpath = './/a[@title="Download this torrent using magnet"]'
 content_xpath = './/font[@class="detDesc"]//text()'
 content_xpath = './/font[@class="detDesc"]//text()'
 
 
-paging = True
-
 
 
+# do search-request
 def request(query, params):
 def request(query, params):
-    search_type = search_types.get(params['category'], '200')
+    search_type = search_types.get(params['category'], '0')
+
     params['url'] = search_url.format(search_term=quote(query),
     params['url'] = search_url.format(search_term=quote(query),
                                       search_type=search_type,
                                       search_type=search_type,
                                       pageno=params['pageno'] - 1)
                                       pageno=params['pageno'] - 1)
+
     return params
     return params
 
 
 
 
+# get response from search-request
 def response(resp):
 def response(resp):
     results = []
     results = []
+
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
+
     search_res = dom.xpath('//table[@id="searchResult"]//tr')
     search_res = dom.xpath('//table[@id="searchResult"]//tr')
 
 
+    # return an empty list if no result rows are found
     if not search_res:
     if not search_res:
-        return results
+        return []
 
 
+    # parse results
     for result in search_res[1:]:
     for result in search_res[1:]:
         link = result.xpath('.//div[@class="detName"]//a')[0]
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         href = urljoin(url, link.attrib.get('href'))
@@ -41,17 +63,21 @@ def response(resp):
         content = escape(' '.join(result.xpath(content_xpath)))
         content = escape(' '.join(result.xpath(content_xpath)))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
 
 
+        # convert seed to int; fall back to 0 if it is not numeric
         if seed.isdigit():
         if seed.isdigit():
             seed = int(seed)
             seed = int(seed)
         else:
         else:
             seed = 0
             seed = 0
 
 
+        # convert leech to int; fall back to 0 if it is not numeric
         if leech.isdigit():
         if leech.isdigit():
             leech = int(leech)
             leech = int(leech)
         else:
         else:
             leech = 0
             leech = 0
 
 
         magnetlink = result.xpath(magnet_xpath)[0]
         magnetlink = result.xpath(magnet_xpath)[0]
+
+        # append result
         results.append({'url': href,
         results.append({'url': href,
                         'title': title,
                         'title': title,
                         'content': content,
                         'content': content,
@@ -60,4 +86,5 @@ def response(resp):
                         'magnetlink': magnetlink.attrib['href'],
                         'magnetlink': magnetlink.attrib['href'],
                         'template': 'torrent.html'})
                         'template': 'torrent.html'})
 
 
+    # return results sorted by seeder count, highest first
     return sorted(results, key=itemgetter('seed'), reverse=True)
     return sorted(results, key=itemgetter('seed'), reverse=True)

+ 0 - 1
searx/settings.yml

@@ -82,7 +82,6 @@ engines:
 
 
   - name : piratebay
   - name : piratebay
     engine : piratebay
     engine : piratebay
-    categories : videos, music, files
     shortcut : tpb
     shortcut : tpb
 
 
   - name : soundcloud
   - name : soundcloud