Browse Source

[fix] engine - kickass update url, fix parsing, use multiple mirrors

jazzzooo 1 year ago
parent
commit
1a66d74673
2 changed files with 38 additions and 67 deletions
  1. 32 66
      searx/engines/kickass.py
  2. 6 1
      searx/settings.yml

+ 32 - 66
searx/engines/kickass.py

@@ -1,16 +1,23 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Kickass Torrent (Videos, Music, Files)
-"""
+# lint: pylint
+"""Kickass Torrent (Videos, Music, Files)"""
 
 
-from lxml import html
+import random
 from operator import itemgetter
 from operator import itemgetter
-from urllib.parse import quote, urljoin
-from searx.utils import extract_text, get_torrent_size, convert_str_to_int
+from urllib.parse import quote
+
+from lxml import html
+from searx.utils import (
+    eval_xpath,
+    eval_xpath_getindex,
+    eval_xpath_list,
+    extract_text,
+    get_torrent_size,
+    int_or_zero,
+)
 
 
-# about
 about = {
 about = {
-    "website": 'https://kickass.so',
+    "website": 'https://kickasstorrents.to',
     "wikidata_id": 'Q17062285',
     "wikidata_id": 'Q17062285',
     "official_api_documentation": None,
     "official_api_documentation": None,
     "use_official_api": False,
     "use_official_api": False,
@@ -18,80 +25,39 @@ about = {
     "results": 'HTML',
     "results": 'HTML',
 }
 }
 
 
-# engine dependent config
 categories = ['files']
 categories = ['files']
 paging = True
 paging = True
 
 
-# search-url
-url = 'https://kickass.cd/'
-search_url = url + 'search/{search_term}/{pageno}/'
+# base_url can be overwritten by a list of URLs in the settings.yml
+base_url = 'https://kickasstorrents.to'
 
 
-# specific xpath variables
-magnet_xpath = './/a[@title="Torrent magnet link"]'
-torrent_xpath = './/a[@title="Download torrent file"]'
-content_xpath = './/span[@class="font11px lightgrey block"]'
 
 
-
-# do search-request
 def request(query, params):
 def request(query, params):
-    params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'])
+    params['base_url'] = random.choice(base_url) if isinstance(base_url, list) else base_url
+    params['url'] = params['base_url'] + f'/usearch/{quote(query)}/{params["pageno"]}/'
 
 
     return params
     return params
 
 
 
 
-# get response from search-request
 def response(resp):
 def response(resp):
     results = []
     results = []
-
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
 
 
-    search_res = dom.xpath('//table[@class="data"]//tr')
-
-    # return empty array if nothing is found
-    if not search_res:
+    search_res = eval_xpath_list(dom, '//table[contains(@class, "data")]//tr', None)
+    if search_res is None:
         return []
         return []
 
 
-    # parse results
-    for result in search_res[1:]:
-        link = result.xpath('.//a[@class="cellMainLink"]')[0]
-        href = urljoin(url, link.attrib['href'])
-        title = extract_text(link)
-        content = extract_text(result.xpath(content_xpath))
-        seed = extract_text(result.xpath('.//td[contains(@class, "green")]'))
-        leech = extract_text(result.xpath('.//td[contains(@class, "red")]'))
-        filesize_info = extract_text(result.xpath('.//td[contains(@class, "nobr")]'))
-        files = extract_text(result.xpath('.//td[contains(@class, "center")][2]'))
-
-        seed = convert_str_to_int(seed)
-        leech = convert_str_to_int(leech)
-
-        filesize, filesize_multiplier = filesize_info.split()
-        filesize = get_torrent_size(filesize, filesize_multiplier)
-        if files.isdigit():
-            files = int(files)
-        else:
-            files = None
-
-        magnetlink = result.xpath(magnet_xpath)[0].attrib['href']
-
-        torrentfile = result.xpath(torrent_xpath)[0].attrib['href']
-        torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*")
+    for tag in search_res[1:]:
+        result = {'template': 'torrent.html'}
+        url = eval_xpath_getindex(tag, './/a[contains(@class, "cellMainLink")]/@href', 0, None)
+        result['url'] = resp.search_params['base_url'] + url
+        result['title'] = extract_text(eval_xpath(tag, './/a[contains(@class, "cellMainLink")]'))
+        result['content'] = extract_text(eval_xpath(tag, './/span[@class="font11px lightgrey block"]'))
+        result['seed'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "green")]')))
+        result['leech'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "red")]')))
+        result['filesize'] = get_torrent_size(*extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]')).split())
 
 
-        # append result
-        results.append(
-            {
-                'url': href,
-                'title': title,
-                'content': content,
-                'seed': seed,
-                'leech': leech,
-                'filesize': filesize,
-                'files': files,
-                'magnetlink': magnetlink,
-                'torrentfile': torrentfileurl,
-                'template': 'torrent.html',
-            }
-        )
+        results.append(result)
 
 
-    # return results sorted by seeder
+    # results sorted by seeder count
     return sorted(results, key=itemgetter('seed'), reverse=True)
     return sorted(results, key=itemgetter('seed'), reverse=True)

+ 6 - 1
searx/settings.yml

@@ -912,9 +912,14 @@ engines:
 
 
   - name: kickass
   - name: kickass
     engine: kickass
     engine: kickass
+    base_url:
+      - https://kickasstorrents.to
+      - https://kickasstorrents.cr
+      - https://kickasstorrent.cr
+      - https://kickass.sx
+      - https://kat.am
     shortcut: kc
     shortcut: kc
     timeout: 4.0
     timeout: 4.0
-    disabled: true
 
 
   - name: lemmy communities
   - name: lemmy communities
     engine: lemmy
     engine: lemmy