btdigg.py

## BTDigg (Videos, Music, Files)
#
# @website https://btdigg.org
# @provide-api yes (on demand)
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, seed, leech, magnetlink

from urlparse import urljoin
from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['videos', 'music', 'files']
paging = True

# search-url
url = 'https://btdigg.org'
search_url = url + '/search?q={search_term}&p={pageno}'


# do search-request
def request(query, params):
    params['url'] = search_url.format(search_term=quote(query),
                                      pageno=params['pageno'] - 1)

    # FIX: SSLError: hostname 'btdigg.org'
    # doesn't match either of 'ssl2000.cloudflare.com', 'cloudflare.com', '*.cloudflare.com'
    params['verify'] = False

    return params
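
# Illustration (comment added here, not in the upstream file): searching
# "big buck bunny" on searx page 2 produces
#     https://btdigg.org/search?q=big%20buck%20bunny&p=1
# since BTDigg's `p` parameter is zero-based while searx's pageno starts at 1.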


# get response from search-request
def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@id="search_res"]/table/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = escape(extract_text(link))
        content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
        content = "<br />".join(content.split("\n"))

        filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
        filesize_multiplier = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[1]
        files = result.xpath('.//span[@class="attr_val"]/text()')[1]
        seed = result.xpath('.//span[@class="attr_val"]/text()')[2]

        # convert seed to int if possible
        if seed.isdigit():
            seed = int(seed)
        else:
            seed = 0

        leech = 0

        # convert filesize to byte if possible
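        # (illustrative comment, not in the upstream file: an attr_val of
        #  "3.5 GB" becomes int(3.5 * 1024 ** 3) == 3758096384 bytes)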
        try:
            filesize = float(filesize)

            # convert filesize to byte
            if filesize_multiplier == 'TB':
                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
            elif filesize_multiplier == 'GB':
                filesize = int(filesize * 1024 * 1024 * 1024)
            elif filesize_multiplier == 'MB':
                filesize = int(filesize * 1024 * 1024)
            elif filesize_multiplier == 'KB':
                filesize = int(filesize * 1024)
        except:
            filesize = None

        # convert files to int if possible
        if files.isdigit():
            files = int(files)
        else:
            files = None

        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'files': files,
                        'magnetlink': magnetlink,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
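

# Hedged usage sketch (added for illustration; not part of the upstream engine,
# and it assumes a searx checkout so the `searx.engines.xpath` import above
# resolves). searx builds a params dict, passes it to request() to get the
# outgoing URL, and later hands the HTTP response object to response().
if __name__ == '__main__':
    params = request('ubuntu', {'pageno': 1})
    print(params['url'])  # -> https://btdigg.org/search?q=ubuntu&p=0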