# btdigg.py (2.7 KB)
"""
 BTDigg (Videos, Music, Files)

 @website     https://btdigg.org
 @provide-api yes (on demand)

 @using-api   no
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content, seed, leech, filesize, files, magnetlink
"""
  10. from urlparse import urljoin
  11. from urllib import quote
  12. from lxml import html
  13. from operator import itemgetter
  14. from searx.engines.xpath import extract_text
  15. from searx.utils import get_torrent_size
  16. # engine dependent config
  17. categories = ['videos', 'music', 'files']
  18. paging = True
  19. # search-url
  20. url = 'https://btdigg.org'
  21. search_url = url + '/search?q={search_term}&p={pageno}'
  22. # do search-request
  23. def request(query, params):
  24. params['url'] = search_url.format(search_term=quote(query),
  25. pageno=params['pageno'] - 1)
  26. return params
  27. # get response from search-request
  28. def response(resp):
  29. results = []
  30. dom = html.fromstring(resp.content)
  31. search_res = dom.xpath('//div[@id="search_res"]/table/tr')
  32. # return empty array if nothing is found
  33. if not search_res:
  34. return []
  35. # parse results
  36. for result in search_res:
  37. link = result.xpath('.//td[@class="torrent_name"]//a')[0]
  38. href = urljoin(url, link.attrib.get('href'))
  39. title = extract_text(link)
  40. content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
  41. content = "<br />".join(content.split("\n"))
  42. filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
  43. filesize_multiplier = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[1]
  44. files = result.xpath('.//span[@class="attr_val"]/text()')[1]
  45. seed = result.xpath('.//span[@class="attr_val"]/text()')[2]
  46. # convert seed to int if possible
  47. if seed.isdigit():
  48. seed = int(seed)
  49. else:
  50. seed = 0
  51. leech = 0
  52. # convert filesize to byte if possible
  53. filesize = get_torrent_size(filesize, filesize_multiplier)
  54. # convert files to int if possible
  55. if files.isdigit():
  56. files = int(files)
  57. else:
  58. files = None
  59. magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']
  60. # append result
  61. results.append({'url': href,
  62. 'title': title,
  63. 'content': content,
  64. 'seed': seed,
  65. 'leech': leech,
  66. 'filesize': filesize,
  67. 'files': files,
  68. 'magnetlink': magnetlink,
  69. 'template': 'torrent.html'})
  70. # return results sorted by seeder
  71. return sorted(results, key=itemgetter('seed'), reverse=True)