# subtitleseeker.py
  1. """
  2. Subtitleseeker (Video)
  3. @website http://www.subtitleseeker.com
  4. @provide-api no
  5. @using-api no
  6. @results HTML
  7. @stable no (HTML can change)
  8. @parse url, title, content
  9. """
  10. from urllib import quote_plus
  11. from lxml import html
  12. from searx.languages import language_codes
  13. from searx.engines.xpath import extract_text
  14. # engine dependent config
  15. categories = ['videos']
  16. paging = True
  17. language = ""
  18. # search-url
  19. url = 'http://www.subtitleseeker.com/'
  20. search_url = url + 'search/TITLES/{query}&p={pageno}'
  21. # specific xpath variables
  22. results_xpath = '//div[@class="boxRows"]'
  23. # do search-request
  24. def request(query, params):
  25. params['url'] = search_url.format(query=quote_plus(query),
  26. pageno=params['pageno'])
  27. return params
  28. # get response from search-request
  29. def response(resp):
  30. results = []
  31. dom = html.fromstring(resp.text)
  32. search_lang = ""
  33. # dirty fix for languages named differenly in their site
  34. if resp.search_params['language'][:2] == 'fa':
  35. search_lang = 'Farsi'
  36. elif resp.search_params['language'] == 'pt_BR':
  37. search_lang = 'Brazilian'
  38. elif resp.search_params['language'] != 'all':
  39. search_lang = [lc[3]
  40. for lc in language_codes
  41. if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
  42. # parse results
  43. for result in dom.xpath(results_xpath):
  44. link = result.xpath(".//a")[0]
  45. href = link.attrib.get('href')
  46. if language is not "":
  47. href = href + language + '/'
  48. elif search_lang:
  49. href = href + search_lang + '/'
  50. title = extract_text(link)
  51. content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
  52. content = content + " - "
  53. text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
  54. content = content + text
  55. if result.xpath(".//span") != []:
  56. content = content +\
  57. " - (" +\
  58. extract_text(result.xpath(".//span")) +\
  59. ")"
  60. # append result
  61. results.append({'url': href,
  62. 'title': title,
  63. 'content': content})
  64. # return results
  65. return results