# subtitleseeker.py
"""
 Subtitleseeker (Video)

 @website     http://www.subtitleseeker.com
 @provide-api no

 @using-api   no
 @results     HTML
 @stable      no (HTML can change)
 @parse       url, title, content
"""
  10. from urllib import quote_plus
  11. from lxml import html
  12. from searx.languages import language_codes
  13. from searx.engines.xpath import extract_text
  14. # engine dependent config
  15. categories = ['videos']
  16. paging = True
  17. language = ""
  18. # search-url
  19. url = 'http://www.subtitleseeker.com/'
  20. search_url = url + 'search/TITLES/{query}&p={pageno}'
  21. # specific xpath variables
  22. results_xpath = '//div[@class="boxRows"]'
  23. # do search-request
  24. def request(query, params):
  25. params['url'] = search_url.format(query=quote_plus(query),
  26. pageno=params['pageno'])
  27. return params
  28. # get response from search-request
  29. def response(resp):
  30. results = []
  31. dom = html.fromstring(resp.text)
  32. search_lang = ""
  33. if resp.search_params['language'] != 'all':
  34. search_lang = [lc[1]
  35. for lc in language_codes
  36. if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
  37. # parse results
  38. for result in dom.xpath(results_xpath):
  39. link = result.xpath(".//a")[0]
  40. href = link.attrib.get('href')
  41. if language is not "":
  42. href = href + language + '/'
  43. elif search_lang:
  44. href = href + search_lang + '/'
  45. title = extract_text(link)
  46. content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
  47. content = content + " - "
  48. text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
  49. content = content + text
  50. if result.xpath(".//span") != []:
  51. content = content +\
  52. " - (" +\
  53. extract_text(result.xpath(".//span")) +\
  54. ")"
  55. # append result
  56. results.append({'url': href,
  57. 'title': title,
  58. 'content': content})
  59. # return results
  60. return results