# rottentomatoes.py
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""RottenTomatoes (movies)
"""
from urllib.parse import quote_plus
from lxml import html
from searx.utils import eval_xpath, eval_xpath_list, extract_text

# about
  9. about = {
  10. "website": 'https://www.rottentomatoes.com/',
  11. "wikidata_id": 'Q105584',
  12. "official_api_documentation": None,
  13. "use_official_api": False,
  14. "require_api_key": False,
  15. "results": 'HTML',
  16. }
  17. categories = ['movies']
  18. base_url = "https://www.rottentomatoes.com"
  19. results_xpath = "//search-page-media-row"
  20. url_xpath = "./a[1]/@href"
  21. title_xpath = "./a/img/@alt"
  22. img_src_xpath = "./a/img/@src"
  23. release_year_xpath = "concat('From ', string(./@releaseyear))"
  24. score_xpath = "concat('Score: ', string(./@tomatometerscore))"
  25. cast_xpath = "concat('Starring ', string(./@cast))"
  26. def request(query, params):
  27. params["url"] = f"{base_url}/search?search={quote_plus(query)}"
  28. return params
  29. def response(resp):
  30. results = []
  31. dom = html.fromstring(resp.text)
  32. for result in eval_xpath_list(dom, results_xpath):
  33. content = []
  34. for xpath in (release_year_xpath, score_xpath, cast_xpath):
  35. info = extract_text(eval_xpath(result, xpath))
  36. # a gap in the end means that no data was found
  37. if info and info[-1] != " ":
  38. content.append(info)
  39. results.append(
  40. {
  41. 'url': extract_text(eval_xpath(result, url_xpath)),
  42. 'title': extract_text(eval_xpath(result, title_xpath)),
  43. 'content': ', '.join(content),
  44. 'img_src': extract_text(eval_xpath(result, img_src_xpath)),
  45. }
  46. )
  47. return results