seznam.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Seznam
  4. """
  5. from urllib.parse import urlencode
  6. from lxml import html
  7. from searx.network import get
  8. from searx.exceptions import SearxEngineAccessDeniedException
  9. from searx.utils import (
  10. extract_text,
  11. eval_xpath_list,
  12. eval_xpath_getindex,
  13. eval_xpath,
  14. )
  15. # about
  16. about = {
  17. "website": "https://www.seznam.cz/",
  18. "wikidata_id": "Q3490485",
  19. "official_api_documentation": "https://api.sklik.cz/",
  20. "use_official_api": False,
  21. "require_api_key": False,
  22. "results": "HTML",
  23. "language": "cz",
  24. }
  25. base_url = 'https://search.seznam.cz/'
  26. def request(query, params):
  27. response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
  28. dom = html.fromstring(response_index.text)
  29. url_params = {
  30. 'q': query,
  31. 'oq': query,
  32. }
  33. for e in eval_xpath_list(dom, '//input[@type="hidden"]'):
  34. name = e.get('name')
  35. value = e.get('value')
  36. url_params[name] = value
  37. params['url'] = base_url + '?' + urlencode(url_params)
  38. params['cookies'] = response_index.cookies
  39. return params
  40. def response(resp):
  41. if resp.url.path.startswith('/verify'):
  42. raise SearxEngineAccessDeniedException()
  43. results = []
  44. dom = html.fromstring(resp.content.decode())
  45. for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'):
  46. result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "bec586")]', 0, default=None)
  47. if result_data is None:
  48. continue
  49. title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
  50. results.append({
  51. 'url': title_element.get('href'),
  52. 'title': extract_text(title_element),
  53. 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')),
  54. })
  55. return results