seznam.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Seznam
"""

from urllib.parse import urlencode
from lxml import html

from searx.network import get
from searx.exceptions import SearxEngineAccessDeniedException
from searx.utils import (
    extract_text,
    eval_xpath_list,
    eval_xpath_getindex,
    eval_xpath,
)

# about
about = {
    "website": "https://www.seznam.cz/",
    "wikidata_id": "Q3490485",
    "official_api_documentation": "https://api.sklik.cz/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
    "language": "cz",
}

categories = ['general', 'web']
base_url = 'https://search.seznam.cz/'
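

# request(): the Seznam search endpoint expects the hidden form fields and the
# cookies handed out by the start page, so the outgoing request is built in two
# steps: fetch the start page, copy the values of its hidden <input> elements
# into the query string, and forward its cookies along with the search request.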
def request(query, params):
    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(response_index.text)

    url_params = {
        'q': query,
        'oq': query,
    }
    for e in eval_xpath_list(dom, '//input[@type="hidden"]'):
        name = e.get('name')
        value = e.get('value')
        url_params[name] = value

    params['url'] = base_url + '?' + urlencode(url_params)
    params['cookies'] = response_index.cookies
    return params
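

# response(): parse the HTML result page.  The "bec586" and "_3eded7" class
# names appear to be generated (obfuscated) class names from Seznam's markup
# and may change, which would break the XPath selectors below.  A redirect to
# /verify indicates Seznam's bot-verification page and is reported as an
# access-denied error.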
def response(resp):
    if resp.url.path.startswith('/verify'):
        raise SearxEngineAccessDeniedException()

    results = []

    dom = html.fromstring(resp.content.decode())
    for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'):
        result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "bec586")]', 0, default=None)
        if result_data is None:
            continue
        title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
        results.append(
            {
                'url': title_element.get('href'),
                'title': extract_text(title_element),
                'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')),
            }
        )

    return results