seznam.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Seznam (Czech web search engine)"""

from urllib.parse import urlencode

from lxml import html

from searx.network import get
from searx.exceptions import SearxEngineAccessDeniedException
from searx.utils import (
    extract_text,
    eval_xpath_list,
    eval_xpath_getindex,
)

# about
about = {
    "website": "https://www.seznam.cz/",
    "wikidata_id": "Q3490485",
    "official_api_documentation": "https://api.sklik.cz/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
    "language": "cz",
}

categories = ['general', 'web']
base_url = 'https://search.seznam.cz/'
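
# SearXNG engine interface: ``request(query, params)`` fills ``params`` with
# the URL (and cookies) of the outgoing HTTP request, and ``response(resp)``
# parses the HTTP reply into a list of result dicts
# ({'url': ..., 'title': ..., 'content': ...}).
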
def request(query, params):
    # Seznam expects the hidden form fields and cookies of its search page,
    # so fetch the page first and carry both over into the actual query.
    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(response_index.text)

    url_params = {
        'q': query,
        'oq': query,
    }
    # Copy every hidden <input> of the search form into the query string.
    for e in eval_xpath_list(dom, '//input[@type="hidden"]'):
        name = e.get('name')
        value = e.get('value')
        url_params[name] = value

    params['url'] = base_url + '?' + urlencode(url_params)
    params['cookies'] = response_index.cookies
    return params


def response(resp):
    # A redirect to /verify means Seznam is asking for verification
    # (bot check); report access denied instead of returning an empty page.
    if resp.url.path.startswith('/verify'):
        raise SearxEngineAccessDeniedException()

    results = []
    dom = html.fromstring(resp.content.decode())

    for result_element in eval_xpath_list(
        dom, '//div[@id="searchpage-root"]//div[@class="Layout--left"]/div[@class="f2c528"]'
    ):
        result_data = eval_xpath_getindex(
            result_element, './/div[@class="c8774a" or @class="e69e8d a11657"]', 0, default=None
        )
        if result_data is None:
            continue
        title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
        results.append(
            {
                'url': title_element.get('href'),
                'title': extract_text(title_element),
                'content': extract_text(result_data),
            }
        )

    return results
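

if __name__ == '__main__':
    # Minimal standalone sketch, not part of the engine API: assumes a SearXNG
    # checkout on the path (for the searx.* imports above) and network access.
    # httpx is used directly here because its Response exposes the same
    # ``url.path`` / ``content`` attributes that ``response()`` relies on.
    import httpx

    headers = {'User-Agent': 'Mozilla/5.0'}
    built = request('fulltextové vyhledávání', {'headers': headers})
    reply = httpx.get(
        built['url'],
        headers=headers,
        cookies=built['cookies'],
        follow_redirects=True,  # needed so a /verify redirect is observable
    )
    for item in response(reply):
        print(item['title'], '->', item['url'])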