yandex.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Yandex (Web)
  4. """
  5. from urllib.parse import urlencode, urlparse
  6. from lxml import html
  7. from searx import logger
  8. from searx.exceptions import SearxEngineCaptchaException
  9. logger = logger.getChild('yandex engine')
  10. # about
  11. about = {
  12. "website": 'https://yandex.ru/',
  13. "wikidata_id": 'Q5281',
  14. "official_api_documentation": "?",
  15. "use_official_api": False,
  16. "require_api_key": False,
  17. "results": 'HTML',
  18. }
  19. # engine dependent config
  20. categories = ['general']
  21. paging = True
  22. default_tld = 'com'
  23. language_map = {'ru': 'ru',
  24. 'ua': 'ua',
  25. 'be': 'by',
  26. 'kk': 'kz',
  27. 'tr': 'com.tr'}
  28. # search-url
  29. base_url = 'https://yandex.{tld}/'
  30. search_url = 'search/?{query}&p={page}'
  31. results_xpath = '//li[@class="serp-item"]'
  32. url_xpath = './/h2/a/@href'
  33. title_xpath = './/h2/a//text()'
  34. content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m organic__text"]//text()'
  35. def request(query, params):
  36. lang = params['language'].split('-')[0]
  37. host = base_url.format(tld=language_map.get(lang) or default_tld)
  38. params['url'] = host + search_url.format(page=params['pageno'] - 1,
  39. query=urlencode({'text': query}))
  40. return params
  41. # get response from search-request
  42. def response(resp):
  43. resp_url = urlparse(resp.url)
  44. if resp_url.path.startswith('/showcaptcha'):
  45. raise SearxEngineCaptchaException()
  46. dom = html.fromstring(resp.text)
  47. results = []
  48. for result in dom.xpath(results_xpath):
  49. try:
  50. res = {'url': result.xpath(url_xpath)[0],
  51. 'title': ''.join(result.xpath(title_xpath)),
  52. 'content': ''.join(result.xpath(content_xpath))}
  53. except:
  54. logger.exception('yandex parse crash')
  55. continue
  56. results.append(res)
  57. return results