yandex.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Yandex (Web)
  4. """
  5. from urllib.parse import urlencode, urlparse
  6. from lxml import html
  7. from searx import logger
  8. from searx.exceptions import SearxEngineCaptchaException
  9. logger = logger.getChild('yandex engine')
  10. # about
  11. about = {
  12. "website": 'https://yandex.ru/',
  13. "wikidata_id": 'Q5281',
  14. "official_api_documentation": "?",
  15. "use_official_api": False,
  16. "require_api_key": False,
  17. "results": 'HTML',
  18. }
  19. # engine dependent config
  20. categories = ['general']
  21. paging = True
  22. language_support = True # TODO
  23. default_tld = 'com'
  24. language_map = {'ru': 'ru',
  25. 'ua': 'ua',
  26. 'be': 'by',
  27. 'kk': 'kz',
  28. 'tr': 'com.tr'}
  29. # search-url
  30. base_url = 'https://yandex.{tld}/'
  31. search_url = 'search/?{query}&p={page}'
  32. results_xpath = '//li[@class="serp-item"]'
  33. url_xpath = './/h2/a/@href'
  34. title_xpath = './/h2/a//text()'
  35. content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m organic__text"]//text()'
  36. def request(query, params):
  37. lang = params['language'].split('-')[0]
  38. host = base_url.format(tld=language_map.get(lang) or default_tld)
  39. params['url'] = host + search_url.format(page=params['pageno'] - 1,
  40. query=urlencode({'text': query}))
  41. return params
  42. # get response from search-request
  43. def response(resp):
  44. resp_url = urlparse(resp.url)
  45. if resp_url.path.startswith('/showcaptcha'):
  46. raise SearxEngineCaptchaException()
  47. dom = html.fromstring(resp.text)
  48. results = []
  49. for result in dom.xpath(results_xpath):
  50. try:
  51. res = {'url': result.xpath(url_xpath)[0],
  52. 'title': ''.join(result.xpath(title_xpath)),
  53. 'content': ''.join(result.xpath(content_xpath))}
  54. except:
  55. logger.exception('yandex parse crash')
  56. continue
  57. results.append(res)
  58. return results