yandex.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. """
  2. Yandex (Web)
  3. @website https://yandex.ru/
  4. @provide-api ?
  5. @using-api no
  6. @results HTML (using search portal)
  7. @stable no (HTML can change)
  8. @parse url, title, content
  9. """
  10. from urllib import urlencode
  11. from lxml import html
  12. from searx.search import logger
# Engine-specific child logger; response() reports per-result parse failures
# through it.
logger = logger.getChild('yandex engine')

# engine dependent config
categories = ['general']
# request() reads params['pageno'] to choose which result page to fetch.
paging = True
# NOTE(review): request() in this file does not read any language setting,
# which is presumably what the TODO refers to -- confirm before relying on it.
language_support = True # TODO

# search-url
base_url = 'https://yandex.ru/'
# request() substitutes {query} with the urlencoded 'text' parameter and
# {page} with params['pageno'] - 1.
search_url = 'search/?{query}&p={page}'

# Selects <div> elements whose class attribute equals this exact string;
# response() iterates over the matches.
results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]'
# The three expressions below are evaluated relative to each node matched by
# results_xpath (note the leading './/').
url_xpath = './/h2/a/@href'
title_xpath = './/h2/a//text()'
content_xpath = './/div[@class="serp-item__text"]//text()'
  25. def request(query, params):
  26. params['url'] = base_url + search_url.format(page=params['pageno']-1,
  27. query=urlencode({'text': query}))
  28. return params
  29. # get response from search-request
  30. def response(resp):
  31. dom = html.fromstring(resp.text)
  32. results = []
  33. for result in dom.xpath(results_xpath):
  34. try:
  35. res = {'url': result.xpath(url_xpath)[0],
  36. 'title': ''.join(result.xpath(title_xpath)),
  37. 'content': ''.join(result.xpath(content_xpath))}
  38. except:
  39. logger.exception('yandex parse crash')
  40. continue
  41. results.append(res)
  42. return results