Browse Source

[enh] yandex engine added

Adam Tauber 9 years ago
parent
commit
fafc564874
2 changed files with 60 additions and 0 deletions
  1. 55 0
      searx/engines/yandex.py
  2. 5 0
      searx/settings.yml

+ 55 - 0
searx/engines/yandex.py

@@ -0,0 +1,55 @@
+"""
+ Yandex (Web)
+
+ @website     https://yandex.ru/
+ @provide-api ?
+ @using-api   no
+ @results     HTML (using search portal)
+ @stable      no (HTML can change)
+ @parse       url, title, content
+"""
+
+from urllib import urlencode
+from lxml import html
+from searx.search import logger
+
+logger = logger.getChild('yandex engine')  # child of the logger imported from searx.search
+
+# engine dependent config
+categories = ['general']
+paging = True  # request() sends the page index as the `p` query parameter
+language_support = True  # TODO: request() does not send any language parameter yet
+
+# search-url
+base_url = 'https://yandex.ru/'
+search_url = 'search/?{query}&p={page}'  # {query} receives the urlencoded `text=...` pair
+
+results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]'
+url_xpath = './/h2/a/@href'  # evaluated relative to each results_xpath node
+title_xpath = './/h2/a//text()'  # text fragments, joined by response()
+content_xpath = './/div[@class="serp-item__text"]//text()'  # text fragments, joined by response()
+
+
+def request(query, params):
+    params['url'] = base_url + search_url.format(page=params['pageno']-1,
+                                                 query=urlencode({'text': query}))
+    return params
+
+
+# get response from search-request
+def response(resp):
+    dom = html.fromstring(resp.text)
+    results = []
+
+    for result in dom.xpath(results_xpath):
+        try:
+            res = {'url': result.xpath(url_xpath)[0],
+                   'title': ''.join(result.xpath(title_xpath)),
+                   'content': ''.join(result.xpath(content_xpath))}
+        except:
+            logger.exception('yandex parse crash')
+            continue
+
+        results.append(res)
+
+    return results

+ 5 - 0
searx/settings.yml

@@ -274,6 +274,11 @@ engines:
     engine : yahoo
     shortcut : yh
 
+  - name : yandex
+    engine : yandex
+    shortcut : ya
+    disabled : True
+
   - name : yahoo news
     engine : yahoo_news
     shortcut : yhn