Browse Source

rewrite duckduckgo engine and add comments

Thomas Pointhuber 10 years ago
parent
commit
e6e4de8ba0
2 changed files with 37 additions and 36 deletions
  1. 37 34
      searx/engines/duckduckgo.py
  2. 0 2
      searx/settings.yml

+ 37 - 34
searx/engines/duckduckgo.py

@@ -1,24 +1,48 @@
+## DuckDuckGo (Web)
+# 
+# @website     https://duckduckgo.com/
+# @provide-api yes (https://duckduckgo.com/api), but the API does not return all results of the search site
+# 
+# @using-api   no
+# @results     HTML (using search portal)
+# @stable      no (HTML can change)
+# @parse       url, title, content
+#
+# @todo        rewrite to use the API
+# @todo        language support
+
 from urllib import urlencode
 from urllib import urlencode
 from lxml.html import fromstring
 from lxml.html import fromstring
 from searx.utils import html_to_text
 from searx.utils import html_to_text
 
 
-url = 'https://duckduckgo.com/html?{query}&s={offset}'
+# engine dependent config
+categories = ['general']
+paging = True
 locale = 'us-en'
 locale = 'us-en'
 
 
+# search-url
+url = 'https://duckduckgo.com/html?{query}&s={offset}'
+
+# specific xpath variables
+result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
+url_xpath = './/a[@class="large"]/@href'
+title_xpath = './/a[@class="large"]//text()'
+content_xpath = './/div[@class="snippet"]//text()'
 
 
+
+# do search-request
 def request(query, params):
 def request(query, params):
     offset = (params['pageno'] - 1) * 30
     offset = (params['pageno'] - 1) * 30
-    q = urlencode({'q': query,
-                   'l': locale})
-    params['url'] = url.format(query=q, offset=offset)
+
+    params['url'] = url.format(
+        query=urlencode({'q': query, 'l': locale}),
+        offset=offset)
+
     return params
     return params
 
 
 
 
+# get response from search-request
 def response(resp):
 def response(resp):
-    result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
-    url_xpath = './/a[@class="large"]/@href'
-    title_xpath = './/a[@class="large"]//text()'
-    content_xpath = './/div[@class="snippet"]//text()'
     results = []
     results = []
 
 
     doc = fromstring(resp.text)
     doc = fromstring(resp.text)
@@ -28,38 +52,17 @@ def response(resp):
             res_url = r.xpath(url_xpath)[-1]
             res_url = r.xpath(url_xpath)[-1]
         except:
         except:
             continue
             continue
+
         if not res_url:
         if not res_url:
             continue
             continue
+
         title = html_to_text(''.join(r.xpath(title_xpath)))
         title = html_to_text(''.join(r.xpath(title_xpath)))
         content = html_to_text(''.join(r.xpath(content_xpath)))
         content = html_to_text(''.join(r.xpath(content_xpath)))
+
+        # append result
         results.append({'title': title,
         results.append({'title': title,
                         'content': content,
                         'content': content,
                         'url': res_url})
                         'url': res_url})
 
 
+    # return results
     return results
     return results
-
-
-#from json import loads
-#search_url = url + 'd.js?{query}&p=1&s={offset}'
-#
-#paging = True
-#
-#
-#def request(query, params):
-#    offset = (params['pageno'] - 1) * 30
-#    q = urlencode({'q': query,
-#                   'l': locale})
-#    params['url'] = search_url.format(query=q, offset=offset)
-#    return params
-#
-#
-#def response(resp):
-#    results = []
-#    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
-#    for r in search_res:
-#        if not r.get('t'):
-#            continue
-#        results.append({'title': r['t'],
-#                       'content': html_to_text(r['a']),
-#                       'url': r['u']})
-#    return results

+ 0 - 2
searx/settings.yml

@@ -37,7 +37,6 @@ engines:
 
 
   - name : deviantart
   - name : deviantart
     engine : deviantart
     engine : deviantart
-    categories : images
     shortcut : da
     shortcut : da
     timeout: 3.0
     timeout: 3.0
 
 
@@ -47,7 +46,6 @@ engines:
 
 
   - name : duckduckgo
   - name : duckduckgo
     engine : duckduckgo
     engine : duckduckgo
-    locale : en-us
     shortcut : ddg
     shortcut : ddg
 
 
 # down - website is under criminal investigation by the UK
 # down - website is under criminal investigation by the UK