duckduckgo.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. """
  2. DuckDuckGo (Web)
  3. @website https://duckduckgo.com/
  4. @provide-api yes (https://duckduckgo.com/api),
  5. but not all results from search-site
  6. @using-api no
  7. @results HTML (using search portal)
  8. @stable no (HTML can change)
  9. @parse url, title, content
  10. @todo rewrite to api
  11. """
  12. from urllib import urlencode
  13. from lxml.html import fromstring
  14. from searx.engines.xpath import extract_text
  # --- engine metadata -----------------------------------------------------
  # Category list this engine is registered under.
  categories = ['general']
  # The engine builds URLs for result pages beyond the first one.
  paging = True
  # The engine reads params['language'] when building the request.
  language_support = True

  # --- request template ----------------------------------------------------
  # {query} receives the url-encoded query string, {offset} the index of the
  # first result of the requested page.
  url = 'https://duckduckgo.com/html?{query}&s={offset}'

  # --- xpath selectors for the HTML result page ----------------------------
  # One node per search result; the class attribute is compared as an exact
  # string, including its trailing space.
  result_xpath = (
      '//div[@class="result results_links results_links_deep web-result "]'
  )
  # Evaluated relative to a single result node.
  url_xpath = './/a[@class="result__a"]/@href'
  title_xpath = './/a[@class="result__a"]'
  content_xpath = './/a[@class="result__snippet"]'
  26. # do search-request
  def request(query, params):
      """Fill in ``params['url']`` with the search URL for *query*.

      Reads ``params['pageno']`` (1-based page number, 30 results per page)
      and ``params['language']`` (``'all'`` or a code such as ``xx_YY``).
      The same ``params`` dict is returned after ``'url'`` has been set.
      """
      offset = (params['pageno'] - 1) * 30

      language = params['language']
      if language == 'all':
          region = None
      else:
          parts = language.split('_')
          if len(parts) == 2:
              # country code goes first: "xx_YY" becomes "yy-xx"
              region = '-'.join((parts[1].lower(), parts[0].lower()))
          else:
              # doesn't actually do anything because ddg requires both
              # country and language
              region = parts[0].lower()

      # 'kl' is only sent when a non-empty region value was derived
      query_args = {'q': query}
      if region:
          query_args['kl'] = region

      params['url'] = url.format(query=urlencode(query_args), offset=offset)
      return params
  46. # get response from search-request
  def response(resp):
      """Parse a DuckDuckGo HTML result page into a list of result dicts.

      ``resp.text`` is parsed with ``lxml.html.fromstring``. Every node
      matched by ``result_xpath`` contributes one dict with the keys
      ``'title'``, ``'content'`` and ``'url'``; nodes without a usable link
      are skipped.

      Returns:
          list of dicts, in document order of the matched result nodes.
      """
      results = []

      doc = fromstring(resp.text)

      # parse results
      for r in doc.xpath(result_xpath):
          try:
              # the last matching href of the result node is used as its url
              res_url = r.xpath(url_xpath)[-1]
          except IndexError:
              # No link inside this node. Only this case is skipped; other
              # exceptions (and KeyboardInterrupt/SystemExit) are no longer
              # silently swallowed as they were by the former bare except.
              continue

          # an empty href is as useless as a missing one
          if not res_url:
              continue

          title = extract_text(r.xpath(title_xpath))
          content = extract_text(r.xpath(content_xpath))

          # append result
          results.append({'title': title,
                          'content': content,
                          'url': res_url})

      # return results
      return results