# duckduckgo.py
try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

from lxml.html import fromstring

from searx.utils import html_to_text
  4. url = 'https://duckduckgo.com/html?{query}&s={offset}'
  5. locale = 'us-en'
  6. def request(query, params):
  7. offset = (params['pageno'] - 1) * 30
  8. q = urlencode({'q': query,
  9. 'l': locale})
  10. params['url'] = url.format(query=q, offset=offset)
  11. return params
  12. def response(resp):
  13. result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa
  14. url_xpath = './/a[@class="large"]/@href'
  15. title_xpath = './/a[@class="large"]//text()'
  16. content_xpath = './/div[@class="snippet"]//text()'
  17. results = []
  18. doc = fromstring(resp.text)
  19. for r in doc.xpath(result_xpath):
  20. try:
  21. res_url = r.xpath(url_xpath)[-1]
  22. except:
  23. continue
  24. if not res_url:
  25. continue
  26. title = html_to_text(''.join(r.xpath(title_xpath)))
  27. content = html_to_text(''.join(r.xpath(content_xpath)))
  28. results.append({'title': title,
  29. 'content': content,
  30. 'url': res_url})
  31. return results
# Disabled alternative implementation that reads JSON from the `d.js`
# endpoint instead of scraping the HTML page; kept for reference.
#
#from json import loads
#search_url = url + 'd.js?{query}&p=1&s={offset}'
#
#paging = True
#
#
#def request(query, params):
#    offset = (params['pageno'] - 1) * 30
#    q = urlencode({'q': query,
#                   'l': locale})
#    params['url'] = search_url.format(query=q, offset=offset)
#    return params
#
#
#def response(resp):
#    results = []
#    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
#    for r in search_res:
#        if not r.get('t'):
#            continue
#        results.append({'title': r['t'],
#                        'content': html_to_text(r['a']),
#                        'url': r['u']})
#    return results