duckduckgo.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. from urllib import urlencode
  2. from lxml.html import fromstring
  3. from searx.utils import html_to_text
  # Template for the DuckDuckGo `/html` search endpoint.
  # `{query}` receives the already url-encoded query string and `{offset}` is
  # sent as the `s` parameter.
  url = 'https://duckduckgo.com/html?{query}&s={offset}'

  # Value sent as the `l` query parameter with every search.
  # NOTE(review): presumably DuckDuckGo's region/language code -- confirm.
  locale = 'us-en'


  def request(query, params):
      """Store the search URL for ``query`` in ``params`` and return it.

      Args:
          query: Search terms; url-encoded here as the ``q`` parameter.
          params: Mutable mapping that must contain a numeric ``'pageno'``
              entry. NOTE(review): presumably the 1-based page number
              supplied by the searx engine framework -- confirm with caller.

      Returns:
          The same ``params`` object, with ``params['url']`` set.

      Raises:
          KeyError: If ``params`` has no ``'pageno'`` entry.
      """
      # Page 1 maps to offset 0; every further page advances the offset by 30.
      offset = (params['pageno'] - 1) * 30
      q = urlencode({'q': query,
                     'l': locale})
      params['url'] = url.format(query=q, offset=offset)
      return params
  def response(resp):
      """Extract search results from a DuckDuckGo HTML result page.

      Args:
          resp: Response object whose ``text`` attribute holds the page HTML.

      Returns:
          A list of dicts with the keys ``'title'``, ``'content'`` and
          ``'url'``, one per result container that has a non-empty link.
      """
      # NOTE(review): the container XPath compares the full class attribute
      # string, so it depends on DuckDuckGo's exact markup -- verify it still
      # matches the live page.
      result_xpath = '//div[@class="results_links results_links_deep web-result"]'
      url_xpath = './/a[@class="large"]/@href'
      title_xpath = './/a[@class="large"]//text()'
      content_xpath = './/div[@class="snippet"]//text()'

      results = []
      doc = fromstring(resp.text)

      for r in doc.xpath(result_xpath):
          hrefs = r.xpath(url_xpath)
          # Indexing an empty match list would raise IndexError, so check for
          # a missing link first; an empty href is skipped as before.
          if not hrefs or not hrefs[-1]:
              continue
          # When several links match, the last one is used.
          res_url = hrefs[-1]

          # Text nodes are concatenated, then passed through html_to_text
          # (presumably to normalise them to plain text -- see searx.utils).
          title = html_to_text(''.join(r.xpath(title_xpath)))
          content = html_to_text(''.join(r.xpath(content_xpath)))
          results.append({'title': title,
                          'content': content,
                          'url': res_url})

      return results
  # Disabled (commented-out) alternative implementation that requests the
  # `d.js` endpoint and parses its JSON payload instead of scraping HTML.
  #
  #from json import loads
  #search_url = url + 'd.js?{query}&p=1&s={offset}'
  #
  #paging = True
  #
  #
  #def request(query, params):
  #    offset = (params['pageno'] - 1) * 30
  #    q = urlencode({'q': query,
  #                   'l': locale})
  #    params['url'] = search_url.format(query=q, offset=offset)
  #    return params
  #
  #
  #def response(resp):
  #    results = []
  #    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
  #    for r in search_res:
  #        if not r.get('t'):
  #            continue
  #        results.append({'title': r['t'],
  #                        'content': html_to_text(r['a']),
  #                        'url': r['u']})
  #    return results