duckduckgo.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
"""
 DuckDuckGo (Web)

 @website     https://duckduckgo.com/
 @provide-api yes (https://duckduckgo.com/api),
              but it does not return all results of the search site

 @using-api   no
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content

 @todo        rewrite to api
"""
  12. from urllib import urlencode
  13. from lxml.html import fromstring
  14. from requests import get
  15. from json import loads
  16. from searx.engines.xpath import extract_text
  17. # engine dependent config
  18. categories = ['general']
  19. paging = True
  20. language_support = True
  21. supported_languages_url = 'https://duckduckgo.com/d2030.js'
  22. time_range_support = True
  23. # search-url
  24. url = 'https://duckduckgo.com/html?{query}&s={offset}'
  25. time_range_url = '&df={range}'
  26. time_range_dict = {'day': 'd',
  27. 'week': 'w',
  28. 'month': 'm'}
  29. # specific xpath variables
  30. result_xpath = '//div[@class="result results_links results_links_deep web-result "]' # noqa
  31. url_xpath = './/a[@class="result__a"]/@href'
  32. title_xpath = './/a[@class="result__a"]'
  33. content_xpath = './/a[@class="result__snippet"]'
  34. # do search-request
  35. def request(query, params):
  36. if params['time_range'] and params['time_range'] not in time_range_dict:
  37. return params
  38. offset = (params['pageno'] - 1) * 30
  39. # custom fixes for languages
  40. if params['language'] == 'all':
  41. locale = None
  42. elif params['language'][:2] == 'ja':
  43. locale = 'jp-jp'
  44. elif params['language'][:2] == 'sl':
  45. locale = 'sl-sl'
  46. elif params['language'] == 'zh-TW':
  47. locale = 'tw-tzh'
  48. elif params['language'] == 'zh-HK':
  49. locale = 'hk-tzh'
  50. elif params['language'][-2:] == 'SA':
  51. locale = 'xa-' + params['language'].split('-')[0]
  52. elif params['language'][-2:] == 'GB':
  53. locale = 'uk-' + params['language'].split('-')[0]
  54. else:
  55. locale = params['language'].split('-')
  56. if len(locale) == 2:
  57. # country code goes first
  58. locale = locale[1].lower() + '-' + locale[0].lower()
  59. else:
  60. # tries to get a country code from language
  61. locale = locale[0].lower()
  62. for lc in supported_languages:
  63. lc = lc.split('-')
  64. if locale == lc[0]:
  65. locale = lc[1].lower() + '-' + lc[0].lower()
  66. break
  67. if locale:
  68. params['url'] = url.format(
  69. query=urlencode({'q': query, 'kl': locale}), offset=offset)
  70. else:
  71. params['url'] = url.format(
  72. query=urlencode({'q': query}), offset=offset)
  73. if params['time_range'] in time_range_dict:
  74. params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
  75. return params
  76. # get response from search-request
  77. def response(resp):
  78. results = []
  79. doc = fromstring(resp.text)
  80. # parse results
  81. for r in doc.xpath(result_xpath):
  82. try:
  83. res_url = r.xpath(url_xpath)[-1]
  84. except:
  85. continue
  86. if not res_url:
  87. continue
  88. title = extract_text(r.xpath(title_xpath))
  89. content = extract_text(r.xpath(content_xpath))
  90. # append result
  91. results.append({'title': title,
  92. 'content': content,
  93. 'url': res_url})
  94. # return results
  95. return results
  96. # get supported languages from their site
  97. def _fetch_supported_languages(resp):
  98. # response is a js file with regions as an embedded object
  99. response_page = resp.text
  100. response_page = response_page[response_page.find('regions:{') + 8:]
  101. response_page = response_page[:response_page.find('}') + 1]
  102. regions_json = loads(response_page)
  103. supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
  104. return supported_languages