Browse Source

[fix] fix duckduckgo engine

- remove paging support: a "vqd" parameter must be carried over from one request to the next, and this parameter is unique for each request
- update the URL (no redirect), use the POST method
- language support: works if there is no more than one request per minute; otherwise the language setting is ignored!
Alexandre Flament 4 years ago
parent
commit
cfd21bc475
1 changed files with 13 additions and 30 deletions
  1. 13 30
      searx/engines/duckduckgo.py

+ 13 - 30
searx/engines/duckduckgo.py

@@ -21,7 +21,7 @@ from searx.utils import extract_text, match_language, eval_xpath
 
 
 # engine dependent config
 # engine dependent config
 categories = ['general']
 categories = ['general']
-paging = True
+paging = False
 language_support = True
 language_support = True
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True
 time_range_support = True
@@ -37,9 +37,7 @@ language_aliases = {
 }
 }
 
 
 # search-url
 # search-url
-url = 'https://duckduckgo.com/html?{query}&s={offset}&dc={dc_param}'
-time_range_url = '&df={range}'
-
+url = 'https://html.duckduckgo.com/html'
 time_range_dict = {'day': 'd',
 time_range_dict = {'day': 'd',
                    'week': 'w',
                    'week': 'w',
                    'month': 'm'}
                    'month': 'm'}
@@ -65,36 +63,21 @@ def get_region_code(lang, lang_list=[]):
 
 
 
 
 def request(query, params):
 def request(query, params):
-    if params['time_range'] not in (None, 'None', '') and params['time_range'] not in time_range_dict:
+    if params['time_range'] is not None and params['time_range'] not in time_range_dict:
         return params
         return params
 
 
-    offset = (params['pageno'] - 1) * 30
+    params['url'] = url
+    params['method'] = 'POST'
+    params['data']['b'] = ''
+    params['data']['q'] = query
+    params['data']['df'] = ''
 
 
     region_code = get_region_code(params['language'], supported_languages)
     region_code = get_region_code(params['language'], supported_languages)
-    params['url'] = 'https://duckduckgo.com/html/'
-    if params['pageno'] > 1:
-        params['method'] = 'POST'
-        params['data']['q'] = query
-        params['data']['s'] = offset
-        params['data']['dc'] = 30
-        params['data']['nextParams'] = ''
-        params['data']['v'] = 'l'
-        params['data']['o'] = 'json'
-        params['data']['api'] = '/d.js'
-        if params['time_range'] in time_range_dict:
-            params['data']['df'] = time_range_dict[params['time_range']]
-        if region_code:
-            params['data']['kl'] = region_code
-    else:
-        if region_code:
-            params['url'] = url.format(
-                query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
-        else:
-            params['url'] = url.format(
-                query=urlencode({'q': query}), offset=offset, dc_param=offset)
-
-        if params['time_range'] in time_range_dict:
-            params['url'] += time_range_url.format(range=time_range_dict[params['time_range']])
+    if region_code:
+        params['data']['kl'] = region_code
+        params['cookies']['kl'] = region_code
+    if params['time_range'] in time_range_dict:
+        params['data']['df'] = time_range_dict[params['time_range']]
 
 
     return params
     return params