Browse Source

[fix] duckduckgo engine: "!ddg !g" does not redirect to Google

* searx understands "!ddg !g time" as: send "!g time" to DDG
* !g is a DDG bang for Google: DDG returns an HTTP redirect to Google

This commit adds the allow_redirects param so that HTTP redirects are not followed.

The DDG engine returns an empty result as before, without following the HTTP redirect.
Alexandre Flament 4 years ago
parent
commit
c22d4c764c
3 changed files with 12 additions and 1 deletions
  1. 1 0
      docs/dev/engine_overview.rst
  2. 7 1
      searx/engines/duckduckgo.py
  3. 4 0
      searx/search/processors/online.py

+ 1 - 0
docs/dev/engine_overview.rst

@@ -169,6 +169,7 @@ headers             set         HTTP header information
 data                set         HTTP data information
 data                set         HTTP data information
 cookies             set         HTTP cookies
 cookies             set         HTTP cookies
 verify              bool        Performing SSL-Validity check
 verify              bool        Performing SSL-Validity check
+allow_redirects     bool        Follow redirects
 max_redirects       int         maximum redirects, hard limit
 max_redirects       int         maximum redirects, hard limit
 soft_max_redirects  int         maximum redirects, soft limit. Record an error but don't stop the engine
 soft_max_redirects  int         maximum redirects, soft limit. Record an error but don't stop the engine
 raise_for_httperror bool        True by default: raise an exception if the HTTP code of response is >= 300
 raise_for_httperror bool        True by default: raise an exception if the HTTP code of response is >= 300

+ 7 - 1
searx/engines/duckduckgo.py

@@ -75,12 +75,18 @@ def request(query, params):
         params['data']['kl'] = region_code
         params['data']['kl'] = region_code
         params['cookies']['kl'] = region_code
         params['cookies']['kl'] = region_code
 
 
-    params['data']['df'] = time_range_dict.get(params['time_range'], '')
+    if params['time_range'] in time_range_dict:
+        params['data']['df'] = time_range_dict[params['time_range']]
+
+    params['allow_redirects'] = False
     return params
     return params
 
 
 
 
 # get response from search-request
 # get response from search-request
 def response(resp):
 def response(resp):
+    if resp.status_code == 303:
+        return []
+
     # ping
     # ping
     headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
     headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
     get(url_ping, headers=headers_ping)
     get(url_ping, headers=headers_ping)

+ 4 - 0
searx/search/processors/online.py

@@ -73,6 +73,10 @@ class OnlineProcessor(EngineProcessor):
         if max_redirects:
         if max_redirects:
             request_args['max_redirects'] = max_redirects
             request_args['max_redirects'] = max_redirects
 
 
+        # allow_redirects
+        if 'allow_redirects' in params:
+            request_args['allow_redirects'] = params['allow_redirects']
+
         # soft_max_redirects
         # soft_max_redirects
         soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0)
         soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0)