|
@@ -61,17 +61,7 @@ form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
|
|
|
|
|
|
|
|
|
def cache_vqd(query, value):
|
|
def cache_vqd(query, value):
|
|
- """Caches a ``vqd`` value from a query.
|
|
|
|
-
|
|
|
|
- The vqd value depends on the query string and is needed for the follow up
|
|
|
|
- pages or the images loaded by a XMLHttpRequest:
|
|
|
|
-
|
|
|
|
- - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
|
|
|
|
- - DuckDuckGo Images: ``https://duckduckgo.com/i.js??q=...&vqd=...``
|
|
|
|
- - DuckDuckGo Videos: ``https://duckduckgo.com/v.js??q=...&vqd=...``
|
|
|
|
- - DuckDuckGo News: ``https://duckduckgo.com/news.js??q=...&vqd=...``
|
|
|
|
-
|
|
|
|
- """
|
|
|
|
|
|
+ """Caches a ``vqd`` value from a query."""
|
|
c = redisdb.client()
|
|
c = redisdb.client()
|
|
if c:
|
|
if c:
|
|
logger.debug("cache vqd value: %s", value)
|
|
logger.debug("cache vqd value: %s", value)
|
|
@@ -84,13 +74,43 @@ def get_vqd(query):
|
|
(:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
|
|
(:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
|
|
response.
|
|
response.
|
|
|
|
|
|
|
|
+ .. hint::
|
|
|
|
+
|
|
|
|
+ If an empty string is returned there are no results for the ``query`` and
|
|
|
|
+ therefore no ``vqd`` value.
|
|
|
|
+
|
|
|
|
+ DDG's bot detection is sensitive to the ``vqd`` value. For some search terms
|
|
|
|
+ (such as extremely long search terms that are often sent by bots), no ``vqd``
|
|
|
|
+ value can be determined.
|
|
|
|
+
|
|
|
|
+ If SearXNG cannot determine a ``vqd`` value, then no request should go out
|
|
|
|
+ to DDG:
|
|
|
|
+
|
|
|
|
+ A request with a wrong ``vqd`` value leads to DDG temporarily putting
|
|
|
|
+ SearXNG's IP on a block list.
|
|
|
|
+
|
|
|
|
+ Requests from IPs in this block list run into timeouts.
|
|
|
|
+
|
|
|
|
+ Not sure, but it seems the block list is a sliding window: to get my IP rid
|
|
|
|
+ from the bot list I had to cool down my IP for 1h (send no requests from
|
|
|
|
+ that IP to DDG).
|
|
|
|
+
|
|
|
|
+ TL;DR; the ``vqd`` value is needed to pass DDG's bot protection and is used
|
|
|
|
+ by all request to DDG:
|
|
|
|
+
|
|
|
|
+ - DuckDuckGo Lite: ``https://lite.duckduckgo.com/lite`` (POST form data)
|
|
|
|
+ - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
|
|
|
|
+ - DuckDuckGo Images: ``https://duckduckgo.com/i.js??q=...&vqd=...``
|
|
|
|
+ - DuckDuckGo Videos: ``https://duckduckgo.com/v.js??q=...&vqd=...``
|
|
|
|
+ - DuckDuckGo News: ``https://duckduckgo.com/news.js??q=...&vqd=...``
|
|
|
|
+
|
|
"""
|
|
"""
|
|
- value = None
|
|
|
|
|
|
+ value = ''
|
|
c = redisdb.client()
|
|
c = redisdb.client()
|
|
if c:
|
|
if c:
|
|
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
|
|
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
|
|
value = c.get(key)
|
|
value = c.get(key)
|
|
- if value:
|
|
|
|
|
|
+ if value or value == b'':
|
|
value = value.decode('utf-8')
|
|
value = value.decode('utf-8')
|
|
logger.debug("re-use cached vqd value: %s", value)
|
|
logger.debug("re-use cached vqd value: %s", value)
|
|
return value
|
|
return value
|
|
@@ -102,9 +122,11 @@ def get_vqd(query):
|
|
if value:
|
|
if value:
|
|
value = value[0]
|
|
value = value[0]
|
|
else:
|
|
else:
|
|
- # some search terms do not have results and therefore no vqd value
|
|
|
|
|
|
+ # Some search terms do not have results and therefore no vqd value. If
|
|
|
|
+ # no vqd value can be determined for the search term, an empty string is
|
|
|
|
+ # chached.
|
|
value = ''
|
|
value = ''
|
|
- logger.debug("new vqd value: %s", value)
|
|
|
|
|
|
+ logger.debug("new vqd value: '%s'", value)
|
|
cache_vqd(query, value)
|
|
cache_vqd(query, value)
|
|
return value
|
|
return value
|
|
|
|
|
|
@@ -204,6 +226,13 @@ ddg_lang_map = {
|
|
|
|
|
|
def request(query, params):
|
|
def request(query, params):
|
|
|
|
|
|
|
|
+ # request needs a vqd argument
|
|
|
|
+ vqd = get_vqd(query)
|
|
|
|
+ if not vqd:
|
|
|
|
+ # some search terms do not have results and therefore no vqd value
|
|
|
|
+ params['url'] = None
|
|
|
|
+ return params
|
|
|
|
+
|
|
# quote ddg bangs
|
|
# quote ddg bangs
|
|
query_parts = []
|
|
query_parts = []
|
|
# for val in re.split(r'(\s+)', query):
|
|
# for val in re.split(r'(\s+)', query):
|
|
@@ -227,6 +256,7 @@ def request(query, params):
|
|
# link again and again ..
|
|
# link again and again ..
|
|
|
|
|
|
params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
|
|
params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
|
|
|
|
+ params['data']['vqd'] = vqd
|
|
|
|
|
|
# initial page does not have an offset
|
|
# initial page does not have an offset
|
|
if params['pageno'] == 2:
|
|
if params['pageno'] == 2:
|
|
@@ -248,9 +278,6 @@ def request(query, params):
|
|
params['data']['api'] = form_data.get('api', 'd.js')
|
|
params['data']['api'] = form_data.get('api', 'd.js')
|
|
params['data']['nextParams'] = form_data.get('nextParams', '')
|
|
params['data']['nextParams'] = form_data.get('nextParams', '')
|
|
params['data']['v'] = form_data.get('v', 'l')
|
|
params['data']['v'] = form_data.get('v', 'l')
|
|
-
|
|
|
|
- # request needs a vqd argument
|
|
|
|
- params['data']['vqd'] = get_vqd(query)
|
|
|
|
params['headers']['Referer'] = 'https://lite.duckduckgo.com/'
|
|
params['headers']['Referer'] = 'https://lite.duckduckgo.com/'
|
|
|
|
|
|
params['data']['kl'] = eng_region
|
|
params['data']['kl'] = eng_region
|