Browse Source

[fix] startpage engine - avoid captcha

Startpage has introduced new anti-scraping measures that make SearXNG instances
run into captchas:

1. some arguments have been removed and a new `sc` argument has been added.
2. search path changed from `do/search` to `sp/search`
3. POST request is no longer needed

Closes: https://github.com/searxng/searxng/issues/692
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
f1f5e69c42
1 changed file with 9 additions and 8 deletions
  1. 9 8
      searx/engines/startpage.py

+ 9 - 8
searx/engines/startpage.py

@@ -3,6 +3,8 @@
  Startpage (Web)
  Startpage (Web)
 """
 """
 
 
+from urllib.parse import urlencode
+
 from lxml import html
 from lxml import html
 from dateutil import parser
 from dateutil import parser
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
@@ -33,7 +35,7 @@ supported_languages_url = 'https://www.startpage.com/do/settings'
 
 
 # search-url
 # search-url
 base_url = 'https://startpage.com/'
 base_url = 'https://startpage.com/'
-search_url = base_url + 'do/search'
+search_url = base_url + 'sp/search?'
 
 
 # specific xpath variables
 # specific xpath variables
 # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
 # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
@@ -46,14 +48,12 @@ content_xpath = './/p[@class="w-gl__description"]'
 # do search-request
 # do search-request
 def request(query, params):
 def request(query, params):
 
 
-    params['url'] = search_url
-    params['method'] = 'POST'
-    params['data'] = {
+    args = {
         'query': query,
         'query': query,
         'page': params['pageno'],
         'page': params['pageno'],
         'cat': 'web',
         'cat': 'web',
-        'cmd': 'process_search',
-        'engine0': 'v1all',
+        # 'abp': "-1",
+        'sc': 'Mj4jZy61QETj20',
     }
     }
 
 
     # set language if specified
     # set language if specified
@@ -61,9 +61,10 @@ def request(query, params):
         lang_code = match_language(params['language'], supported_languages, fallback=None)
         lang_code = match_language(params['language'], supported_languages, fallback=None)
         if lang_code:
         if lang_code:
             language_name = supported_languages[lang_code]['alias']
             language_name = supported_languages[lang_code]['alias']
-            params['data']['language'] = language_name
-            params['data']['lui'] = language_name
+            args['language'] = language_name
+            args['lui'] = language_name
 
 
+    params['url'] = search_url + urlencode(args)
     return params
     return params