Browse Source

Merge pull request #2319 from dalf/multiple-proxies

[enh] Add multiple outgoing proxies
Alexandre Flament 4 years ago
parent
commit
aec6a2656c
7 changed files with 174 additions and 38 deletions
  1. 35 17
      docs/admin/settings.rst
  2. 10 8
      searx/engines/__init__.py
  3. 28 2
      searx/poolrequests.py
  4. 1 1
      searx/search.py
  5. 9 7
      searx/settings.yml
  6. 2 3
      searx/webapp.py
  7. 89 0
      tests/unit/test_poolrequests.py

+ 35 - 17
docs/admin/settings.rst

@@ -36,18 +36,26 @@ Global Settings
        image_proxy : False           # proxying image results through searx
        default_locale : ""           # default interface locale
 
-   # uncomment below section if you want to use a proxy
-
-   #outgoing_proxies :
-   #    http : http://127.0.0.1:8080
-   #    https: http://127.0.0.1:8080
-
-   # uncomment below section only if you have more than one network interface
-   # which can be the source of outgoing search requests
-
-   #source_ips:
-   #  - 1.1.1.1
-   #  - 1.1.1.2
+   outgoing: # communication with search engines
+       request_timeout : 2.0 # default timeout in seconds, can be overridden by engine
+       # max_request_timeout: 10.0 # the maximum timeout in seconds
+       useragent_suffix : "" # suffix of searx_useragent, could contain information such as an email address of the administrator
+       pool_connections : 100 # Number of different hosts
+       pool_maxsize : 10 # Number of simultaneous requests by host
+
+       #proxies:
+       #    http:
+       #        - http://proxy1:8080
+       #        - http://proxy2:8080
+       #    https:
+       #        - http://proxy1:8080
+       #        - http://proxy2:8080
+       #        - socks5://user:password@proxy3:1080
+       #        - socks5h://user:password@proxy4:1080
+
+       #source_ips:
+       #    - 1.1.1.1
+       #    - 1.1.1.2
 
    locales:
        en : English
@@ -105,15 +113,16 @@ Global Settings
   code, like ``fr``, ``en``, ``de``.
 
 .. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies
-.. _PR SOCKS support: https://github.com/kennethreitz/requests/pull/478
+.. _PySocks: https://pypi.org/project/PySocks/
 
-``outgoing_proxies`` :
-  Define a proxy you wish to use, see `requests proxies`_.  SOCKS proxies are
-  not supported / see `PR SOCKS support`.
+``proxies`` :
+  Define one or more proxies you wish to use, see `requests proxies`_.
+  If more than one proxy is defined for a protocol (http, https),
+  requests to the engines are distributed among them in a round-robin fashion.
 
 ``source_ips`` :
   If you use multiple network interfaces, define from which IP the requests must
-  be made.
+  be made. This parameter is ignored when ``proxies`` is set.
 
 ``locales`` :
   Locales codes and their names.  Available translations of searx interface.
@@ -139,6 +148,15 @@ Engine settings
      api_key : 'apikey'
      disabled : True
      language : en_US
+     #proxies:
+     #    http:
+     #        - http://proxy1:8080
+     #        - http://proxy2:8080
+     #    https:
+     #        - http://proxy1:8080
+     #        - http://proxy2:8080
+     #        - socks5://user:password@proxy3:1080
+     #        - socks5h://user:password@proxy4:1080
 
 ``name`` :
   Name that will be used across searx to define this engine.  In settings, on

+ 10 - 8
searx/engines/__init__.py

@@ -25,7 +25,7 @@ from operator import itemgetter
 from searx import settings
 from searx import logger
 from searx.data import ENGINES_LANGUAGES
-from searx.poolrequests import get
+from searx.poolrequests import get, get_proxy_cycles
 from searx.utils import load_module, match_language, get_engine_from_settings
 
 
@@ -79,16 +79,18 @@ def load_engine(engine_data):
         logger.exception('Cannot load engine "{}"'.format(engine_module))
         return None
 
-    for param_name in engine_data:
+    for param_name, param_value in engine_data.items():
         if param_name == 'engine':
-            continue
-        if param_name == 'categories':
-            if engine_data['categories'] == 'none':
+            pass
+        elif param_name == 'categories':
+            if param_value == 'none':
                 engine.categories = []
             else:
-                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
-            continue
-        setattr(engine, param_name, engine_data[param_name])
+                engine.categories = list(map(str.strip, param_value.split(',')))
+        elif param_name == 'proxies':
+            engine.proxies = get_proxy_cycles(param_value)
+        else:
+            setattr(engine, param_name, param_value)
 
     for arg_name, arg_value in engine_default_args.items():
         if not hasattr(engine, arg_name):

+ 28 - 2
searx/poolrequests.py

@@ -111,6 +111,32 @@ def get_time_for_thread():
     return threadLocal.total_time
 
 
+def get_proxy_cycles(proxy_settings):
+    if not proxy_settings:
+        return None
+    # Backwards compatibility for single proxy in settings.yml
+    for protocol, proxy in proxy_settings.items():
+        if isinstance(proxy, str):
+            proxy_settings[protocol] = [proxy]
+
+    for protocol in proxy_settings:
+        proxy_settings[protocol] = cycle(proxy_settings[protocol])
+    return proxy_settings
+
+
+GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies'))
+
+
+def get_proxies(proxy_cycles):
+    if proxy_cycles:
+        return {protocol: next(proxy_cycle) for protocol, proxy_cycle in proxy_cycles.items()}
+    return None
+
+
+def get_global_proxies():
+    return get_proxies(GLOBAL_PROXY_CYCLES)
+
+
 def request(method, url, **kwargs):
     """same as requests/requests/api.py request(...)"""
     time_before_request = time()
@@ -119,8 +145,8 @@ def request(method, url, **kwargs):
     session = SessionSinglePool()
 
     # proxies
-    if kwargs.get('proxies') is None:
-        kwargs['proxies'] = settings['outgoing'].get('proxies')
+    if not kwargs.get('proxies'):
+        kwargs['proxies'] = get_global_proxies()
 
     # timeout
     if 'timeout' in kwargs:

+ 1 - 1
searx/search.py

@@ -119,7 +119,7 @@ def send_http_request(engine, request_params):
 
     # setting engine based proxies
     if hasattr(engine, 'proxies'):
-        request_args['proxies'] = engine.proxies
+        request_args['proxies'] = requests_lib.get_proxies(engine.proxies)
 
     # specific type of request (GET or POST)
     if request_params['method'] == 'GET':

+ 9 - 7
searx/settings.yml

@@ -63,13 +63,15 @@ outgoing: # communication with search engines
     pool_connections : 100 # Number of different hosts
     pool_maxsize : 10 # Number of simultaneous requests by host
 # uncomment below section if you want to use a proxy
-# see http://docs.python-requests.org/en/latest/user/advanced/#proxies
-# SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks
-#    proxies :
-#        http : socks5h://127.0.0.1:9050
-#        https: socks5h://127.0.0.1:9050
-#    using_tor_proxy : True
-#    extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy
+# see https://2.python-requests.org/en/latest/user/advanced/#proxies
+# SOCKS proxies are also supported: see https://2.python-requests.org/en/latest/user/advanced/#socks
+#    proxies:
+#        http:
+#            - http://proxy1:8080
+#            - http://proxy2:8080
+#        https:
+#            - http://proxy1:8080
+#            - http://proxy2:8080
 # uncomment below section only if you have more than one network interface
 # which can be the source of outgoing search requests
 #    source_ips:

+ 2 - 3
searx/webapp.py

@@ -78,6 +78,7 @@ from searx.plugins import plugins
 from searx.plugins.oa_doi_rewrite import get_doi_resolver
 from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
+from searx.poolrequests import get_global_proxies
 
 
 # serve pages with HTTP/1.1
@@ -149,8 +150,6 @@ _category_names = (gettext('files'),
                    gettext('onions'),
                    gettext('science'))
 
-outgoing_proxies = settings['outgoing'].get('proxies') or None
-
 _flask_babel_get_translations = flask_babel.get_translations
 
 
@@ -905,7 +904,7 @@ def image_proxy():
                         stream=True,
                         timeout=settings['outgoing']['request_timeout'],
                         headers=headers,
-                        proxies=outgoing_proxies)
+                        proxies=get_global_proxies())
 
     if resp.status_code == 304:
         return '', resp.status_code

+ 89 - 0
tests/unit/test_poolrequests.py

@@ -0,0 +1,89 @@
+from unittest.mock import patch
+from requests.models import Response
+
+from searx.testing import SearxTestCase
+
+import searx.poolrequests
+from searx.poolrequests import get_proxy_cycles, get_proxies
+
+
+CONFIG = {'http': ['http://localhost:9090', 'http://localhost:9092'],
+          'https': ['http://localhost:9091', 'http://localhost:9093']}
+
+
+class TestProxy(SearxTestCase):
+
+    def test_noconfig(self):
+        cycles = get_proxy_cycles(None)
+        self.assertIsNone(cycles)
+
+        cycles = get_proxy_cycles(False)
+        self.assertIsNone(cycles)
+
+    def test_oldconfig(self):
+        config = {
+            'http': 'http://localhost:9090',
+            'https': 'http://localhost:9091',
+        }
+        cycles = get_proxy_cycles(config)
+        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+
+    def test_one_proxy(self):
+        config = {
+            'http': ['http://localhost:9090'],
+            'https': ['http://localhost:9091'],
+        }
+        cycles = get_proxy_cycles(config)
+        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+
+    def test_multiple_proxies(self):
+        cycles = get_proxy_cycles(CONFIG)
+        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+        self.assertEqual(next(cycles['http']), 'http://localhost:9092')
+        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+        self.assertEqual(next(cycles['https']), 'http://localhost:9093')
+        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+
+    def test_getproxies_none(self):
+        self.assertIsNone(get_proxies(None))
+
+    def test_getproxies_config(self):
+        cycles = get_proxy_cycles(CONFIG)
+        self.assertEqual(get_proxies(cycles), {
+            'http': 'http://localhost:9090',
+            'https': 'http://localhost:9091'
+        })
+        self.assertEqual(get_proxies(cycles), {
+            'http': 'http://localhost:9092',
+            'https': 'http://localhost:9093'
+        })
+
+    @patch('searx.poolrequests.get_global_proxies')
+    def test_request(self, mock_get_global_proxies):
+        method = 'GET'
+        url = 'http://localhost'
+        custom_proxies = {
+            'https': 'http://localhost:1080'
+        }
+        global_proxies = {
+            'http': 'http://localhost:9092',
+            'https': 'http://localhost:9093'
+        }
+        mock_get_global_proxies.return_value = global_proxies
+
+        # check the global proxies usage
+        with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
+            searx.poolrequests.request(method, url)
+        mock_method.assert_called_once_with(method=method, url=url, proxies=global_proxies)
+
+        # check if the proxies parameter overrides the global proxies
+        with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
+            searx.poolrequests.request(method, url, proxies=custom_proxies)
+        mock_method.assert_called_once_with(method=method, url=url, proxies=custom_proxies)