
Merge pull request #94 from return42/pylint-network

Pylint searx.network
Alexandre Flament · 3 years ago · commit 7c76cef746
4 changed files with 163 additions and 100 deletions
  1. searx/network/__init__.py (+22 -13)
  2. searx/network/client.py (+64 -41)
  3. searx/network/network.py (+49 -32)
  4. searx/network/raise_for_httperror.py (+28 -14)

+ 22 - 13
searx/network/__init__.py

@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pylint: disable=missing-module-docstring, missing-function-docstring, global-statement
 
 import asyncio
 import threading
@@ -31,29 +33,33 @@ except ImportError:
             self._count.release()
 
         def get(self):
-            if not self._count.acquire(True):
+            if not self._count.acquire(True):  #pylint: disable=consider-using-with
                 raise Empty
             return self._queue.popleft()
 
 
 THREADLOCAL = threading.local()
-
+"""Thread-local data is data for thread specific values."""
 
 def reset_time_for_thread():
+    global THREADLOCAL
     THREADLOCAL.total_time = 0
 
 
 def get_time_for_thread():
     """returns thread's total time or None"""
+    global THREADLOCAL
     return THREADLOCAL.__dict__.get('total_time')
 
 
 def set_timeout_for_thread(timeout, start_time=None):
+    global THREADLOCAL
     THREADLOCAL.timeout = timeout
     THREADLOCAL.start_time = start_time
 
 
 def set_context_network_name(network_name):
+    global THREADLOCAL
     THREADLOCAL.network = get_network(network_name)
 
 
@@ -62,11 +68,13 @@ def get_context_network():
 
     If unset, return value from :py:obj:`get_network`.
     """
+    global THREADLOCAL
     return THREADLOCAL.__dict__.get('network') or get_network()
 
 
 def request(method, url, **kwargs):
     """same as requests/requests/api.py request(...)"""
+    global THREADLOCAL
     time_before_request = default_timer()
 
     # timeout (httpx)
@@ -153,18 +161,17 @@ def patch(url, data=None, **kwargs):
 def delete(url, **kwargs):
     return request('delete', url, **kwargs)
 
-
-async def stream_chunk_to_queue(network, q, method, url, **kwargs):
+async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
     try:
         async with network.stream(method, url, **kwargs) as response:
-            q.put(response)
+            queue.put(response)
             async for chunk in response.aiter_bytes(65536):
                 if len(chunk) > 0:
-                    q.put(chunk)
+                    queue.put(chunk)
     except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e:
-        q.put(e)
+        queue.put(e)
     finally:
-        q.put(None)
+        queue.put(None)
 
 
 def stream(method, url, **kwargs):
@@ -179,13 +186,15 @@ def stream(method, url, **kwargs):
     httpx.Client.stream requires to write the httpx.HTTPTransport version of the
     the httpx.AsyncHTTPTransport declared above.
     """
-    q = SimpleQueue()
-    future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(get_network(), q, method, url, **kwargs),
-                                              get_loop())
-    chunk_or_exception = q.get()
+    queue = SimpleQueue()
+    future = asyncio.run_coroutine_threadsafe(
+        stream_chunk_to_queue(get_network(), queue, method, url, **kwargs),
+        get_loop()
+    )
+    chunk_or_exception = queue.get()
     while chunk_or_exception is not None:
         if isinstance(chunk_or_exception, Exception):
             raise chunk_or_exception
         yield chunk_or_exception
-        chunk_or_exception = q.get()
+        chunk_or_exception = queue.get()
     return future.result()
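
The stream() rewrite above keeps the queue-bridge pattern intact and only renames q to queue. Reduced to a self-contained sketch (the producer, payloads, and names here are illustrative, not the searx code): an async producer pushes chunks plus a None sentinel into a thread-safe queue, and a synchronous generator on another thread drains it. This is the same trick stream() uses to expose the async httpx stream as a plain iterator.

    import asyncio
    import threading
    from queue import SimpleQueue

    async def produce(queue):
        # stand-in for network.stream(): emit chunks, then a None sentinel
        for chunk in (b'alpha', b'beta', b'gamma'):
            queue.put(chunk)
            await asyncio.sleep(0)
        queue.put(None)

    def consume(loop):
        queue = SimpleQueue()
        # schedule the coroutine on the loop owned by another thread,
        # as stream() does with get_loop()
        future = asyncio.run_coroutine_threadsafe(produce(queue), loop)
        chunk = queue.get()
        while chunk is not None:
            if isinstance(chunk, Exception):
                raise chunk
            yield chunk
            chunk = queue.get()
        future.result()  # surface any exception kept by the future

    loop = asyncio.new_event_loop()
    threading.Thread(target=loop.run_forever, daemon=True).start()
    print(list(consume(loop)))  # [b'alpha', b'beta', b'gamma']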

+ 64 - 41
searx/network/client.py

@@ -1,14 +1,19 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pylint: disable=missing-module-docstring, missing-function-docstring, global-statement
 
 import asyncio
 import logging
 import threading
-
 import httpcore
 import httpx
 from httpx_socks import AsyncProxyTransport
-from python_socks import parse_proxy_url
-import python_socks._errors
+from python_socks import (
+    parse_proxy_url,
+    ProxyConnectionError,
+    ProxyTimeoutError,
+    ProxyError
+)
 
 from searx import logger
 
@@ -30,7 +35,11 @@ TRANSPORT_KWARGS = {
 }
 
 
-async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL):
+# pylint: disable=protected-access
+async def close_connections_for_url(
+        connection_pool: httpcore.AsyncConnectionPool,
+        url: httpcore._utils.URL ):
+
     origin = httpcore._utils.url_to_origin(url)
     logger.debug('Drop connections for %r', origin)
     connections_to_close = connection_pool._connections_for_origin(origin)
@@ -40,6 +49,7 @@ async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPoo
             await connection.aclose()
         except httpcore.NetworkError as e:
             logger.warning('Error closing an existing connection', exc_info=e)
+# pylint: enable=protected-access
 
 
 def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False):
@@ -80,9 +90,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport):
             retry -= 1
             try:
                 return await super().arequest(method, url, headers, stream, ext)
-            except (python_socks._errors.ProxyConnectionError,
-                    python_socks._errors.ProxyTimeoutError,
-                    python_socks._errors.ProxyError) as e:
+            except (ProxyConnectionError, ProxyTimeoutError, ProxyError) as e:
                 raise httpcore.ProxyError(e)
             except OSError as e:
                 # socket.gaierror when DNS resolution fails
@@ -114,7 +122,7 @@ class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport):
             except httpcore.CloseError as e:
                 # httpcore.CloseError: [Errno 104] Connection reset by peer
                 # raised by _keepalive_sweep()
-                #   from https://github.com/encode/httpcore/blob/4b662b5c42378a61e54d673b4c949420102379f5/httpcore/_backends/asyncio.py#L198  # noqa
+                #   from https://github.com/encode/httpcore/blob/4b662b5c42378a61e54d673b4c949420102379f5/httpcore/_backends/asyncio.py#L198  # pylint: disable=line-too-long
                 await close_connections_for_url(self._pool, url)
                 logger.warning('httpcore.CloseError: retry', exc_info=e)
                 # retry
@@ -129,6 +137,7 @@ class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport):
 
 
 def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries):
+    global TRANSPORT_KWARGS
     # support socks5h (requests compatibility):
     # https://requests.readthedocs.io/en/master/user/advanced/#socks
     # socks5://   hostname is resolved on client side
@@ -141,29 +150,35 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit
 
     proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
     verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify
-    return AsyncProxyTransportFixed(proxy_type=proxy_type, proxy_host=proxy_host, proxy_port=proxy_port,
-                                    username=proxy_username, password=proxy_password,
-                                    rdns=rdns,
-                                    loop=get_loop(),
-                                    verify=verify,
-                                    http2=http2,
-                                    local_address=local_address,
-                                    max_connections=limit.max_connections,
-                                    max_keepalive_connections=limit.max_keepalive_connections,
-                                    keepalive_expiry=limit.keepalive_expiry,
-                                    retries=retries,
-                                    **TRANSPORT_KWARGS)
+    return AsyncProxyTransportFixed(
+        proxy_type=proxy_type, proxy_host=proxy_host, proxy_port=proxy_port,
+        username=proxy_username, password=proxy_password,
+        rdns=rdns,
+        loop=get_loop(),
+        verify=verify,
+        http2=http2,
+        local_address=local_address,
+        max_connections=limit.max_connections,
+        max_keepalive_connections=limit.max_keepalive_connections,
+        keepalive_expiry=limit.keepalive_expiry,
+        retries=retries,
+        **TRANSPORT_KWARGS
+    )
 
 
 def get_transport(verify, http2, local_address, proxy_url, limit, retries):
+    global TRANSPORT_KWARGS
     verify = get_sslcontexts(None, None, True, False, http2) if verify is True else verify
-    return AsyncHTTPTransportFixed(verify=verify,
-                                   http2=http2,
-                                   local_address=local_address,
-                                   proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,
-                                   limits=limit,
-                                   retries=retries,
-                                   **TRANSPORT_KWARGS)
+    return AsyncHTTPTransportFixed(
+        # pylint: disable=protected-access
+        verify=verify,
+        http2=http2,
+        local_address=local_address,
+        proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,
+        limits=limit,
+        retries=retries,
+        **TRANSPORT_KWARGS
+    )
 
 
 def iter_proxies(proxies):
@@ -175,24 +190,32 @@ def iter_proxies(proxies):
             yield pattern, proxy_url
 
 
-def new_client(enable_http, verify, enable_http2,
-               max_connections, max_keepalive_connections, keepalive_expiry,
-               proxies, local_address, retries, max_redirects):
-    limit = httpx.Limits(max_connections=max_connections,
-                         max_keepalive_connections=max_keepalive_connections,
-                         keepalive_expiry=keepalive_expiry)
+def new_client(
+        # pylint: disable=too-many-arguments
+        enable_http, verify, enable_http2,
+        max_connections, max_keepalive_connections, keepalive_expiry,
+        proxies, local_address, retries, max_redirects  ):
+    limit = httpx.Limits(
+        max_connections=max_connections,
+        max_keepalive_connections=max_keepalive_connections,
+        keepalive_expiry=keepalive_expiry
+    )
     # See https://www.python-httpx.org/advanced/#routing
     mounts = {}
     for pattern, proxy_url in iter_proxies(proxies):
         if not enable_http and (pattern == 'http' or pattern.startswith('http://')):
             continue
-        if proxy_url.startswith('socks4://') \
-           or proxy_url.startswith('socks5://') \
-           or proxy_url.startswith('socks5h://'):
-            mounts[pattern] = get_transport_for_socks_proxy(verify, enable_http2, local_address, proxy_url, limit,
-                                                            retries)
+        if (proxy_url.startswith('socks4://')
+           or proxy_url.startswith('socks5://')
+            or proxy_url.startswith('socks5h://')
+        ):
+            mounts[pattern] = get_transport_for_socks_proxy(
+                verify, enable_http2, local_address, proxy_url, limit, retries
+            )
         else:
-            mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries)
+            mounts[pattern] = get_transport(
+                verify, enable_http2, local_address, proxy_url, limit, retries
+            )
 
     if not enable_http:
         mounts['http://'] = AsyncHTTPTransportNoHttp()
@@ -217,12 +240,12 @@ def init():
         LOOP = asyncio.new_event_loop()
         LOOP.run_forever()
 
-    th = threading.Thread(
+    thread = threading.Thread(
         target=loop_thread,
         name='asyncio_loop',
         daemon=True,
     )
-    th.start()
+    thread.start()
 
 
 init()
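
For reference, the retry-and-translate pattern that AsyncProxyTransportFixed.arequest implements, reduced to a runnable sketch. The exception classes and the send callable below are placeholders, not the httpcore or python_socks API: proxy-library errors are re-raised as the transport's own error type, while transient OSErrors (e.g. socket.gaierror on DNS failure) are retried a bounded number of times.

    import asyncio

    class ProxyLibError(Exception):
        """Stand-in for python_socks' ProxyConnectionError/ProxyTimeoutError/ProxyError."""

    class TransportProxyError(Exception):
        """Stand-in for httpcore.ProxyError."""

    async def arequest_with_retry(send, retries=2):
        # `send` stands in for super().arequest(...)
        attempt = retries + 1
        while attempt > 0:
            attempt -= 1
            try:
                return await send()
            except ProxyLibError as exc:
                # translate the third-party error into the transport's own type
                raise TransportProxyError(str(exc)) from exc
            except OSError:
                # e.g. socket.gaierror when DNS resolution fails: retry
                if attempt <= 0:
                    raise

    async def flaky():
        flaky.calls += 1
        if flaky.calls == 1:
            raise OSError('transient DNS failure')
        return 'response'

    flaky.calls = 0
    print(asyncio.run(arequest_with_retry(flaky)))  # retries once, prints 'response'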

+ 49 - 32
searx/network/network.py

@@ -1,4 +1,7 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pylint: disable=global-statement
+# pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring
 
 import atexit
 import asyncio
@@ -34,23 +37,28 @@ ADDRESS_MAPPING = {
 
 class Network:
 
-    __slots__ = ('enable_http', 'verify', 'enable_http2',
-                 'max_connections', 'max_keepalive_connections', 'keepalive_expiry',
-                 'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error',
-                 '_local_addresses_cycle', '_proxies_cycle', '_clients')
-
-    def __init__(self,
-                 enable_http=True,
-                 verify=True,
-                 enable_http2=False,
-                 max_connections=None,
-                 max_keepalive_connections=None,
-                 keepalive_expiry=None,
-                 proxies=None,
-                 local_addresses=None,
-                 retries=0,
-                 retry_on_http_error=None,
-                 max_redirects=30):
+    __slots__ = (
+        'enable_http', 'verify', 'enable_http2',
+        'max_connections', 'max_keepalive_connections', 'keepalive_expiry',
+        'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error',
+        '_local_addresses_cycle', '_proxies_cycle', '_clients'
+    )
+
+    def __init__(
+            # pylint: disable=too-many-arguments
+            self,
+            enable_http=True,
+            verify=True,
+            enable_http2=False,
+            max_connections=None,
+            max_keepalive_connections=None,
+            keepalive_expiry=None,
+            proxies=None,
+            local_addresses=None,
+            retries=0,
+            retry_on_http_error=None,
+            max_redirects=30 ):
+
         self.enable_http = enable_http
         self.verify = verify
         self.enable_http2 = enable_http2
@@ -81,7 +89,7 @@ class Network:
         local_addresses = self.local_addresses
         if not local_addresses:
             return
-        elif isinstance(local_addresses, str):
+        if isinstance(local_addresses, str):
             local_addresses = [local_addresses]
         for address in local_addresses:
             yield address
@@ -119,6 +127,7 @@ class Network:
         for pattern, proxy_urls in self.iter_proxies():
             proxy_settings[pattern] = cycle(proxy_urls)
         while True:
+            # pylint: disable=stop-iteration-return
             yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items())
 
     def get_client(self, verify=None, max_redirects=None):
@@ -128,16 +137,18 @@ class Network:
         proxies = next(self._proxies_cycle)  # is a tuple so it can be part of the key
         key = (verify, max_redirects, local_address, proxies)
         if key not in self._clients or self._clients[key].is_closed:
-            self._clients[key] = new_client(self.enable_http,
-                                            verify,
-                                            self.enable_http2,
-                                            self.max_connections,
-                                            self.max_keepalive_connections,
-                                            self.keepalive_expiry,
-                                            dict(proxies),
-                                            local_address,
-                                            0,
-                                            max_redirects)
+            self._clients[key] = new_client(
+                self.enable_http,
+                verify,
+                self.enable_http2,
+                self.max_connections,
+                self.max_keepalive_connections,
+                self.keepalive_expiry,
+                dict(proxies),
+                local_address,
+                0,
+                max_redirects
+            )
         return self._clients[key]
 
     async def aclose(self):
@@ -158,9 +169,11 @@ class Network:
         return kwargs_clients
 
     def is_valid_respones(self, response):
-        if (self.retry_on_http_error is True and 400 <= response.status_code <= 599) \
-           or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error) \
-           or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error):
+        # pylint: disable=too-many-boolean-expressions
+        if ((self.retry_on_http_error is True and 400 <= response.status_code <= 599)
+            or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error)
+            or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error)
+        ):
             return False
         return True
 
@@ -194,6 +207,7 @@ class Network:
 
     @classmethod
     async def aclose_all(cls):
+        global NETWORKS
         await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False)
 
 
@@ -203,8 +217,10 @@ def get_network(name=None):
 
 
 def initialize(settings_engines=None, settings_outgoing=None):
+    # pylint: disable=import-outside-toplevel)
     from searx.engines import engines
     from searx import settings
+    # pylint: enable=import-outside-toplevel)
 
     global NETWORKS
 
@@ -212,7 +228,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
     settings_outgoing = settings_outgoing or settings.get('outgoing')
 
     # default parameters for AsyncHTTPTransport
-    # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121  # noqa
+    # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121  # pylint: disable=line-too-long
     default_params = {
         'enable_http': False,
         'verify': True,
@@ -290,6 +306,7 @@ def done():
     Note: since Network.aclose has to be async, it is not possible to call this method on Network.__del__
     So Network.aclose is called here using atexit.register
     """
+    global NETWORKS
     try:
         loop = get_loop()
         if loop:
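
The reformatted condition in Network.is_valid_respones is easier to follow pulled out on its own: retry_on_http_error may be True (retry any 4xx/5xx), a list of status codes, or a single status code. A standalone sketch of that check, inverted so it answers "should we retry?":

    def should_retry(retry_on_http_error, status_code):
        # True -> retry on any 4xx/5xx, list -> retry on listed codes,
        # int  -> retry on that single code
        return (
            (retry_on_http_error is True and 400 <= status_code <= 599)
            or (isinstance(retry_on_http_error, list)
                and status_code in retry_on_http_error)
            or (isinstance(retry_on_http_error, int)
                and status_code == retry_on_http_error)
        )

    assert should_retry(True, 503)
    assert should_retry([429, 503], 429)
    assert should_retry(429, 429)
    assert not should_retry(None, 500)
    assert not should_retry([429], 503)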

+ 28 - 14
searx/network/raise_for_httperror.py

@@ -1,17 +1,23 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pylint: disable=missing-function-docstring
+"""Raise exception for an HTTP response is an error.
+
 """
-Raise exception for an HTTP response is an error.
-"""
-from searx.exceptions import (SearxEngineCaptchaException, SearxEngineTooManyRequestsException,
-                              SearxEngineAccessDeniedException)
 
+from searx.exceptions import (
+    SearxEngineCaptchaException,
+    SearxEngineTooManyRequestsException,
+    SearxEngineAccessDeniedException,
+)
 
 def is_cloudflare_challenge(resp):
     if resp.status_code in [429, 503]:
-        if ('__cf_chl_jschl_tk__=' in resp.text)\
-           or ('/cdn-cgi/challenge-platform/' in resp.text
-               and 'orchestrate/jsch/v1' in resp.text
-               and 'window._cf_chl_enter(' in resp.text):
+        if (('__cf_chl_jschl_tk__=' in resp.text)
+            or ('/cdn-cgi/challenge-platform/' in resp.text
+                and 'orchestrate/jsch/v1' in resp.text
+                and 'window._cf_chl_enter(' in resp.text
+            )):
             return True
     if resp.status_code == 403 and '__cf_chl_captcha_tk__=' in resp.text:
         return True
@@ -27,15 +33,21 @@ def raise_for_cloudflare_captcha(resp):
         if is_cloudflare_challenge(resp):
             # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
             # suspend for 2 weeks
-            raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15)
+            raise SearxEngineCaptchaException(
+                message='Cloudflare CAPTCHA',
+                suspended_time=3600 * 24 * 15
+            )
 
         if is_cloudflare_firewall(resp):
-            raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24)
+            raise SearxEngineAccessDeniedException(
+                message='Cloudflare Firewall', suspended_time=3600 * 24
+            )
 
 
 def raise_for_recaptcha(resp):
-    if resp.status_code == 503 \
-       and '"https://www.google.com/recaptcha/' in resp.text:
+    if (resp.status_code == 503
+        and '"https://www.google.com/recaptcha/' in resp.text
+    ):
         raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7)
 
 
@@ -59,8 +71,10 @@ def raise_for_httperror(resp):
     if resp.status_code and resp.status_code >= 400:
         raise_for_captcha(resp)
         if resp.status_code in (402, 403):
-            raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code),
-                                                   suspended_time=3600 * 24)
+            raise SearxEngineAccessDeniedException(
+                message='HTTP error ' + str(resp.status_code),
+                suspended_time=3600 * 24
+            )
         if resp.status_code == 429:
             raise SearxEngineTooManyRequestsException()
         resp.raise_for_status()
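
Taken together, an engine would call into this module roughly as follows. This is a hedged sketch: it assumes a get helper exported alongside the patch/delete helpers shown in searx/network/__init__.py, and that the exception classes store the suspended_time argument passed to them above; the fetch wrapper itself is illustrative.

    from searx.network import get
    from searx.network.raise_for_httperror import raise_for_httperror
    from searx.exceptions import (
        SearxEngineCaptchaException,
        SearxEngineAccessDeniedException,
    )

    def fetch(url):
        resp = get(url)
        try:
            raise_for_httperror(resp)
        except (SearxEngineCaptchaException, SearxEngineAccessDeniedException) as exc:
            # suspended_time (assumed stored by the exception constructor)
            # tells the caller how long to suspend the engine
            print('suspend engine for', exc.suspended_time, 'seconds')
            raise
        return resp.text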