# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, global-statement

import asyncio
import logging
import random
from ssl import SSLContext
import threading
from typing import Any, Dict

import httpx
from httpx_socks import AsyncProxyTransport
from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError

from searx import logger

# Optional uvloop (support Python 3.6)
try:
    import uvloop
except ImportError:
    pass
else:
    uvloop.install()


logger = logger.getChild('searx.network.client')

# Event loop owned by the background thread started in init(); None until
# that thread has created it.
LOOP = None

# Cache of SSL contexts, keyed by (proxy_url, cert, verify, trust_env, http2).
SSLCONTEXTS: Dict[Any, SSLContext] = {}


def shuffle_ciphers(ssl_context):
    """Shuffle httpx's default ciphers of a SSL context randomly.

    From `What Is TLS Fingerprint and How to Bypass It`_

    > When implementing TLS fingerprinting, servers can't operate based on a
    > locked-in whitelist database of fingerprints.  New fingerprints appear
    > when web clients or TLS libraries release new versions. So, they have to
    > live off a blocklist database instead.
    > ...
    > It's safe to leave the first three as is but shuffle the remaining ciphers
    > and you can bypass the TLS fingerprint check.

    .. _What Is TLS Fingerprint and How to Bypass It:
       https://www.zenrows.com/blog/what-is-tls-fingerprint#how-to-bypass-tls-fingerprinting

    """
    ciphers = httpx._config.DEFAULT_CIPHERS.split(':')  # pylint: disable=protected-access
    # The first three ciphers keep their position, only the tail is shuffled.
    fixed_head, shuffled_tail = ciphers[:3], ciphers[3:]
    random.shuffle(shuffled_tail)
    ssl_context.set_ciphers(":".join(fixed_head + shuffled_tail))


def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False):
    """Return the cached SSL context for the given settings.

    A context is created with ``httpx.create_ssl_context`` the first time a
    combination of arguments is seen and stored in ``SSLCONTEXTS``.  The cipher
    order of the returned context is re-shuffled on *every* call, not only when
    the context is created.
    """
    key = (proxy_url, cert, verify, trust_env, http2)
    if key not in SSLCONTEXTS:
        SSLCONTEXTS[key] = httpx.create_ssl_context(cert, verify, trust_env, http2)
    shuffle_ciphers(SSLCONTEXTS[key])
    return SSLCONTEXTS[key]


class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport):
    """Block HTTP request

    The constructor is blank because httpx.AsyncHTTPTransport.__init__ creates an SSLContext unconditionally:
    https://github.com/encode/httpx/blob/0f61aa58d66680c239ce43c8cdd453e7dc532bfc/httpx/_transports/default.py#L271

    Each SSLContext consumes more than 500kb of memory, since there is about one network per engine.

    In consequence, this class overrides all public methods

    For reference: https://github.com/encode/httpx/issues/2298
    """

    def __init__(self, *args, **kwargs):
        # pylint: disable=super-init-not-called
        # Not calling the base class constructor is on purpose (see the class
        # docstring): it would allocate an SSLContext that is never used.
        pass

    async def handle_async_request(self, request):
        """Reject every request: plain HTTP is disabled for this transport."""
        raise httpx.UnsupportedProtocol('HTTP protocol is disabled')

    async def aclose(self) -> None:
        """Nothing to close, the base class was never initialised."""

    async def __aenter__(self):
        return self

    async def __aexit__(
        self,
        exc_type=None,
        exc_value=None,
        traceback=None,
    ) -> None:
        pass


class AsyncProxyTransportFixed(AsyncProxyTransport):
    """Fix httpx_socks.AsyncProxyTransport

    Map python_socks exceptions to httpx.ProxyError exceptions
    """

    @staticmethod
    def _detail(exc, prefer_strerror=False):
        """Return a printable description of *exc* that is always a ``str``.

        With ``prefer_strerror`` the ``strerror`` attribute is used when it is
        set; otherwise the first positional argument of the exception is used,
        and ``str(exc)`` is the last resort.  Building the text this way never
        raises, so the original proxy error is not masked by a ``TypeError``
        (``strerror`` is ``None`` / first argument is not a string) or an
        ``IndexError`` (exception created without arguments).
        """
        if prefer_strerror:
            strerror = getattr(exc, 'strerror', None)
            if strerror:
                return str(strerror)
        if exc.args:
            return str(exc.args[0])
        return str(exc)

    async def handle_async_request(self, request):
        """Forward *request* and re-raise python_socks errors as ``httpx.ProxyError``."""
        try:
            return await super().handle_async_request(request)
        except ProxyConnectionError as e:
            detail = self._detail(e, prefer_strerror=True)
            raise httpx.ProxyError("ProxyConnectionError: " + detail, request=request) from e
        except ProxyTimeoutError as e:
            raise httpx.ProxyError("ProxyTimeoutError: " + self._detail(e), request=request) from e
        except ProxyError as e:
            raise httpx.ProxyError("ProxyError: " + self._detail(e), request=request) from e


def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries):
    """Build an :py:obj:`AsyncProxyTransportFixed` for a SOCKS proxy URL.

    ``socks5h://`` URLs are rewritten to ``socks5://`` with ``rdns=True``.  When
    ``verify`` is exactly ``True`` it is replaced by a cached SSL context from
    :py:obj:`get_sslcontexts`; any other value is passed through unchanged.
    """
    # support socks5h (requests compatibility):
    # https://requests.readthedocs.io/en/master/user/advanced/#socks
    # socks5://   hostname is resolved on client side
    # socks5h://  hostname is resolved on proxy side
    rdns = False
    socks5h = 'socks5h://'
    if proxy_url.startswith(socks5h):
        proxy_url = 'socks5://' + proxy_url[len(socks5h) :]
        rdns = True

    proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url)
    verify = get_sslcontexts(proxy_url, None, verify, True, http2) if verify is True else verify
    return AsyncProxyTransportFixed(
        proxy_type=proxy_type,
        proxy_host=proxy_host,
        proxy_port=proxy_port,
        username=proxy_username,
        password=proxy_password,
        rdns=rdns,
        loop=get_loop(),
        verify=verify,
        http2=http2,
        local_address=local_address,
        limits=limit,
        retries=retries,
    )


def get_transport(verify, http2, local_address, proxy_url, limit, retries):
    """Build a ``httpx.AsyncHTTPTransport``, optionally going through *proxy_url*.

    When ``verify`` is exactly ``True`` it is replaced by a cached SSL context
    from :py:obj:`get_sslcontexts`; any other value is passed through unchanged.
    A falsy ``proxy_url`` means no proxy.
    """
    verify = get_sslcontexts(None, None, verify, True, http2) if verify is True else verify
    return httpx.AsyncHTTPTransport(
        # pylint: disable=protected-access
        verify=verify,
        http2=http2,
        limits=limit,
        proxy=httpx._config.Proxy(proxy_url) if proxy_url else None,
        local_address=local_address,
        retries=retries,
    )


def new_client(
    # pylint: disable=too-many-arguments
    enable_http,
    verify,
    enable_http2,
    max_connections,
    max_keepalive_connections,
    keepalive_expiry,
    proxies,
    local_address,
    retries,
    max_redirects,
    hook_log_response,
):
    """Create a ``httpx.AsyncClient`` with one mounted transport per proxy pattern.

    - ``proxies`` maps httpx mount patterns to proxy URLs; ``None`` is treated
      like an empty mapping.  SOCKS URLs (``socks4://``, ``socks5://``,
      ``socks5h://``) get a SOCKS transport, every other URL a regular one.
    - When ``enable_http`` is false, ``http://`` patterns are skipped and
      ``http://`` is mounted on :py:obj:`AsyncHTTPTransportNoHttp`, which
      rejects every request.
    - ``hook_log_response``, when truthy, is registered as the only
      ``response`` event hook.
    """
    limit = httpx.Limits(
        max_connections=max_connections,
        max_keepalive_connections=max_keepalive_connections,
        keepalive_expiry=keepalive_expiry,
    )
    # See https://www.python-httpx.org/advanced/#routing
    mounts = {}
    for pattern, proxy_url in (proxies or {}).items():
        if not enable_http and pattern.startswith('http://'):
            continue
        if proxy_url.startswith(('socks4://', 'socks5://', 'socks5h://')):
            mounts[pattern] = get_transport_for_socks_proxy(
                verify, enable_http2, local_address, proxy_url, limit, retries
            )
        else:
            mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries)

    if not enable_http:
        mounts['http://'] = AsyncHTTPTransportNoHttp()

    # Default transport for everything that no mount pattern matches.
    transport = get_transport(verify, enable_http2, local_address, None, limit, retries)

    event_hooks = None
    if hook_log_response:
        event_hooks = {'response': [hook_log_response]}

    return httpx.AsyncClient(
        transport=transport,
        mounts=mounts,
        max_redirects=max_redirects,
        event_hooks=event_hooks,
    )


def get_loop():
    """Return the module-level event loop :py:obj:`LOOP`.

    ``LOOP`` is assigned inside the background thread started by
    :py:obj:`init`, so this can return ``None`` if it is called before that
    thread has run.
    """
    return LOOP


def init():
    """Quiet the HTTP libraries' loggers and start the background event loop.

    The listed loggers are set to ``WARNING``.  A daemon thread named
    ``asyncio_loop`` then creates a new event loop, stores it in
    :py:obj:`LOOP` and runs it forever.
    """
    # log
    for logger_name in (
        'httpx',
        'httpcore.proxy',
        'httpcore.connection',
        'httpcore.http11',
        'httpcore.http2',
        'hpack.hpack',
        'hpack.table',
    ):
        logging.getLogger(logger_name).setLevel(logging.WARNING)

    # loop
    def loop_thread():
        global LOOP
        LOOP = asyncio.new_event_loop()
        LOOP.run_forever()

    thread = threading.Thread(
        target=loop_thread,
        name='asyncio_loop',
        daemon=True,
    )
    thread.start()


init()