Browse Source

[fix] image_proxy: always close the httpx respone

previously, when the content type was not an image and some other error,
the httpx response was not closed
Alexandre Flament 3 years ago
parent
commit
43fcaa642a
2 changed files with 52 additions and 20 deletions
  1. 22 1
      searx/network/__init__.py
  2. 30 19
      searx/webapp.py

+ 22 - 1
searx/network/__init__.py

@@ -5,6 +5,7 @@
 import asyncio
 import threading
 import concurrent.futures
+from types import MethodType
 from timeit import default_timer
 
 import httpx
@@ -161,6 +162,7 @@ def patch(url, data=None, **kwargs):
 def delete(url, **kwargs):
     return request('delete', url, **kwargs)
 
+
 async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
     try:
         async with network.stream(method, url, **kwargs) as response:
@@ -170,12 +172,22 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
             async for chunk in response.aiter_raw(65536):
                 if len(chunk) > 0:
                     queue.put(chunk)
+    except httpx.ResponseClosed as e:
+        # the response was closed
+        pass
     except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e:
         queue.put(e)
     finally:
         queue.put(None)
 
 
+def _close_response_method(self):
+    asyncio.run_coroutine_threadsafe(
+        self.aclose(),
+        get_loop()
+    )
+
+
 def stream(method, url, **kwargs):
     """Replace httpx.stream.
 
@@ -193,10 +205,19 @@ def stream(method, url, **kwargs):
         stream_chunk_to_queue(get_network(), queue, method, url, **kwargs),
         get_loop()
     )
+
+    # yield response
+    response = queue.get()
+    if isinstance(response, Exception):
+        raise response
+    response.close = MethodType(_close_response_method, response)
+    yield response
+
+    # yield chunks
     chunk_or_exception = queue.get()
     while chunk_or_exception is not None:
         if isinstance(chunk_or_exception, Exception):
             raise chunk_or_exception
         yield chunk_or_exception
         chunk_or_exception = queue.get()
-    return future.result()
+    future.result()

+ 30 - 19
searx/webapp.py

@@ -1065,7 +1065,7 @@ def _is_selected_language_supported(engine, preferences):  # pylint: disable=red
 
 @app.route('/image_proxy', methods=['GET'])
 def image_proxy():
-    # pylint: disable=too-many-return-statements
+    # pylint: disable=too-many-return-statements, too-many-branches
 
     url = request.args.get('url')
     if not url:
@@ -1076,14 +1076,20 @@ def image_proxy():
         return '', 400
 
     maximum_size = 5 * 1024 * 1024
-
+    forward_resp = False
+    resp = None
     try:
-        headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
-        headers['User-Agent'] = gen_useragent()
+        request_headers = {
+            'User-Agent': gen_useragent(),
+            'Accept': 'image/webp,*/*',
+            'Accept-Encoding': 'gzip, deflate',
+            'Sec-GPC': '1',
+            'DNT': '1',
+        }
         stream = http_stream(
             method = 'GET',
             url = url,
-            headers = headers,
+            headers = request_headers,
             timeout = settings['outgoing']['request_timeout'],
             allow_redirects = True,
             max_redirects = 20
@@ -1095,32 +1101,37 @@ def image_proxy():
             and int(content_length) > maximum_size ):
             return 'Max size', 400
 
-        if resp.status_code == 304:
-            return '', resp.status_code
-
         if resp.status_code != 200:
-            logger.debug(
-                'image-proxy: wrong response code: {0}'.format(
-                    resp.status_code))
+            logger.debug('image-proxy: wrong response code: %i', resp.status_code)
             if resp.status_code >= 400:
                 return '', resp.status_code
             return '', 400
 
-        if not resp.headers.get('content-type', '').startswith('image/'):
-            logger.debug(
-                'image-proxy: wrong content-type: {0}'.format(
-                    resp.headers.get('content-type')))
+        if not resp.headers.get('Content-Type', '').startswith('image/'):
+            logger.debug('image-proxy: wrong content-type: %s', resp.headers.get('Content-Type', ''))
             return '', 400
 
+        forward_resp = True
+    except httpx.HTTPError:
+        logger.exception('HTTP error')
+        return '', 400
+    finally:
+        if resp and not forward_resp:
+            # the code is about to return an HTTP 400 error to the browser
+            # we make sure to close the response between searxng and the HTTP server
+            try:
+                resp.close()
+            except httpx.HTTPError:
+                logger.exception('HTTP error on closing')
+
+    try:
         headers = dict_subset(
             resp.headers,
-            {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}
+            {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'}
         )
 
-        total_length = 0
-
         def forward_chunk():
-            nonlocal total_length
+            total_length = 0
             for chunk in stream:
                 total_length += len(chunk)
                 if total_length > maximum_size: