Browse Source

[mod] multithreading only in searx.search.* packages

it prepares the new architecture change,
everything about multithreading in moved in the searx.search.* packages

previously the call to the "init" function of the engines was done in searx.engines:
* the network was not set (request not sent using the defined proxy)
* it requires to monkey patch the code to avoid HTTP requests during the tests
Alexandre Flament 4 years ago
parent
commit
8c1a65d32f

+ 0 - 23
searx/engines/__init__.py

@@ -167,26 +167,3 @@ def load_engines(engine_list):
         if engine is not None:
             engines[engine.name] = engine
     return engines
-
-
-def initialize_engines(engine_list):
-    load_engines(engine_list)
-    initialize_network(engine_list, settings['outgoing'])
-
-    def engine_init(engine_name, init_fn):
-        try:
-            set_context_network_name(engine_name)
-            init_fn(get_engine_from_settings(engine_name))
-        except SearxEngineResponseException as exc:
-            logger.warn('%s engine: Fail to initialize // %s', engine_name, exc)
-        except Exception:
-            logger.exception('%s engine: Fail to initialize', engine_name)
-        else:
-            logger.debug('%s engine: Initialized', engine_name)
-
-    for engine_name, engine in engines.items():
-        if hasattr(engine, 'init'):
-            init_fn = getattr(engine, 'init')
-            if init_fn:
-                logger.debug('%s engine: Starting background initialization', engine_name)
-                threading.Thread(target=engine_init, args=(engine_name, init_fn)).start()

+ 9 - 5
searx/search/__init__.py

@@ -29,9 +29,11 @@ from searx.results import ResultContainer
 from searx import logger
 from searx.plugins import plugins
 from searx.search.models import EngineRef, SearchQuery
-from searx.search.processors import processors, initialize as initialize_processors
-from searx.search.checker import initialize as initialize_checker
+from searx.engines import load_engines
+from searx.network import initialize as initialize_network
 from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
+from searx.search.processors import PROCESSORS, initialize as initialize_processors
+from searx.search.checker import initialize as initialize_checker
 
 
 logger = logger.getChild('search')
@@ -50,8 +52,10 @@ else:
 
 def initialize(settings_engines=None, enable_checker=False):
     settings_engines = settings_engines or settings['engines']
-    initialize_processors(settings_engines)
+    load_engines(settings_engines)
+    initialize_network(settings_engines, settings['outgoing'])
     initialize_metrics([engine['name'] for engine in settings_engines])
+    initialize_processors(settings_engines)
     if enable_checker:
         initialize_checker()
 
@@ -106,7 +110,7 @@ class Search:
 
         # start search-reqest for all selected engines
         for engineref in self.search_query.engineref_list:
-            processor = processors[engineref.name]
+            processor = PROCESSORS[engineref.name]
 
             # stop the request now if the engine is suspend
             if processor.extend_container_if_suspended(self.result_container):
@@ -152,7 +156,7 @@ class Search:
 
         for engine_name, query, request_params in requests:
             th = threading.Thread(
-                target=processors[engine_name].search,
+                target=PROCESSORS[engine_name].search,
                 args=(query, request_params, self.result_container, self.start_time, self.actual_timeout),
                 name=search_id,
             )

+ 3 - 3
searx/search/checker/__main__.py

@@ -8,7 +8,7 @@ import logging
 
 import searx.search
 import searx.search.checker
-from searx.search import processors
+from searx.search import PROCESSORS
 from searx.engines import engine_shortcuts
 
 
@@ -41,13 +41,13 @@ def iter_processor(engine_name_list):
     if len(engine_name_list) > 0:
         for name in engine_name_list:
             name = engine_shortcuts.get(name, name)
-            processor = processors.get(name)
+            processor = PROCESSORS.get(name)
             if processor is not None:
                 yield name, processor
             else:
                 stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RED}Engine does not exist{RESET_SEQ}')
     else:
-        for name, processor in searx.search.processors.items():
+        for name, processor in searx.search.PROCESSORS.items():
             yield name, processor
 
 

+ 2 - 2
searx/search/checker/background.py

@@ -9,7 +9,7 @@ import signal
 
 from searx import logger, settings, searx_debug
 from searx.exceptions import SearxSettingsException
-from searx.search.processors import processors
+from searx.search.processors import PROCESSORS
 from searx.search.checker import Checker
 from searx.shared import schedule, storage
 
@@ -55,7 +55,7 @@ def run():
             'status': 'ok',
             'engines': {}
         }
-        for name, processor in processors.items():
+        for name, processor in PROCESSORS.items():
             logger.debug('Checking %s engine', name)
             checker = Checker(processor)
             checker.run()

+ 27 - 10
searx/search/processors/__init__.py

@@ -11,9 +11,11 @@ __all__ = [
     'OnlineProcessor',
     'OnlineDictionaryProcessor',
     'OnlineCurrencyProcessor',
-    'processors',
+    'PROCESSORS',
 ]
 
+import threading
+
 from searx import logger
 import searx.engines as engines
 
@@ -24,7 +26,7 @@ from .online_currency import OnlineCurrencyProcessor
 from .abstract import EngineProcessor
 
 logger = logger.getChild('search.processors')
-processors = {}
+PROCESSORS = {}
 """Cache request processores, stored by *engine-name* (:py:func:`initialize`)"""
 
 def get_processor_class(engine_type):
@@ -34,6 +36,7 @@ def get_processor_class(engine_type):
             return c
     return None
 
+
 def get_processor(engine, engine_name):
     """Return processor instance that fits to ``engine.engine.type``)"""
     engine_type = getattr(engine, 'engine_type', 'online')
@@ -42,12 +45,26 @@ def get_processor(engine, engine_name):
         return processor_class(engine, engine_name)
     return None
 
+
+def initialize_processor(processor):
+    """Initialize one processor
+
+    Call the init function of the engine
+    """
+    if processor.has_initialize_function:
+        t = threading.Thread(target=processor.initialize, daemon=True)
+        t.start()
+
+
 def initialize(engine_list):
-    """Initialize all engines and store a processor for each engine in :py:obj:`processors`."""
-    engines.initialize_engines(engine_list)
-    for engine_name, engine in engines.engines.items():
-        processor = get_processor(engine, engine_name)
-        if processor is None:
-            logger.error('Error get processor for engine %s', engine_name)
-        else:
-            processors[engine_name] = processor
+    """Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`."""
+    for engine_data in engine_list:
+        engine_name = engine_data['name']
+        engine = engines.engines.get(engine_name)
+        if engine:
+            processor = get_processor(engine, engine_name)
+            initialize_processor(processor)
+            if processor is None:
+                logger.error('Error get processor for engine %s', engine_name)
+            else:
+                PROCESSORS[engine_name] = processor

+ 16 - 1
searx/search/processors/abstract.py

@@ -13,7 +13,8 @@ from searx import logger
 from searx.engines import settings
 from searx.network import get_time_for_thread, get_network
 from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
-from searx.exceptions import SearxEngineAccessDeniedException
+from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
+from searx.utils import get_engine_from_settings
 
 logger = logger.getChild('searx.search.processor')
 SUSPENDED_STATUS = {}
@@ -66,6 +67,20 @@ class EngineProcessor(ABC):
         key = id(key) if key else self.engine_name
         self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
 
+    def initialize(self):
+        try:
+            self.engine.init(get_engine_from_settings(self.engine_name))
+        except SearxEngineResponseException as exc:
+            logger.warn('%s engine: Fail to initialize // %s', self.engine_name, exc)
+        except Exception:  # pylint: disable=broad-except
+            logger.exception('%s engine: Fail to initialize', self.engine_name)
+        else:
+            logger.debug('%s engine: Initialized', self.engine_name)
+
+    @property
+    def has_initialize_function(self):
+        return hasattr(self.engine, 'init')
+
     def handle_exception(self, result_container, exception_or_message, suspend=False):
         # update result_container
         if isinstance(exception_or_message, BaseException):

+ 12 - 3
searx/search/processors/online.py

@@ -5,7 +5,7 @@
 
 """
 
-from time import time
+from timeit import default_timer
 import asyncio
 import httpx
 
@@ -40,6 +40,15 @@ class OnlineProcessor(EngineProcessor):
 
     engine_type = 'online'
 
+    def initialize(self):
+        # set timeout for all HTTP requests
+        searx.network.set_timeout_for_thread(self.engine.timeout, start_time=default_timer())
+        # reset the HTTP total time
+        searx.network.reset_time_for_thread()
+        # set the network
+        searx.network.set_context_network_name(self.engine_name)
+        super().initialize()
+
     def get_params(self, search_query, engine_category):
         params = super().get_params(search_query, engine_category)
         if params is None:
@@ -139,7 +148,7 @@ class OnlineProcessor(EngineProcessor):
             self.handle_exception(result_container, e, suspend=True)
             logger.error("engine {0} : HTTP requests timeout"
                          "(search duration : {1} s, timeout: {2} s) : {3}"
-                         .format(self.engine_name, time() - start_time,
+                         .format(self.engine_name, default_timer() - start_time,
                                  timeout_limit,
                                  e.__class__.__name__))
         except (httpx.HTTPError, httpx.StreamError) as e:
@@ -147,7 +156,7 @@ class OnlineProcessor(EngineProcessor):
             self.handle_exception(result_container, e, suspend=True)
             logger.exception("engine {0} : requests exception"
                              "(search duration : {1} s, timeout: {2} s) : {3}"
-                             .format(self.engine_name, time() - start_time,
+                             .format(self.engine_name, default_timer() - start_time,
                                      timeout_limit,
                                      e))
         except SearxEngineCaptchaException as e:

+ 2 - 2
tests/unit/test_engines_init.py

@@ -23,7 +23,7 @@ class TestEnginesInit(SearxTestCase):
         engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'},
                        {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}]
 
-        engines.initialize_engines(engine_list)
+        engines.load_engines(engine_list)
         self.assertEqual(len(engines.engines), 1)
         self.assertIn('engine1', engines.engines)
         self.assertNotIn('onions', engines.categories)
@@ -35,7 +35,7 @@ class TestEnginesInit(SearxTestCase):
                         'timeout': 20.0, 'onion_url': 'http://engine1.onion'},
                        {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}]
 
-        engines.initialize_engines(engine_list)
+        engines.load_engines(engine_list)
         self.assertEqual(len(engines.engines), 2)
         self.assertIn('engine1', engines.engines)
         self.assertIn('engine2', engines.engines)

+ 10 - 14
tests/unit/test_query.py

@@ -1,11 +1,8 @@
-from mock import patch
-
-from searx.search import initialize
+from searx import settings
+from searx.engines import load_engines
 from searx.query import RawTextQuery
 from searx.testing import SearxTestCase
 
-import searx.engines
-
 
 TEST_ENGINES = [
     {
@@ -241,7 +238,7 @@ class TestBang(SearxTestCase):
     THE_QUERY = 'the query'
 
     def test_bang(self):
-        initialize(TEST_ENGINES)
+        load_engines(TEST_ENGINES)
 
         for bang in TestBang.SPECIFIC_BANGS + TestBang.NOT_SPECIFIC_BANGS:
             with self.subTest(msg="Check bang", bang=bang):
@@ -267,12 +264,12 @@ class TestBang(SearxTestCase):
                 self.assertFalse(query.specific)
 
     def test_bang_not_found(self):
-        initialize(TEST_ENGINES)
+        load_engines(TEST_ENGINES)
         query = RawTextQuery('the query !bang_not_found', [])
         self.assertEqual(query.getFullQuery(), 'the query !bang_not_found')
 
     def test_bang_autocomplete(self):
-        initialize(TEST_ENGINES)
+        load_engines(TEST_ENGINES)
         query = RawTextQuery('the query !dum', [])
         self.assertEqual(query.autocomplete_list, ['!dummy_engine'])
 
@@ -281,10 +278,9 @@ class TestBang(SearxTestCase):
         self.assertEqual(query.getQuery(), '!dum the query')
 
     def test_bang_autocomplete_empty(self):
-        with patch.object(searx.engines, 'initialize_engines', searx.engines.load_engines):
-            initialize()
-            query = RawTextQuery('the query !', [])
-            self.assertEqual(query.autocomplete_list, ['!images', '!wikipedia', '!osm'])
+        load_engines(settings['engines'])
+        query = RawTextQuery('the query !', [])
+        self.assertEqual(query.autocomplete_list, ['!images', '!wikipedia', '!osm'])
 
-            query = RawTextQuery('the query ?', ['osm'])
-            self.assertEqual(query.autocomplete_list, ['?images', '?wikipedia'])
+        query = RawTextQuery('the query ?', ['osm'])
+        self.assertEqual(query.autocomplete_list, ['?images', '?wikipedia'])

+ 4 - 2
tests/unit/test_webapp.py

@@ -5,14 +5,16 @@ from urllib.parse import ParseResult
 from mock import Mock
 from searx.testing import SearxTestCase
 from searx.search import Search
-import searx.engines
+import searx.search.processors
 
 
 class ViewsTestCase(SearxTestCase):
 
     def setUp(self):
         # skip init function (no external HTTP request)
-        self.setattr4test(searx.engines, 'initialize_engines', searx.engines.load_engines)
+        def dummy(*args, **kwargs):
+            pass
+        self.setattr4test(searx.search.processors, 'initialize_processor', dummy)
 
         from searx import webapp  # pylint disable=import-outside-toplevel