4 years ago · 7ec8bc3ea7
--- a/docs/admin/engines.rst
+++ b/docs/admin/engines.rst
@@ -33,7 +33,7 @@ Engine        ..          Paging support       **P**
 
				 ------------------------- -------------------- ------------
			
 
				 Shortcut      **S**       Language support     **L**
			
 
				 Timeout       **TO**      Time range support   **TR**
			
 
				-Disabled      **D**       Offline              **O**
			
 
				+Disabled      **D**       Engine type          **ET**
			
 
				 ------------- ----------- -------------------- ------------
			
 
				 Safe search   **SS**
			
 
				 ------------- ----------- ---------------------------------
			
@@ -62,10 +62,10 @@ Show errors   **DE**
 
				         - SS
			
 
				         - D
			
 
				         - TR
			
 
				-        - O
			
 
				-	- W
			
 
				-	- D
			
 
				-	- DE
			
 
				+        - ET
			
 
				+        - W
			
 
				+        - D
			
 
				+        - DE
			
 
				 
			
 
				       {% for name, mod in engines.items() %}
			
 
				 
			
@@ -79,7 +79,7 @@ Show errors   **DE**
 
				         - {{(mod.safesearch and "y") or ""}}
			
 
				         - {{(mod.disabled and "y") or ""}}
			
 
				         - {{(mod.time_range_support and "y") or ""}}
			
 
				-        - {{(mod.offline and "y") or ""}}
			
 
				+        - {{mod.engine_type or ""}}
			
 
				         - {{mod.weight or 1 }}
			
 
				         - {{(mod.disabled and "y") or ""}}
			
 
				         - {{(mod.display_error_messages and "y") or ""}}
			
--- a/docs/dev/engine_overview.rst
+++ b/docs/dev/engine_overview.rst
@@ -44,7 +44,7 @@ categories              list        pages, in which the engine is working
 
				 paging                  boolean     support multiple pages
			
 
				 language_support        boolean     support language choosing
			
 
				 time_range_support      boolean     support search time range
			
 
				-offline                 boolean     engine runs offline
			
 
				+engine_type             str         ``online`` by default, ``offline``
			
 
				 ======================= =========== ===========================================
			
 
				 
			
 
				 .. _engine settings:
			
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -53,7 +53,7 @@ engine_default_args = {'paging': False,
 
				                        'suspend_end_time': 0,
			
 
				                        'continuous_errors': 0,
			
 
				                        'time_range_support': False,
			
 
				-                       'offline': False,
			
 
				+                       'engine_type': 'online',
			
 
				                        'display_error_messages': True,
			
 
				                        'tokens': []}
			
 
				 
			
@@ -142,7 +142,9 @@ def load_engine(engine_data):
 
				         'errors': 0
			
 
				     }
			
 
				 
			
 
				-    if not engine.offline:
			
 
				+    engine_type = getattr(engine, 'engine_type', 'online')
			
 
				+
			
 
				+    if engine_type != 'offline':
			
 
				         engine.stats['page_load_time'] = 0
			
 
				         engine.stats['page_load_count'] = 0
			
 
				 
			
@@ -209,7 +211,7 @@ def get_engines_stats(preferences):
 
				         else:
			
 
				             score = score_per_result = 0.0
			
 
				 
			
 
				-        if not engine.offline:
			
 
				+        if engine.engine_type != 'offline':
			
 
				             load_times = 0
			
 
				             if engine.stats['page_load_count'] != 0:
			
 
				                 load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa
			
@@ -300,7 +302,7 @@ def initialize_engines(engine_list):
 
				 
			
 
				 def _set_https_support_for_engine(engine):
			
 
				     # check HTTPS support if it is not disabled
			
 
				-    if not engine.offline and not hasattr(engine, 'https_support'):
			
 
				+    if engine.engine_type != 'offline' and not hasattr(engine, 'https_support'):
			
 
				         params = engine.request('http_test', {
			
 
				             'method': 'GET',
			
 
				             'headers': {},
			
--- a/searx/engines/command.py
+++ b/searx/engines/command.py
@@ -23,7 +23,7 @@ from threading import Thread
 
				 from searx import logger
			
 
				 
			
 
				 
			
 
				-offline = True
			
 
				+engine_type = 'offline'
			
 
				 paging = True
			
 
				 command = []
			
 
				 delimiter = {}
			
--- a/searx/search/__init__.py
+++ b/searx/search/__init__.py
@@ -20,21 +20,15 @@ import gc
 
				 import threading
			
 
				 from time import time
			
 
				 from uuid import uuid4
			
 
				-from urllib.parse import urlparse
			
 
				 from _thread import start_new_thread
			
 
				 
			
 
				-import requests.exceptions
			
 
				-import searx.poolrequests as requests_lib
			
 
				-from searx.engines import engines, settings
			
 
				+from searx import settings
			
 
				 from searx.answerers import ask
			
 
				 from searx.external_bang import get_bang_url
			
 
				-from searx.utils import gen_useragent
			
 
				 from searx.results import ResultContainer
			
 
				 from searx import logger
			
 
				 from searx.plugins import plugins
			
 
				-from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
			
 
				-                              SearxEngineTooManyRequestsException,)
			
 
				-from searx.metrology.error_recorder import record_exception, record_error
			
 
				+from searx.search.processors import processors, initialize as initialize_processors
			
 
				 
			
 
				 
			
 
				 logger = logger.getChild('search')
			
@@ -51,6 +45,11 @@ else:
 
				         sys.exit(1)
			
 
				 
			
 
				 
			
 
				+def initialize(settings_engines=None):
			
 
				+    settings_engines = settings_engines or settings['engines']
			
 
				+    initialize_processors(settings_engines)
			
 
				+
			
 
				+
			
 
				 class EngineRef:
			
 
				 
			
 
				     __slots__ = 'name', 'category', 'from_bang'
			
@@ -110,231 +109,6 @@ class SearchQuery:
 
				             and self.external_bang == other.external_bang
			
 
				 
			
 
				 
			
 
				-def send_http_request(engine, request_params):
			
 
				-    # create dictionary which contain all
			
 
				-    # informations about the request
			
 
				-    request_args = dict(
			
 
				-        headers=request_params['headers'],
			
 
				-        cookies=request_params['cookies'],
			
 
				-        verify=request_params['verify'],
			
 
				-        auth=request_params['auth']
			
 
				-    )
			
 
				-
			
 
				-    # setting engine based proxies
			
 
				-    if hasattr(engine, 'proxies'):
			
 
				-        request_args['proxies'] = requests_lib.get_proxies(engine.proxies)
			
 
				-
			
 
				-    # max_redirects
			
 
				-    max_redirects = request_params.get('max_redirects')
			
 
				-    if max_redirects:
			
 
				-        request_args['max_redirects'] = max_redirects
			
 
				-
			
 
				-    # soft_max_redirects
			
 
				-    soft_max_redirects = request_params.get('soft_max_redirects', max_redirects or 0)
			
 
				-
			
 
				-    # raise_for_status
			
 
				-    request_args['raise_for_httperror'] = request_params.get('raise_for_httperror', False)
			
 
				-
			
 
				-    # specific type of request (GET or POST)
			
 
				-    if request_params['method'] == 'GET':
			
 
				-        req = requests_lib.get
			
 
				-    else:
			
 
				-        req = requests_lib.post
			
 
				-
			
 
				-    request_args['data'] = request_params['data']
			
 
				-
			
 
				-    # send the request
			
 
				-    response = req(request_params['url'], **request_args)
			
 
				-
			
 
				-    # check soft limit of the redirect count
			
 
				-    if len(response.history) > soft_max_redirects:
			
 
				-        # unexpected redirect : record an error
			
 
				-        # but the engine might still return valid results.
			
 
				-        status_code = str(response.status_code or '')
			
 
				-        reason = response.reason or ''
			
 
				-        hostname = str(urlparse(response.url or '').netloc)
			
 
				-        record_error(engine.name,
			
 
				-                     '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
			
 
				-                     (status_code, reason, hostname))
			
 
				-
			
 
				-    return response
			
 
				-
			
 
				-
			
 
				-def search_one_http_request(engine, query, request_params):
			
 
				-    # update request parameters dependent on
			
 
				-    # search-engine (contained in engines folder)
			
 
				-    engine.request(query, request_params)
			
 
				-
			
 
				-    # ignoring empty urls
			
 
				-    if request_params['url'] is None:
			
 
				-        return None
			
 
				-
			
 
				-    if not request_params['url']:
			
 
				-        return None
			
 
				-
			
 
				-    # send request
			
 
				-    response = send_http_request(engine, request_params)
			
 
				-
			
 
				-    # parse the response
			
 
				-    response.search_params = request_params
			
 
				-    return engine.response(response)
			
 
				-
			
 
				-
			
 
				-def search_one_http_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
			
 
				-    # set timeout for all HTTP requests
			
 
				-    requests_lib.set_timeout_for_thread(timeout_limit, start_time=start_time)
			
 
				-    # reset the HTTP total time
			
 
				-    requests_lib.reset_time_for_thread()
			
 
				-
			
 
				-    #
			
 
				-    engine = engines[engine_name]
			
 
				-
			
 
				-    # suppose everything will be alright
			
 
				-    requests_exception = False
			
 
				-    suspended_time = None
			
 
				-
			
 
				-    try:
			
 
				-        # send requests and parse the results
			
 
				-        search_results = search_one_http_request(engine, query, request_params)
			
 
				-
			
 
				-        # check if the engine accepted the request
			
 
				-        if search_results is not None:
			
 
				-            # yes, so add results
			
 
				-            result_container.extend(engine_name, search_results)
			
 
				-
			
 
				-            # update engine time when there is no exception
			
 
				-            engine_time = time() - start_time
			
 
				-            page_load_time = requests_lib.get_time_for_thread()
			
 
				-            result_container.add_timing(engine_name, engine_time, page_load_time)
			
 
				-            with threading.RLock():
			
 
				-                engine.stats['engine_time'] += engine_time
			
 
				-                engine.stats['engine_time_count'] += 1
			
 
				-                # update stats with the total HTTP time
			
 
				-                engine.stats['page_load_time'] += page_load_time
			
 
				-                engine.stats['page_load_count'] += 1
			
 
				-    except Exception as e:
			
 
				-        record_exception(engine_name, e)
			
 
				-
			
 
				-        # Timing
			
 
				-        engine_time = time() - start_time
			
 
				-        page_load_time = requests_lib.get_time_for_thread()
			
 
				-        result_container.add_timing(engine_name, engine_time, page_load_time)
			
 
				-
			
 
				-        # Record the errors
			
 
				-        with threading.RLock():
			
 
				-            engine.stats['errors'] += 1
			
 
				-
			
 
				-        if (issubclass(e.__class__, requests.exceptions.Timeout)):
			
 
				-            result_container.add_unresponsive_engine(engine_name, 'HTTP timeout')
			
 
				-            # requests timeout (connect or read)
			
 
				-            logger.error("engine {0} : HTTP requests timeout"
			
 
				-                         "(search duration : {1} s, timeout: {2} s) : {3}"
			
 
				-                         .format(engine_name, engine_time, timeout_limit, e.__class__.__name__))
			
 
				-            requests_exception = True
			
 
				-        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
			
 
				-            result_container.add_unresponsive_engine(engine_name, 'HTTP error')
			
 
				-            # other requests exception
			
 
				-            logger.exception("engine {0} : requests exception"
			
 
				-                             "(search duration : {1} s, timeout: {2} s) : {3}"
			
 
				-                             .format(engine_name, engine_time, timeout_limit, e))
			
 
				-            requests_exception = True
			
 
				-        elif (issubclass(e.__class__, SearxEngineCaptchaException)):
			
 
				-            result_container.add_unresponsive_engine(engine_name, 'CAPTCHA required')
			
 
				-            logger.exception('engine {0} : CAPTCHA')
			
 
				-            suspended_time = e.suspended_time  # pylint: disable=no-member
			
 
				-        elif (issubclass(e.__class__, SearxEngineTooManyRequestsException)):
			
 
				-            result_container.add_unresponsive_engine(engine_name, 'too many requests')
			
 
				-            logger.exception('engine {0} : Too many requests')
			
 
				-            suspended_time = e.suspended_time  # pylint: disable=no-member
			
 
				-        elif (issubclass(e.__class__, SearxEngineAccessDeniedException)):
			
 
				-            result_container.add_unresponsive_engine(engine_name, 'blocked')
			
 
				-            logger.exception('engine {0} : Searx is blocked')
			
 
				-            suspended_time = e.suspended_time  # pylint: disable=no-member
			
 
				-        else:
			
 
				-            result_container.add_unresponsive_engine(engine_name, 'unexpected crash')
			
 
				-            # others errors
			
 
				-            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
			
 
				-    else:
			
 
				-        if getattr(threading.current_thread(), '_timeout', False):
			
 
				-            record_error(engine_name, 'Timeout')
			
 
				-
			
 
				-    # suspend the engine if there is an HTTP error
			
 
				-    # or suspended_time is defined
			
 
				-    with threading.RLock():
			
 
				-        if requests_exception or suspended_time:
			
 
				-            # update continuous_errors / suspend_end_time
			
 
				-            engine.continuous_errors += 1
			
 
				-            if suspended_time is None:
			
 
				-                suspended_time = min(settings['search']['max_ban_time_on_fail'],
			
 
				-                                     engine.continuous_errors * settings['search']['ban_time_on_fail'])
			
 
				-            engine.suspend_end_time = time() + suspended_time
			
 
				-        else:
			
 
				-            # reset the suspend variables
			
 
				-            engine.continuous_errors = 0
			
 
				-            engine.suspend_end_time = 0
			
 
				-
			
 
				-
			
 
				-def record_offline_engine_stats_on_error(engine, result_container, start_time):
			
 
				-    engine_time = time() - start_time
			
 
				-    result_container.add_timing(engine.name, engine_time, engine_time)
			
 
				-
			
 
				-    with threading.RLock():
			
 
				-        engine.stats['errors'] += 1
			
 
				-
			
 
				-
			
 
				-def search_one_offline_request(engine, query, request_params):
			
 
				-    return engine.search(query, request_params)
			
 
				-
			
 
				-
			
 
				-def search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
			
 
				-    engine = engines[engine_name]
			
 
				-
			
 
				-    try:
			
 
				-        search_results = search_one_offline_request(engine, query, request_params)
			
 
				-
			
 
				-        if search_results:
			
 
				-            result_container.extend(engine_name, search_results)
			
 
				-
			
 
				-            engine_time = time() - start_time
			
 
				-            result_container.add_timing(engine_name, engine_time, engine_time)
			
 
				-            with threading.RLock():
			
 
				-                engine.stats['engine_time'] += engine_time
			
 
				-                engine.stats['engine_time_count'] += 1
			
 
				-
			
 
				-    except ValueError as e:
			
 
				-        record_exception(engine_name, e)
			
 
				-        record_offline_engine_stats_on_error(engine, result_container, start_time)
			
 
				-        logger.exception('engine {0} : invalid input : {1}'.format(engine_name, e))
			
 
				-    except Exception as e:
			
 
				-        record_exception(engine_name, e)
			
 
				-        record_offline_engine_stats_on_error(engine, result_container, start_time)
			
 
				-        result_container.add_unresponsive_engine(engine_name, 'unexpected crash', str(e))
			
 
				-        logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
			
 
				-    else:
			
 
				-        if getattr(threading.current_thread(), '_timeout', False):
			
 
				-            record_error(engine_name, 'Timeout')
			
 
				-
			
 
				-
			
 
				-def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
			
 
				-    if engines[engine_name].offline:
			
 
				-        return search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit)  # noqa
			
 
				-    return search_one_http_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit)
			
 
				-
			
 
				-
			
 
				-# get default reqest parameter
			
 
				-def default_request_params():
			
 
				-    return {
			
 
				-        'method': 'GET',
			
 
				-        'headers': {},
			
 
				-        'data': {},
			
 
				-        'url': '',
			
 
				-        'cookies': {},
			
 
				-        'verify': True,
			
 
				-        'auth': None,
			
 
				-        'raise_for_httperror': True
			
 
				-    }
			
 
				-
			
 
				-
			
 
				 class Search:
			
 
				     """Search information container"""
			
 
				 
			
@@ -375,69 +149,20 @@ class Search:
 
				             return True
			
 
				         return False
			
 
				 
			
 
				-    def _is_accepted(self, engine_name, engine):
			
 
				-        # skip suspended engines
			
 
				-        if engine.suspend_end_time >= time():
			
 
				-            logger.debug('Engine currently suspended: %s', engine_name)
			
 
				-            return False
			
 
				-
			
 
				-        # if paging is not supported, skip
			
 
				-        if self.search_query.pageno > 1 and not engine.paging:
			
 
				-            return False
			
 
				-
			
 
				-        # if time_range is not supported, skip
			
 
				-        if self.search_query.time_range and not engine.time_range_support:
			
 
				-            return False
			
 
				-
			
 
				-        return True
			
 
				-
			
 
				-    def _get_params(self, engineref, user_agent):
			
 
				-        if engineref.name not in engines:
			
 
				-            return None, None
			
 
				-
			
 
				-        engine = engines[engineref.name]
			
 
				-
			
 
				-        if not self._is_accepted(engineref.name, engine):
			
 
				-            return None, None
			
 
				-
			
 
				-        # set default request parameters
			
 
				-        request_params = {}
			
 
				-        if not engine.offline:
			
 
				-            request_params = default_request_params()
			
 
				-            request_params['headers']['User-Agent'] = user_agent
			
 
				-
			
 
				-            if hasattr(engine, 'language') and engine.language:
			
 
				-                request_params['language'] = engine.language
			
 
				-            else:
			
 
				-                request_params['language'] = self.search_query.lang
			
 
				-
			
 
				-            request_params['safesearch'] = self.search_query.safesearch
			
 
				-            request_params['time_range'] = self.search_query.time_range
			
 
				-
			
 
				-        request_params['category'] = engineref.category
			
 
				-        request_params['pageno'] = self.search_query.pageno
			
 
				-
			
 
				-        with threading.RLock():
			
 
				-            engine.stats['sent_search_count'] += 1
			
 
				-
			
 
				-        return request_params, engine.timeout
			
 
				-
			
 
				     # do search-request
			
 
				     def _get_requests(self):
			
 
				         # init vars
			
 
				         requests = []
			
 
				 
			
 
				-        # set default useragent
			
 
				-        # user_agent = request.headers.get('User-Agent', '')
			
 
				-        user_agent = gen_useragent()
			
 
				-
			
 
				         # max of all selected engine timeout
			
 
				         default_timeout = 0
			
 
				 
			
 
				         # start search-reqest for all selected engines
			
 
				         for engineref in self.search_query.engineref_list:
			
 
				+            processor = processors[engineref.name]
			
 
				+
			
 
				             # set default request parameters
			
 
				-            request_params, engine_timeout = self._get_params(engineref, user_agent)
			
 
				+            request_params = processor.get_params(self.search_query, engineref.category)
			
 
				             if request_params is None:
			
 
				                 continue
			
 
				 
			
@@ -445,7 +170,7 @@ class Search:
 
				             requests.append((engineref.name, self.search_query.query, request_params))
			
 
				 
			
 
				             # update default_timeout
			
 
				-            default_timeout = max(default_timeout, engine_timeout)
			
 
				+            default_timeout = max(default_timeout, processor.engine.timeout)
			
 
				 
			
 
				         # adjust timeout
			
 
				         actual_timeout = default_timeout
			
@@ -474,8 +199,8 @@ class Search:
 
				 
			
 
				         for engine_name, query, request_params in requests:
			
 
				             th = threading.Thread(
			
 
				-                target=search_one_request_safe,
			
 
				-                args=(engine_name, query, request_params, self.result_container, self.start_time, self.actual_timeout),
			
 
				+                target=processors[engine_name].search,
			
 
				+                args=(query, request_params, self.result_container, self.start_time, self.actual_timeout),
			
 
				                 name=search_id,
			
 
				             )
			
 
				             th._timeout = False
			
--- a/searx/search/processors/__init__.py
+++ b/searx/search/processors/__init__.py
@@ -0,0 +1,41 @@
 
				+# SPDX-License-Identifier: AGPL-3.0-or-later
			
 
				+
			
 
				+from .online import OnlineProcessor
			
 
				+from .offline import OfflineProcessor
			
 
				+from .online_dictionary import OnlineDictionaryProcessor
			
 
				+from .online_currency import OnlineCurrencyProcessor
			
 
				+from .abstract import EngineProcessor
			
 
				+from searx import logger
			
 
				+import searx.engines as engines
			
 
				+
			
 
				+
			
 
				+__all__ = ['EngineProcessor', 'OfflineProcessor', 'OnlineProcessor',
			
 
				+           'OnlineDictionaryProcessor', 'OnlineCurrencyProcessor', 'processors']
			
 
				+logger = logger.getChild('search.processors')
			
 
				+# registry: engine name -> processor instance, filled by initialize()
			
 
				+processors = {}
			
 
				+
			
 
				+
			
 
				+def get_processor_class(engine_type):
			
 
				+    for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]:
			
 
				+        if c.engine_type == engine_type:
			
 
				+            return c
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+def get_processor(engine, engine_name):
			
 
				+    engine_type = getattr(engine, 'engine_type', 'online')
			
 
				+    processor_class = get_processor_class(engine_type)
			
 
				+    if processor_class:
			
 
				+        return processor_class(engine, engine_name)
			
 
				+    else:
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+def initialize(engine_list):
			
 
				+    engines.initialize_engines(engine_list)
			
 
				+    for engine_name, engine in engines.engines.items():
			
 
				+        processor = get_processor(engine, engine_name)
			
 
				+        if processor is None:
			
 
				+            logger.error('Error get processor for engine %s', engine_name)
			
 
				+        else:
			
 
				+            processors[engine_name] = processor
			
--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py
@@ -0,0 +1,39 @@
 
				+# SPDX-License-Identifier: AGPL-3.0-or-later
			
 
				+
			
 
				+from abc import abstractmethod
			
 
				+from searx import logger
			
 
				+
			
 
				+
			
 
				+logger = logger.getChild('searx.search.processor')
			
 
				+
			
 
				+
			
 
				+class EngineProcessor:
			
 
				+
			
 
				+    def __init__(self, engine, engine_name):
			
 
				+        self.engine = engine
			
 
				+        self.engine_name = engine_name
			
 
				+
			
 
				+    def get_params(self, search_query, engine_category):
			
 
				+        # if paging is not supported, skip
			
 
				+        if search_query.pageno > 1 and not self.engine.paging:
			
 
				+            return None
			
 
				+
			
 
				+        # if time_range is not supported, skip
			
 
				+        if search_query.time_range and not self.engine.time_range_support:
			
 
				+            return None
			
 
				+
			
 
				+        params = {}
			
 
				+        params['category'] = engine_category
			
 
				+        params['pageno'] = search_query.pageno
			
 
				+        params['safesearch'] = search_query.safesearch
			
 
				+        params['time_range'] = search_query.time_range
			
 
				+
			
 
				+        if hasattr(self.engine, 'language') and self.engine.language:
			
 
				+            params['language'] = self.engine.language
			
 
				+        else:
			
 
				+            params['language'] = search_query.lang
			
 
				+        return params
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def search(self, query, params, result_container, start_time, timeout_limit):
			
 
				+        pass
			
--- a/searx/search/processors/offline.py
+++ b/searx/search/processors/offline.py
@@ -0,0 +1,51 @@
 
				+# SPDX-License-Identifier: AGPL-3.0-or-later
			
 
				+
			
 
				+import threading
			
 
				+from time import time
			
 
				+from searx import logger
			
 
				+from searx.metrology.error_recorder import record_exception, record_error
			
 
				+from searx.search.processors.abstract import EngineProcessor
			
 
				+
			
 
				+
			
 
				+logger = logger.getChild('search.processor.offline')
			
 
				+
			
 
				+
			
 
				+class OfflineProcessor(EngineProcessor):
			
 
				+
			
 
				+    engine_type = 'offline'
			
 
				+
			
 
				+    def _record_stats_on_error(self, result_container, start_time):
			
 
				+        engine_time = time() - start_time
			
 
				+        result_container.add_timing(self.engine_name, engine_time, engine_time)
			
 
				+
			
 
				+        with threading.RLock():
			
 
				+            self.engine.stats['errors'] += 1
			
 
				+
			
 
				+    def _search_basic(self, query, params):
			
 
				+        return self.engine.search(query, params)
			
 
				+
			
 
				+    def search(self, query, params, result_container, start_time, timeout_limit):
			
 
				+        try:
			
 
				+            search_results = self._search_basic(query, params)
			
 
				+
			
 
				+            if search_results:
			
 
				+                result_container.extend(self.engine_name, search_results)
			
 
				+
			
 
				+                engine_time = time() - start_time
			
 
				+                result_container.add_timing(self.engine_name, engine_time, engine_time)
			
 
				+                with threading.RLock():
			
 
				+                    self.engine.stats['engine_time'] += engine_time
			
 
				+                    self.engine.stats['engine_time_count'] += 1
			
 
				+
			
 
				+        except ValueError as e:
			
 
				+            record_exception(self.engine_name, e)
			
 
				+            self._record_stats_on_error(result_container, start_time)
			
 
				+            logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
			
 
				+        except Exception as e:
			
 
				+            record_exception(self.engine_name, e)
			
 
				+            self._record_stats_on_error(result_container, start_time)
			
 
				+            result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash', str(e))
			
 
				+            logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
			
 
				+        else:
			
 
				+            if getattr(threading.current_thread(), '_timeout', False):
			
 
				+                record_error(self.engine_name, 'Timeout')
			
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -0,0 +1,211 @@
 
				+# SPDX-License-Identifier: AGPL-3.0-or-later
			
 
				+
			
 
				+from urllib.parse import urlparse
			
 
				+from time import time
			
 
				+import threading
			
 
				+
			
 
				+import requests.exceptions
			
 
				+
			
 
				+import searx.poolrequests as poolrequests
			
 
				+from searx.engines import settings
			
 
				+from searx import logger
			
 
				+from searx.utils import gen_useragent
			
 
				+from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
			
 
				+                              SearxEngineTooManyRequestsException,)
			
 
				+from searx.metrology.error_recorder import record_exception, record_error
			
 
				+
			
 
				+from searx.search.processors.abstract import EngineProcessor
			
 
				+
			
 
				+
			
 
				+logger = logger.getChild('search.processor.online')
			
 
				+
			
 
				+DEFAULT_PARAMS = {  # request defaults merged into params by OnlineProcessor.get_params
			
 
				+    'method': 'GET',  # any other value is sent with poolrequests.post
			
 
				+    'headers': {},  # mutable value: copy it before modifying
			
 
				+    'data': {},  # mutable value: copy it before modifying
			
 
				+    'url': '',  # an empty (or None) url makes _search_basic skip the request
			
 
				+    'cookies': {},  # mutable value: copy it before modifying
			
 
				+    'verify': True,
			
 
				+    'auth': None
			
 
				+}
			
 
				+
			
 
				+
			
 
				+class OnlineProcessor(EngineProcessor):
			
 
				+    """Processor for engines that are queried with an HTTP request sent through poolrequests."""
			
 
				+    engine_type = 'online'
			
 
				+
			
 
				+    def get_params(self, search_query, engine_category):  # returns None when the engine must be skipped
			
 
				+        params = super().get_params(search_query, engine_category)
			
 
				+        if params is None:
			
 
				+            return None
			
 
				+
			
 
				+        # skip suspended engines
			
 
				+        if self.engine.suspend_end_time >= time():
			
 
				+            logger.debug('Engine currently suspended: %s', self.engine_name)
			
 
				+            return None
			
 
				+
			
 
				+        # add default params; dict values are copied so that requests never share mutable state
			
 
				+        params.update({k: dict(v) if isinstance(v, dict) else v for k, v in DEFAULT_PARAMS.items()})
			
 
				+
			
 
				+        # add a user agent
			
 
				+        params['headers']['User-Agent'] = gen_useragent()
			
 
				+
			
 
				+        return params
			
 
				+
			
 
				+    def _send_http_request(self, params):  # send the request described by params and return the response
			
 
				+        # create dictionary which contains all
			
 
				+        # information about the request
			
 
				+        request_args = dict(
			
 
				+            headers=params['headers'],
			
 
				+            cookies=params['cookies'],
			
 
				+            verify=params['verify'],
			
 
				+            auth=params['auth']
			
 
				+        )
			
 
				+
			
 
				+        # setting engine based proxies
			
 
				+        if hasattr(self.engine, 'proxies'):
			
 
				+            request_args['proxies'] = poolrequests.get_proxies(self.engine.proxies)
			
 
				+
			
 
				+        # max_redirects
			
 
				+        max_redirects = params.get('max_redirects')
			
 
				+        if max_redirects:
			
 
				+            request_args['max_redirects'] = max_redirects
			
 
				+
			
 
				+        # soft_max_redirects
			
 
				+        soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0)
			
 
				+
			
 
				+        # raise_for_httperror (defaults to False)
			
 
				+        request_args['raise_for_httperror'] = params.get('raise_for_httperror', False)
			
 
				+
			
 
				+        # specific type of request (GET or POST)
			
 
				+        if params['method'] == 'GET':
			
 
				+            req = poolrequests.get
			
 
				+        else:
			
 
				+            req = poolrequests.post
			
 
				+
			
 
				+        request_args['data'] = params['data']
			
 
				+
			
 
				+        # send the request
			
 
				+        response = req(params['url'], **request_args)
			
 
				+
			
 
				+        # check soft limit of the redirect count
			
 
				+        if len(response.history) > soft_max_redirects:
			
 
				+            # unexpected redirect : record an error
			
 
				+            # but the engine might still return valid results.
			
 
				+            status_code = str(response.status_code or '')
			
 
				+            reason = response.reason or ''
			
 
				+            hostname = str(urlparse(response.url or '').netloc)
			
 
				+            record_error(self.engine_name,
			
 
				+                         '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
			
 
				+                         (status_code, reason, hostname))
			
 
				+
			
 
				+        return response
			
 
				+
			
 
				+    def _search_basic(self, query, params):  # returns the engine's parsed results, or None without a url
			
 
				+        # update request parameters dependent on
			
 
				+        # search-engine (contained in engines folder)
			
 
				+        self.engine.request(query, params)
			
 
				+
			
 
				+        # ignoring empty urls
			
 
				+        if params['url'] is None:
			
 
				+            return None
			
 
				+
			
 
				+        if not params['url']:
			
 
				+            return None
			
 
				+
			
 
				+        # send request
			
 
				+        response = self._send_http_request(params)
			
 
				+
			
 
				+        # parse the response
			
 
				+        response.search_params = params
			
 
				+        return self.engine.response(response)
			
 
				+
			
 
				+    def search(self, query, params, result_container, start_time, timeout_limit):  # engine failures are caught here
			
 
				+        # set timeout for all HTTP requests
			
 
				+        poolrequests.set_timeout_for_thread(timeout_limit, start_time=start_time)
			
 
				+        # reset the HTTP total time
			
 
				+        poolrequests.reset_time_for_thread()
			
 
				+
			
 
				+        # suppose everything will be alright
			
 
				+        requests_exception = False
			
 
				+        suspended_time = None
			
 
				+
			
 
				+        try:
			
 
				+            # send requests and parse the results
			
 
				+            search_results = self._search_basic(query, params)
			
 
				+
			
 
				+            # check if the engine accepted the request
			
 
				+            if search_results is not None:
			
 
				+                # yes, so add results
			
 
				+                result_container.extend(self.engine_name, search_results)
			
 
				+
			
 
				+                # update engine time when there is no exception
			
 
				+                engine_time = time() - start_time
			
 
				+                page_load_time = poolrequests.get_time_for_thread()
			
 
				+                result_container.add_timing(self.engine_name, engine_time, page_load_time)
			
 
				+                with threading.RLock():  # NOTE(review): new lock per call, so it cannot exclude other threads
			
 
				+                    self.engine.stats['engine_time'] += engine_time
			
 
				+                    self.engine.stats['engine_time_count'] += 1
			
 
				+                    # update stats with the total HTTP time
			
 
				+                    self.engine.stats['page_load_time'] += page_load_time
			
 
				+                    self.engine.stats['page_load_count'] += 1
			
 
				+        except Exception as e:
			
 
				+            record_exception(self.engine_name, e)
			
 
				+
			
 
				+            # Timing
			
 
				+            engine_time = time() - start_time
			
 
				+            page_load_time = poolrequests.get_time_for_thread()
			
 
				+            result_container.add_timing(self.engine_name, engine_time, page_load_time)
			
 
				+
			
 
				+            # Record the errors
			
 
				+            with threading.RLock():  # NOTE(review): new lock per call (see above)
			
 
				+                self.engine.stats['errors'] += 1
			
 
				+
			
 
				+            if isinstance(e, requests.exceptions.Timeout):
			
 
				+                result_container.add_unresponsive_engine(self.engine_name, 'HTTP timeout')
			
 
				+                # requests timeout (connect or read)
			
 
				+                logger.error("engine {0} : HTTP requests timeout"
			
 
				+                             "(search duration : {1} s, timeout: {2} s) : {3}"
			
 
				+                             .format(self.engine_name, engine_time, timeout_limit, e.__class__.__name__))
			
 
				+                requests_exception = True
			
 
				+            elif isinstance(e, requests.exceptions.RequestException):
			
 
				+                result_container.add_unresponsive_engine(self.engine_name, 'HTTP error')
			
 
				+                # other requests exception
			
 
				+                logger.exception("engine {0} : requests exception"
			
 
				+                                 "(search duration : {1} s, timeout: {2} s) : {3}"
			
 
				+                                 .format(self.engine_name, engine_time, timeout_limit, e))
			
 
				+                requests_exception = True
			
 
				+            elif isinstance(e, SearxEngineCaptchaException):
			
 
				+                result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required')
			
 
				+                logger.exception('engine {0} : CAPTCHA'.format(self.engine_name))
			
 
				+                suspended_time = e.suspended_time  # pylint: disable=no-member
			
 
				+            elif isinstance(e, SearxEngineTooManyRequestsException):
			
 
				+                result_container.add_unresponsive_engine(self.engine_name, 'too many requests')
			
 
				+                logger.exception('engine {0} : Too many requests'.format(self.engine_name))
			
 
				+                suspended_time = e.suspended_time  # pylint: disable=no-member
			
 
				+            elif isinstance(e, SearxEngineAccessDeniedException):
			
 
				+                result_container.add_unresponsive_engine(self.engine_name, 'blocked')
			
 
				+                logger.exception('engine {0} : Searx is blocked'.format(self.engine_name))
			
 
				+                suspended_time = e.suspended_time  # pylint: disable=no-member
			
 
				+            else:
			
 
				+                result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash')
			
 
				+                # other errors
			
 
				+                logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
			
 
				+        else:
			
 
				+            if getattr(threading.current_thread(), '_timeout', False):
			
 
				+                record_error(self.engine_name, 'Timeout')
			
 
				+
			
 
				+        # suspend the engine if there is an HTTP error
			
 
				+        # or suspended_time is defined
			
 
				+        with threading.RLock():  # NOTE(review): new lock per call (see above)
			
 
				+            if requests_exception or suspended_time:
			
 
				+                # update continuous_errors / suspend_end_time
			
 
				+                self.engine.continuous_errors += 1
			
 
				+                if suspended_time is None:
			
 
				+                    suspended_time = min(settings['search']['max_ban_time_on_fail'],
			
 
				+                                         self.engine.continuous_errors * settings['search']['ban_time_on_fail'])
			
 
				+                self.engine.suspend_end_time = time() + suspended_time
			
 
				+            else:
			
 
				+                # reset the suspend variables
			
 
				+                self.engine.continuous_errors = 0
			
 
				+                self.engine.suspend_end_time = 0
			
--- a/searx/search/processors/online_currency.py
+++ b/searx/search/processors/online_currency.py
@@ -0,0 +1,57 @@
 
				+# SPDX-License-Identifier: AGPL-3.0-or-later
			
 
				+
			
 
				+import unicodedata
			
 
				+import re
			
 
				+
			
 
				+from searx.data import CURRENCIES
			
 
				+from .online import OnlineProcessor
			
 
				+
			
 
				+
			
 
				+parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # groups: amount, from, to
			
 
				+
			
 
				+
			
 
				+def normalize_name(name):  # canonical form used as the lookup key by name_to_iso4217
			
 
				+    name = name.lower().replace('-', ' ').rstrip('s')  # NOTE: rstrip removes every trailing 's', not only one
			
 
				+    name = re.sub(' +', ' ', name)  # collapse runs of spaces into a single space
			
 
				+    return unicodedata.normalize('NFKD', name).lower()
			
 
				+
			
 
				+
			
 
				+def name_to_iso4217(name):  # first CURRENCIES['names'] entry for the name, else the normalized name itself
			
 
				+    global CURRENCIES  # only read here; nothing in this function rebinds it
			
 
				+    name = normalize_name(name)
			
 
				+    currency = CURRENCIES['names'].get(name, [name])  # unknown names fall back to a one-element list
			
 
				+    return currency[0]
			
 
				+
			
 
				+
			
 
				+def iso4217_to_name(iso4217, language):  # name from CURRENCIES['iso4217'] for language, else the code unchanged
			
 
				+    global CURRENCIES  # only read here; nothing in this function rebinds it
			
 
				+    return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
			
 
				+
			
 
				+
			
 
				+class OnlineCurrencyProcessor(OnlineProcessor):
			
 
				+    """Online processor for "<amount> <from> in|to <to>" currency conversion queries."""
			
 
				+    engine_type = 'online_currency'
			
 
				+
			
 
				+    def get_params(self, search_query, engine_category):  # returns None when the query cannot be handled
			
 
				+        params = super().get_params(search_query, engine_category)
			
 
				+        if params is None:
			
 
				+            return None
			
 
				+
			
 
				+        m = parser_re.match(search_query.query)
			
 
				+        if not m:  # the query does not match the conversion pattern
			
 
				+            return None
			
 
				+
			
 
				+        amount_str, from_currency, to_currency = m.groups()
			
 
				+        try:
			
 
				+            amount = float(amount_str)
			
 
				+        except ValueError:
			
 
				+            return None
			
 
				+        from_currency = name_to_iso4217(from_currency.strip())  # unknown names stay as their normalized text
			
 
				+        to_currency = name_to_iso4217(to_currency.strip())
			
 
				+
			
 
				+        params['amount'] = amount
			
 
				+        params['from'] = from_currency
			
 
				+        params['to'] = to_currency
			
 
				+        params['from_name'] = iso4217_to_name(from_currency, 'en')  # 'en' name, or the code itself if unknown
			
 
				+        params['to_name'] = iso4217_to_name(to_currency, 'en')
			
 
				+        return params
			
--- a/searx/search/processors/online_dictionary.py
+++ b/searx/search/processors/online_dictionary.py
@@ -0,0 +1,37 @@
 
				+# SPDX-License-Identifier: AGPL-3.0-or-later
			
 
				+
			
 
				+import re
			
 
				+
			
 
				+from searx.utils import is_valid_lang
			
 
				+from .online import OnlineProcessor
			
 
				+
			
 
				+
			
 
				+parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)  # groups: from_lang, to_lang, query
			
 
				+
			
 
				+
			
 
				+class OnlineDictionaryProcessor(OnlineProcessor):
			
 
				+    """Online processor for "<from_lang>-<to_lang> <term>" dictionary queries."""
			
 
				+    engine_type = 'online_dictionnary'  # NOTE(review): double 'n' is a runtime value; check its users before renaming
			
 
				+
			
 
				+    def get_params(self, search_query, engine_category):  # returns None when the query cannot be handled
			
 
				+        params = super().get_params(search_query, engine_category)
			
 
				+        if params is None:
			
 
				+            return None
			
 
				+
			
 
				+        m = parser_re.match(search_query.query)
			
 
				+        if not m:  # the query does not match the dictionary pattern
			
 
				+            return None
			
 
				+
			
 
				+        from_lang, to_lang, query = m.groups()
			
 
				+
			
 
				+        from_lang = is_valid_lang(from_lang)  # rebinds the name to is_valid_lang()'s result
			
 
				+        to_lang = is_valid_lang(to_lang)
			
 
				+
			
 
				+        if not from_lang or not to_lang:  # a falsy result for either language rejects the query
			
 
				+            return None
			
 
				+
			
 
				+        params['from_lang'] = from_lang
			
 
				+        params['to_lang'] = to_lang
			
 
				+        params['query'] = query
			
 
				+
			
 
				+        return params
			
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -60,7 +60,7 @@ from searx import brand, static_path
 
				 from searx import settings, searx_dir, searx_debug
			
 
				 from searx.exceptions import SearxParameterException
			
 
				 from searx.engines import (
			
 
				-    categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
			
 
				+    categories, engines, engine_shortcuts, get_engines_stats
			
 
				 )
			
 
				 from searx.webutils import (
			
 
				     UnicodeWriter, highlight_content, get_resources_directory,
			
@@ -71,7 +71,7 @@ from searx.webadapter import get_search_query_from_webapp, get_selected_categori
 
				 from searx.utils import html_to_text, gen_useragent, dict_subset, match_language
			
 
				 from searx.version import VERSION_STRING
			
 
				 from searx.languages import language_codes as languages
			
 
				-from searx.search import SearchWithPlugins
			
 
				+from searx.search import SearchWithPlugins, initialize
			
 
				 from searx.query import RawTextQuery
			
 
				 from searx.autocomplete import searx_bang, backends as autocomplete_backends
			
 
				 from searx.plugins import plugins
			
@@ -131,7 +131,7 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai
 
				 # initialize the engines except on the first run of the werkzeug server.
			
 
				 if not werkzeug_reloader\
			
 
				    or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"):
			
 
				-    initialize_engines(settings['engines'])
			
 
				+    initialize()
			
 
				 
			
 
				 babel = Babel(app)
			
 
				 
			
--- a/tests/unit/test_search.py
+++ b/tests/unit/test_search.py
@@ -3,7 +3,6 @@
 
				 from searx.testing import SearxTestCase
			
 
				 from searx.search import SearchQuery, EngineRef
			
 
				 import searx.search
			
 
				-import searx.engines
			
 
				 
			
 
				 
			
 
				 SAFESEARCH = 0
			
@@ -39,7 +38,7 @@ class SearchTestCase(SearxTestCase):
 
				 
			
 
				     @classmethod
			
 
				     def setUpClass(cls):
			
 
				-        searx.engines.initialize_engines(TEST_ENGINES)
			
 
				+        searx.search.initialize(TEST_ENGINES)
			
 
				 
			
 
				     def test_timeout_simple(self):
			
 
				         searx.search.max_request_timeout = None
			
--- a/tests/unit/test_standalone_searx.py
+++ b/tests/unit/test_standalone_searx.py
@@ -8,8 +8,7 @@ import sys
 
				 from mock import Mock, patch
			
 
				 from nose2.tools import params
			
 
				 
			
 
				-from searx.search import SearchQuery, EngineRef
			
 
				-from searx.engines import initialize_engines
			
 
				+from searx.search import SearchQuery, EngineRef, initialize
			
 
				 from searx.testing import SearxTestCase
			
 
				 
			
 
				 
			
@@ -30,7 +29,7 @@ class StandaloneSearx(SearxTestCase):
 
				     def setUpClass(cls):
			
 
				         engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}]
			
 
				 
			
 
				-        initialize_engines(engine_list)
			
 
				+        initialize(engine_list)
			
 
				 
			
 
				     def test_parse_argument_no_args(self):
			
 
				         """Test parse argument without args."""
			
--- a/tests/unit/test_webadapter.py
+++ b/tests/unit/test_webadapter.py
@@ -17,7 +17,7 @@ TEST_ENGINES = [
 
				         'categories': 'general',
			
 
				         'shortcut': 'do',
			
 
				         'timeout': 3.0,
			
 
				-        'offline': True,
			
 
				+        'engine_type': 'offline',
			
 
				         'tokens': ['my-token'],
			
 
				     },
			
 
				 ]
			
@@ -28,7 +28,7 @@ class ValidateQueryCase(SearxTestCase):
 
				 
			
 
				     @classmethod
			
 
				     def setUpClass(cls):
			
 
				-        searx.engines.initialize_engines(TEST_ENGINES)
			
 
				+        searx.search.initialize(TEST_ENGINES)
			
 
				 
			
 
				     def test_query_private_engine_without_token(self):
			
 
				         preferences = Preferences(['oscar'], ['general'], engines, [])
			
--- a/utils/standalone_searx.py
+++ b/utils/standalone_searx.py
@@ -6,8 +6,8 @@ Getting categories without initiate the engine will only return `['general']`
 
				 >>> import searx.engines
			
 
				 ... list(searx.engines.categories.keys())
			
 
				 ['general']
			
 
				->>> import searx
			
 
				-... searx.engines.initialize_engines(searx.settings['engines'])
			
 
				+>>> import searx.search
			
 
				+... searx.search.initialize()
			
 
				 ... list(searx.engines.categories.keys())
			
 
				 ['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
			
 
				 
			
@@ -22,11 +22,11 @@ Example to run it from python:
 
				 >>> import importlib
			
 
				 ... import json
			
 
				 ... import sys
			
 
				-... import searx
			
 
				 ... import searx.engines
			
 
				+... import searx.search
			
 
				 ... search_query = 'rain'
			
 
				 ... # initialize engines
			
 
				-... searx.engines.initialize_engines(searx.settings['engines'])
			
 
				+... searx.search.initialize()
			
 
				 ... # load engines categories once instead of each time the function called
			
 
				 ... engine_cs = list(searx.engines.categories.keys())
			
 
				 ... # load module
			
@@ -82,7 +82,6 @@ from json import dumps
 
				 from typing import Any, Dict, List, Optional
			
 
				 
			
 
				 import searx
			
 
				-import searx.engines
			
 
				 import searx.preferences
			
 
				 import searx.query
			
 
				 import searx.search
			
@@ -208,7 +207,7 @@ def parse_argument(
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    searx.engines.initialize_engines(searx.settings['engines'])
			
 
				+    searx.search.initialize()
			
 
				     engine_cs = list(searx.engines.categories.keys())
			
 
				     prog_args = parse_argument(category_choices=engine_cs)
			
 
				     search_q = get_search_query(prog_args, engine_categories=engine_cs)