Merge pull request #2225 from dalf/processors

Processors
Alexandre Flament 4 years ago
parent commit 9b27935f71

+ 6 - 6
docs/admin/engines.rst

@@ -33,7 +33,7 @@ Engine        ..          Paging support       **P**
 ------------------------- -------------------- ------------
 Shortcut      **S**       Language support     **L**
 Timeout       **TO**      Time range support   **TR**
-Disabled      **D**       Offline              **O**
+Disabled      **D**       Engine type          **ET**
 ------------- ----------- -------------------- ------------
 Safe search   **SS**
 ------------- ----------- ---------------------------------
@@ -62,10 +62,10 @@ Show errors   **DE**
         - SS
         - D
         - TR
-        - O
-	- W
-	- D
-	- DE
+        - ET
+        - W
+        - D
+        - DE

      {% for name, mod in engines.items() %}

@@ -79,7 +79,7 @@ Show errors   **DE**
         - {{(mod.safesearch and "y") or ""}}
         - {{(mod.disabled and "y") or ""}}
         - {{(mod.time_range_support and "y") or ""}}
-        - {{(mod.offline and "y") or ""}}
+        - {{mod.engine_type or ""}}
         - {{mod.weight or 1 }}
         - {{(mod.disabled and "y") or ""}}
         - {{(mod.display_error_messages and "y") or ""}}

+ 47 - 19
docs/dev/engine_overview.rst

@@ -37,15 +37,16 @@ settings.  However, the standard way is the following:
 engine file
 -----------

-======================= =========== ===========================================
+======================= =========== ========================================================
 argument                type        information
-======================= =========== ===========================================
+======================= =========== ========================================================
 categories              list        pages, in which the engine is working
 paging                  boolean     support multiple pages
 language_support        boolean     support language choosing
 time_range_support      boolean     support search time range
-offline                 boolean     engine runs offline
-======================= =========== ===========================================
+engine_type             str         ``online`` by default, other possible values are
+                                    ``offline``, ``online_dictionnary``, ``online_currency``
+======================= =========== ========================================================

 .. _engine settings:

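To illustrate the new attribute, a minimal sketch of a hypothetical engine module; the attribute is optional and falls back to ``online`` when missing:

    # hypothetical minimal engine module declaring its type
    engine_type = 'offline'   # omitting the attribute means 'online'
    categories = ['general']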
@@ -111,22 +112,49 @@ passed arguments
 These arguments can be used to construct the search query.  Furthermore,
 parameters with default value can be redefined for special purposes.

+If the ``engine_type`` is ``online``:
+
+====================== ============== ========================================================================
+argument               type           default-value, information
+====================== ============== ========================================================================
+url                    str            ``''``
+method                 str            ``'GET'``
+headers                set            ``{}``
+data                   set            ``{}``
+cookies                set            ``{}``
+verify                 bool           ``True``
+headers.User-Agent     str            a random User-Agent
+category               str            current category, like ``'general'``
+safesearch             int            ``0``, between ``0`` and ``2`` (normal, moderate, strict)
+time_range             Optional[str]  ``None``, can be ``day``, ``week``, ``month``, ``year``
+pageno                 int            current page number
+language               str            specific language code like ``'en_US'``, or ``'all'`` if unspecified
+====================== ============== ========================================================================
+
+
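As a quick, hedged illustration of these parameters, a sketch of an ``online`` engine's ``request`` hook (hypothetical module; ``search_url`` and the query-string layout are assumptions, not part of this PR):

    # hypothetical 'online' engine: build the outgoing request from params
    from urllib.parse import urlencode

    search_url = 'https://example.org/search?{query}'  # made-up endpoint

    def request(query, params):
        query_str = urlencode({'q': query, 'page': params['pageno']})
        params['url'] = search_url.format(query=query_str)
        return params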
+If the ``engine_type`` is ``online_dictionnary``, in addition to the ``online`` arguments:
+
 ====================== ============ ========================================================================
 argument               type         default-value, information
 ====================== ============ ========================================================================
-url                    string       ``''``
-method                 string       ``'GET'``
-headers                set          ``{}``
-data                   set          ``{}``
-cookies                set          ``{}``
-verify                 boolean      ``True``
-headers.User-Agent     string       a random User-Agent
-category               string       current category, like ``'general'``
-started                datetime     current date-time
-pageno                 int          current pagenumber
-language               string       specific language code like ``'en_US'``, or ``'all'`` if unspecified
+from_lang              str          specific language code like ``'en_US'``
+to_lang                str          specific language code like ``'en_US'``
+query                  str          the text query without the languages
 ====================== ============ ========================================================================
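For a query such as ``en-fr dog``, the engine would receive something like the following (illustrative values; assuming ``from_lang``/``to_lang`` are the tuples returned by ``searx.utils.is_valid_lang``, roughly ``(True, <2-letter code>, <english name>)`` as used by dictzone.py and translated.py below):

    # illustrative params for an 'online_dictionnary' engine (values are assumptions)
    params = {
        'from_lang': (True, 'en', 'english'),
        'to_lang': (True, 'fr', 'french'),
        'query': 'dog',
        # ... plus the usual 'online' arguments (url, method, headers, ...)
    }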
+If the ``engine_type`` is ``online_currency``, in addition to the ``online`` arguments:
+
+====================== ============ ========================================================================
+argument               type         default-value, information
+====================== ============ ========================================================================
+amount                 float        the amount to convert
+from                   str          ISO 4217 code
+to                     str          ISO 4217 code
+from_name              str          currency name
+to_name                str          currency name
+====================== ============ ========================================================================
+
+
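Similarly, for a query like ``10 usd in eur`` an ``online_currency`` engine might receive (illustrative values only; the query parsing happens outside the engine):

    # illustrative params for an 'online_currency' engine (values are assumptions)
    params = {
        'amount': 10.0,         # the amount to convert
        'from': 'USD',          # ISO 4217 code
        'to': 'EUR',            # ISO 4217 code
        'from_name': 'dollar',  # currency name
        'to_name': 'euro',      # currency name
        # ... plus the usual 'online' arguments
    }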
 parsed arguments
 ----------------

@@ -137,12 +165,12 @@ request:
 =================== =========== ==========================================================================
 argument            type        information
 =================== =========== ==========================================================================
-url                 string      requested url
-method              string      HTTP request method
+url                 str         requested url
+method              str         HTTP request method
 headers             set         HTTP header information
-data                set         HTTP data information (parsed if ``method != 'GET'``)
+data                set         HTTP data information
 cookies             set         HTTP cookies
-verify              boolean     Performing SSL-Validity check
+verify              bool        Performing SSL-Validity check
 max_redirects       int         maximum redirects, hard limit
 soft_max_redirects  int         maximum redirects, soft limit. Record an error but don't stop the engine
 raise_for_httperror bool        True by default: raise an exception if the HTTP code of response is >= 300

+ 6 - 4
searx/engines/__init__.py

@@ -53,7 +53,7 @@ engine_default_args = {'paging': False,
                        'suspend_end_time': 0,
                        'continuous_errors': 0,
                        'time_range_support': False,
-                       'offline': False,
+                       'engine_type': 'online',
                        'display_error_messages': True,
                        'tokens': []}

@@ -142,7 +142,9 @@ def load_engine(engine_data):
         'errors': 0
     }

-    if not engine.offline:
+    engine_type = getattr(engine, 'engine_type', 'online')
+
+    if engine_type != 'offline':
         engine.stats['page_load_time'] = 0
         engine.stats['page_load_count'] = 0

@@ -209,7 +211,7 @@ def get_engines_stats(preferences):
         else:
             score = score_per_result = 0.0

-        if not engine.offline:
+        if engine.engine_type != 'offline':
             load_times = 0
             if engine.stats['page_load_count'] != 0:
                 load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa
@@ -300,7 +302,7 @@ def initialize_engines(engine_list):

 def _set_https_support_for_engine(engine):
     # check HTTPS support if it is not disabled
-    if not engine.offline and not hasattr(engine, 'https_support'):
+    if engine.engine_type != 'offline' and not hasattr(engine, 'https_support'):
         params = engine.request('http_test', {
             'method': 'GET',
             'headers': {},

+ 1 - 1
searx/engines/command.py

@@ -23,7 +23,7 @@ from threading import Thread
 from searx import logger


-offline = True
+engine_type = 'offline'
 paging = True
 command = []
 delimiter = {}

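Since command.py is now typed as ``offline``, the new OfflineProcessor calls the engine's ``search`` function directly instead of building an HTTP request. A hedged sketch of a minimal offline engine (hypothetical module; result keys are illustrative):

    # hypothetical minimal offline engine
    engine_type = 'offline'

    def search(query, params):
        # return a list of result dicts instead of request()/response()
        return [{'title': query, 'content': 'example result'}]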
+ 2 - 40
searx/engines/currency_convert.py

@@ -1,54 +1,16 @@
 import json
-import re
-import unicodedata
-from searx.data import CURRENCIES  # NOQA


+engine_type = 'online_currency'
 categories = []
 url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
 weight = 100

-parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 https_support = True


-def normalize_name(name):
-    name = name.lower().replace('-', ' ').rstrip('s')
-    name = re.sub(' +', ' ', name)
-    return unicodedata.normalize('NFKD', name).lower()
-
-
-def name_to_iso4217(name):
-    global CURRENCIES
-
-    name = normalize_name(name)
-    currency = CURRENCIES['names'].get(name, [name])
-    return currency[0]
-
-
-def iso4217_to_name(iso4217, language):
-    global CURRENCIES
-
-    return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
-
-
 def request(query, params):
-    m = parser_re.match(query)
-    if not m:
-        # wrong query
-        return params
-    amount, from_currency, to_currency = m.groups()
-    amount = float(amount)
-    from_currency = name_to_iso4217(from_currency.strip())
-    to_currency = name_to_iso4217(to_currency.strip())
-
-    params['url'] = url.format(from_currency, to_currency)
-    params['amount'] = amount
-    params['from'] = from_currency
-    params['to'] = to_currency
-    params['from_name'] = iso4217_to_name(from_currency, 'en')
-    params['to_name'] = iso4217_to_name(to_currency, 'en')
-
+    params['url'] = url.format(params['from'], params['to'])
     return params



+ 6 - 18
searx/engines/dictzone.py

@@ -9,36 +9,24 @@
  @parse       url, title, content
 """

-import re
 from urllib.parse import urljoin
 from lxml import html
-from searx.utils import is_valid_lang, eval_xpath
+from searx.utils import eval_xpath
+
+engine_type = 'online_dictionnary'
 categories = ['general']
 url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100

-parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
 results_xpath = './/table[@id="r"]/tr'
 https_support = True


 def request(query, params):
-    m = parser_re.match(query)
-    if not m:
-        return params
-
-    from_lang, to_lang, query = m.groups()
-
-    from_lang = is_valid_lang(from_lang)
-    to_lang = is_valid_lang(to_lang)
-
-    if not from_lang or not to_lang:
-        return params
-
-    params['url'] = url.format(from_lang=from_lang[2],
-                               to_lang=to_lang[2],
-                               query=query)
+    params['url'] = url.format(from_lang=params['from_lang'][2],
+                               to_lang=params['to_lang'][2],
+                               query=params['query'])

     return params


+ 4 - 22
searx/engines/translated.py

@@ -8,44 +8,26 @@
  @stable      yes
  @parse       url, title, content
 """
-import re
-from searx.utils import is_valid_lang
+engine_type = 'online_dictionnary'
 categories = ['general']
 url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
 web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
 weight = 100
 https_support = True

-parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
 api_key = ''


 def request(query, params):
-    m = parser_re.match(query)
-    if not m:
-        return params
-
-    from_lang, to_lang, query = m.groups()
-
-    from_lang = is_valid_lang(from_lang)
-    to_lang = is_valid_lang(to_lang)
-
-    if not from_lang or not to_lang:
-        return params
-
     if api_key:
         key_form = '&key=' + api_key
     else:
         key_form = ''
-    params['url'] = url.format(from_lang=from_lang[1],
-                               to_lang=to_lang[1],
-                               query=query,
+    params['url'] = url.format(from_lang=params['from_lang'][1],
+                               to_lang=params['to_lang'][1],
+                               query=params['query'],
                                key=key_form)
-    params['query'] = query
-    params['from_lang'] = from_lang
-    params['to_lang'] = to_lang
-
     return params



+ 0 - 541
searx/search.py

@@ -1,541 +0,0 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-
-(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
-'''
-
-import typing
-import gc
-import threading
-from time import time
-from uuid import uuid4
-from urllib.parse import urlparse
-from _thread import start_new_thread
-
-import requests.exceptions
-import searx.poolrequests as requests_lib
-from searx.engines import engines, settings
-from searx.answerers import ask
-from searx.external_bang import get_bang_url
-from searx.utils import gen_useragent
-from searx.results import ResultContainer
-from searx import logger
-from searx.plugins import plugins
-from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
-                              SearxEngineTooManyRequestsException,)
-from searx.metrology.error_recorder import record_exception, record_error
-
-
-logger = logger.getChild('search')
-
-max_request_timeout = settings.get('outgoing', {}).get('max_request_timeout' or None)
-if max_request_timeout is None:
-    logger.info('max_request_timeout={0}'.format(max_request_timeout))
-else:
-    if isinstance(max_request_timeout, float):
-        logger.info('max_request_timeout={0} second(s)'.format(max_request_timeout))
-    else:
-        logger.critical('outgoing.max_request_timeout if defined has to be float')
-        import sys
-        sys.exit(1)
-
-
-class EngineRef:
-
-    __slots__ = 'name', 'category', 'from_bang'
-
-    def __init__(self, name: str, category: str, from_bang: bool=False):
-        self.name = name
-        self.category = category
-        self.from_bang = from_bang
-
-    def __repr__(self):
-        return "EngineRef({!r}, {!r}, {!r})".format(self.name, self.category, self.from_bang)
-
-    def __eq__(self, other):
-        return self.name == other.name and self.category == other.category and self.from_bang == other.from_bang
-
-
-class SearchQuery:
-    """container for all the search parameters (query, language, etc...)"""
-
-    __slots__ = 'query', 'engineref_list', 'categories', 'lang', 'safesearch', 'pageno', 'time_range',\
-                'timeout_limit', 'external_bang'
-
-    def __init__(self,
-                 query: str,
-                 engineref_list: typing.List[EngineRef],
-                 categories: typing.List[str],
-                 lang: str,
-                 safesearch: int,
-                 pageno: int,
-                 time_range: typing.Optional[str],
-                 timeout_limit: typing.Optional[float]=None,
-                 external_bang: typing.Optional[str]=None):
-        self.query = query
-        self.engineref_list = engineref_list
-        self.categories = categories
-        self.lang = lang
-        self.safesearch = safesearch
-        self.pageno = pageno
-        self.time_range = time_range
-        self.timeout_limit = timeout_limit
-        self.external_bang = external_bang
-
-    def __repr__(self):
-        return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\
-               format(self.query, self.engineref_list, self.categories, self.lang, self.safesearch,
-                      self.pageno, self.time_range, self.timeout_limit, self.external_bang)
-
-    def __eq__(self, other):
-        return self.query == other.query\
-            and self.engineref_list == other.engineref_list\
-            and self.categories == self.categories\
-            and self.lang == other.lang\
-            and self.safesearch == other.safesearch\
-            and self.pageno == other.pageno\
-            and self.time_range == other.time_range\
-            and self.timeout_limit == other.timeout_limit\
-            and self.external_bang == other.external_bang
-
-
-def send_http_request(engine, request_params):
-    # create dictionary which contain all
-    # informations about the request
-    request_args = dict(
-        headers=request_params['headers'],
-        cookies=request_params['cookies'],
-        verify=request_params['verify'],
-        auth=request_params['auth']
-    )
-
-    # setting engine based proxies
-    if hasattr(engine, 'proxies'):
-        request_args['proxies'] = requests_lib.get_proxies(engine.proxies)
-
-    # max_redirects
-    max_redirects = request_params.get('max_redirects')
-    if max_redirects:
-        request_args['max_redirects'] = max_redirects
-
-    # soft_max_redirects
-    soft_max_redirects = request_params.get('soft_max_redirects', max_redirects or 0)
-
-    # raise_for_status
-    request_args['raise_for_httperror'] = request_params.get('raise_for_httperror', False)
-
-    # specific type of request (GET or POST)
-    if request_params['method'] == 'GET':
-        req = requests_lib.get
-    else:
-        req = requests_lib.post
-
-    request_args['data'] = request_params['data']
-
-    # send the request
-    response = req(request_params['url'], **request_args)
-
-    # check soft limit of the redirect count
-    if len(response.history) > soft_max_redirects:
-        # unexpected redirect : record an error
-        # but the engine might still return valid results.
-        status_code = str(response.status_code or '')
-        reason = response.reason or ''
-        hostname = str(urlparse(response.url or '').netloc)
-        record_error(engine.name,
-                     '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
-                     (status_code, reason, hostname))
-
-    return response
-
-
-def search_one_http_request(engine, query, request_params):
-    # update request parameters dependent on
-    # search-engine (contained in engines folder)
-    engine.request(query, request_params)
-
-    # ignoring empty urls
-    if request_params['url'] is None:
-        return None
-
-    if not request_params['url']:
-        return None
-
-    # send request
-    response = send_http_request(engine, request_params)
-
-    # parse the response
-    response.search_params = request_params
-    return engine.response(response)
-
-
-def search_one_http_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
-    # set timeout for all HTTP requests
-    requests_lib.set_timeout_for_thread(timeout_limit, start_time=start_time)
-    # reset the HTTP total time
-    requests_lib.reset_time_for_thread()
-
-    #
-    engine = engines[engine_name]
-
-    # suppose everything will be alright
-    requests_exception = False
-    suspended_time = None
-
-    try:
-        # send requests and parse the results
-        search_results = search_one_http_request(engine, query, request_params)
-
-        # check if the engine accepted the request
-        if search_results is not None:
-            # yes, so add results
-            result_container.extend(engine_name, search_results)
-
-            # update engine time when there is no exception
-            engine_time = time() - start_time
-            page_load_time = requests_lib.get_time_for_thread()
-            result_container.add_timing(engine_name, engine_time, page_load_time)
-            with threading.RLock():
-                engine.stats['engine_time'] += engine_time
-                engine.stats['engine_time_count'] += 1
-                # update stats with the total HTTP time
-                engine.stats['page_load_time'] += page_load_time
-                engine.stats['page_load_count'] += 1
-    except Exception as e:
-        record_exception(engine_name, e)
-
-        # Timing
-        engine_time = time() - start_time
-        page_load_time = requests_lib.get_time_for_thread()
-        result_container.add_timing(engine_name, engine_time, page_load_time)
-
-        # Record the errors
-        with threading.RLock():
-            engine.stats['errors'] += 1
-
-        if (issubclass(e.__class__, requests.exceptions.Timeout)):
-            result_container.add_unresponsive_engine(engine_name, 'HTTP timeout')
-            # requests timeout (connect or read)
-            logger.error("engine {0} : HTTP requests timeout"
-                         "(search duration : {1} s, timeout: {2} s) : {3}"
-                         .format(engine_name, engine_time, timeout_limit, e.__class__.__name__))
-            requests_exception = True
-        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
-            result_container.add_unresponsive_engine(engine_name, 'HTTP error')
-            # other requests exception
-            logger.exception("engine {0} : requests exception"
-                             "(search duration : {1} s, timeout: {2} s) : {3}"
-                             .format(engine_name, engine_time, timeout_limit, e))
-            requests_exception = True
-        elif (issubclass(e.__class__, SearxEngineCaptchaException)):
-            result_container.add_unresponsive_engine(engine_name, 'CAPTCHA required')
-            logger.exception('engine {0} : CAPTCHA')
-            suspended_time = e.suspended_time  # pylint: disable=no-member
-        elif (issubclass(e.__class__, SearxEngineTooManyRequestsException)):
-            result_container.add_unresponsive_engine(engine_name, 'too many requests')
-            logger.exception('engine {0} : Too many requests')
-            suspended_time = e.suspended_time  # pylint: disable=no-member
-        elif (issubclass(e.__class__, SearxEngineAccessDeniedException)):
-            result_container.add_unresponsive_engine(engine_name, 'blocked')
-            logger.exception('engine {0} : Searx is blocked')
-            suspended_time = e.suspended_time  # pylint: disable=no-member
-        else:
-            result_container.add_unresponsive_engine(engine_name, 'unexpected crash')
-            # others errors
-            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
-    else:
-        if getattr(threading.current_thread(), '_timeout', False):
-            record_error(engine_name, 'Timeout')
-
-    # suspend the engine if there is an HTTP error
-    # or suspended_time is defined
-    with threading.RLock():
-        if requests_exception or suspended_time:
-            # update continuous_errors / suspend_end_time
-            engine.continuous_errors += 1
-            if suspended_time is None:
-                suspended_time = min(settings['search']['max_ban_time_on_fail'],
-                                     engine.continuous_errors * settings['search']['ban_time_on_fail'])
-            engine.suspend_end_time = time() + suspended_time
-        else:
-            # reset the suspend variables
-            engine.continuous_errors = 0
-            engine.suspend_end_time = 0
-
-
-def record_offline_engine_stats_on_error(engine, result_container, start_time):
-    engine_time = time() - start_time
-    result_container.add_timing(engine.name, engine_time, engine_time)
-
-    with threading.RLock():
-        engine.stats['errors'] += 1
-
-
-def search_one_offline_request(engine, query, request_params):
-    return engine.search(query, request_params)
-
-
-def search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
-    engine = engines[engine_name]
-
-    try:
-        search_results = search_one_offline_request(engine, query, request_params)
-
-        if search_results:
-            result_container.extend(engine_name, search_results)
-
-            engine_time = time() - start_time
-            result_container.add_timing(engine_name, engine_time, engine_time)
-            with threading.RLock():
-                engine.stats['engine_time'] += engine_time
-                engine.stats['engine_time_count'] += 1
-
-    except ValueError as e:
-        record_exception(engine_name, e)
-        record_offline_engine_stats_on_error(engine, result_container, start_time)
-        logger.exception('engine {0} : invalid input : {1}'.format(engine_name, e))
-    except Exception as e:
-        record_exception(engine_name, e)
-        record_offline_engine_stats_on_error(engine, result_container, start_time)
-        result_container.add_unresponsive_engine(engine_name, 'unexpected crash', str(e))
-        logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
-    else:
-        if getattr(threading.current_thread(), '_timeout', False):
-            record_error(engine_name, 'Timeout')
-
-
-def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
-    if engines[engine_name].offline:
-        return search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit)  # noqa
-    return search_one_http_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit)
-
-
-def search_multiple_requests(requests, result_container, start_time, timeout_limit):
-    search_id = uuid4().__str__()
-
-    for engine_name, query, request_params in requests:
-        th = threading.Thread(
-            target=search_one_request_safe,
-            args=(engine_name, query, request_params, result_container, start_time, timeout_limit),
-            name=search_id,
-        )
-        th._timeout = False
-        th._engine_name = engine_name
-        th.start()
-
-    for th in threading.enumerate():
-        if th.name == search_id:
-            remaining_time = max(0.0, timeout_limit - (time() - start_time))
-            th.join(remaining_time)
-            if th.is_alive():
-                th._timeout = True
-                result_container.add_unresponsive_engine(th._engine_name, 'timeout')
-                logger.warning('engine timeout: {0}'.format(th._engine_name))
-
-
-# get default reqest parameter
-def default_request_params():
-    return {
-        'method': 'GET',
-        'headers': {},
-        'data': {},
-        'url': '',
-        'cookies': {},
-        'verify': True,
-        'auth': None,
-        'raise_for_httperror': True
-    }
-
-
-class Search:
-    """Search information container"""
-
-    __slots__ = "search_query", "result_container", "start_time", "actual_timeout"
-
-    def __init__(self, search_query):
-        # init vars
-        super().__init__()
-        self.search_query = search_query
-        self.result_container = ResultContainer()
-        self.start_time = None
-        self.actual_timeout = None
-
-    def search_external_bang(self):
-        """
-        Check if there is a external bang.
-        If yes, update self.result_container and return True
-        """
-        if self.search_query.external_bang:
-            self.result_container.redirect_url = get_bang_url(self.search_query)
-
-            # This means there was a valid bang and the
-            # rest of the search does not need to be continued
-            if isinstance(self.result_container.redirect_url, str):
-                return True
-        return False
-
-    def search_answerers(self):
-        """
-        Check if an answer return a result.
-        If yes, update self.result_container and return True
-        """
-        answerers_results = ask(self.search_query)
-
-        if answerers_results:
-            for results in answerers_results:
-                self.result_container.extend('answer', results)
-            return True
-        return False
-
-    def _is_accepted(self, engine_name, engine):
-        # skip suspended engines
-        if engine.suspend_end_time >= time():
-            logger.debug('Engine currently suspended: %s', engine_name)
-            return False
-
-        # if paging is not supported, skip
-        if self.search_query.pageno > 1 and not engine.paging:
-            return False
-
-        # if time_range is not supported, skip
-        if self.search_query.time_range and not engine.time_range_support:
-            return False
-
-        return True
-
-    def _get_params(self, engineref, user_agent):
-        if engineref.name not in engines:
-            return None, None
-
-        engine = engines[engineref.name]
-
-        if not self._is_accepted(engineref.name, engine):
-            return None, None
-
-        # set default request parameters
-        request_params = {}
-        if not engine.offline:
-            request_params = default_request_params()
-            request_params['headers']['User-Agent'] = user_agent
-
-            if hasattr(engine, 'language') and engine.language:
-                request_params['language'] = engine.language
-            else:
-                request_params['language'] = self.search_query.lang
-
-            request_params['safesearch'] = self.search_query.safesearch
-            request_params['time_range'] = self.search_query.time_range
-
-        request_params['category'] = engineref.category
-        request_params['pageno'] = self.search_query.pageno
-
-        with threading.RLock():
-            engine.stats['sent_search_count'] += 1
-
-        return request_params, engine.timeout
-
-    # do search-request
-    def _get_requests(self):
-        # init vars
-        requests = []
-
-        # set default useragent
-        # user_agent = request.headers.get('User-Agent', '')
-        user_agent = gen_useragent()
-
-        # max of all selected engine timeout
-        default_timeout = 0
-
-        # start search-reqest for all selected engines
-        for engineref in self.search_query.engineref_list:
-            # set default request parameters
-            request_params, engine_timeout = self._get_params(engineref, user_agent)
-            if request_params is None:
-                continue
-
-            # append request to list
-            requests.append((engineref.name, self.search_query.query, request_params))
-
-            # update default_timeout
-            default_timeout = max(default_timeout, engine_timeout)
-
-        # adjust timeout
-        actual_timeout = default_timeout
-        query_timeout = self.search_query.timeout_limit
-
-        if max_request_timeout is None and query_timeout is None:
-            # No max, no user query: default_timeout
-            pass
-        elif max_request_timeout is None and query_timeout is not None:
-            # No max, but user query: From user query except if above default
-            actual_timeout = min(default_timeout, query_timeout)
-        elif max_request_timeout is not None and query_timeout is None:
-            # Max, no user query: Default except if above max
-            actual_timeout = min(default_timeout, max_request_timeout)
-        elif max_request_timeout is not None and query_timeout is not None:
-            # Max & user query: From user query except if above max
-            actual_timeout = min(query_timeout, max_request_timeout)
-
-        logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})"
-                     .format(actual_timeout, default_timeout, query_timeout, max_request_timeout))
-
-        return requests, actual_timeout
-
-    def search_standard(self):
-        """
-        Update self.result_container, self.actual_timeout
-        """
-        requests, self.actual_timeout = self._get_requests()
-
-        # send all search-request
-        if requests:
-            search_multiple_requests(requests, self.result_container, self.start_time, self.actual_timeout)
-            start_new_thread(gc.collect, tuple())
-
-        # return results, suggestions, answers and infoboxes
-        return True
-
-    # do search-request
-    def search(self):
-        self.start_time = time()
-
-        if not self.search_external_bang():
-            if not self.search_answerers():
-                self.search_standard()
-
-        return self.result_container
-
-
-class SearchWithPlugins(Search):
-    """Similar to the Search class but call the plugins."""
-
-    __slots__ = 'ordered_plugin_list', 'request'
-
-    def __init__(self, search_query, ordered_plugin_list, request):
-        super().__init__(search_query)
-        self.ordered_plugin_list = ordered_plugin_list
-        self.request = request
-
-    def search(self):
-        if plugins.call(self.ordered_plugin_list, 'pre_search', self.request, self):
-            super().search()
-
-        plugins.call(self.ordered_plugin_list, 'post_search', self.request, self)
-
-        results = self.result_container.get_ordered_results()
-
-        for result in results:
-            plugins.call(self.ordered_plugin_list, 'on_result', self.request, self, result)
-
-        return self.result_container

+ 265 - 0
searx/search/__init__.py

@@ -0,0 +1,265 @@
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
+'''
+
+import typing
+import gc
+import threading
+from time import time
+from uuid import uuid4
+from _thread import start_new_thread
+
+from searx import settings
+from searx.answerers import ask
+from searx.external_bang import get_bang_url
+from searx.results import ResultContainer
+from searx import logger
+from searx.plugins import plugins
+from searx.search.processors import processors, initialize as initialize_processors
+
+
+logger = logger.getChild('search')
+
+max_request_timeout = settings.get('outgoing', {}).get('max_request_timeout' or None)
+if max_request_timeout is None:
+    logger.info('max_request_timeout={0}'.format(max_request_timeout))
+else:
+    if isinstance(max_request_timeout, float):
+        logger.info('max_request_timeout={0} second(s)'.format(max_request_timeout))
+    else:
+        logger.critical('outgoing.max_request_timeout if defined has to be float')
+        import sys
+        sys.exit(1)
+
+
+def initialize(settings_engines=None):
+    settings_engines = settings_engines or settings['engines']
+    initialize_processors(settings_engines)
+
+
+class EngineRef:
+
+    __slots__ = 'name', 'category', 'from_bang'
+
+    def __init__(self, name: str, category: str, from_bang: bool=False):
+        self.name = name
+        self.category = category
+        self.from_bang = from_bang
+
+    def __repr__(self):
+        return "EngineRef({!r}, {!r}, {!r})".format(self.name, self.category, self.from_bang)
+
+    def __eq__(self, other):
+        return self.name == other.name and self.category == other.category and self.from_bang == other.from_bang
+
+
+class SearchQuery:
+    """container for all the search parameters (query, language, etc...)"""
+
+    __slots__ = 'query', 'engineref_list', 'categories', 'lang', 'safesearch', 'pageno', 'time_range',\
+                'timeout_limit', 'external_bang'
+
+    def __init__(self,
+                 query: str,
+                 engineref_list: typing.List[EngineRef],
+                 categories: typing.List[str],
+                 lang: str,
+                 safesearch: int,
+                 pageno: int,
+                 time_range: typing.Optional[str],
+                 timeout_limit: typing.Optional[float]=None,
+                 external_bang: typing.Optional[str]=None):
+        self.query = query
+        self.engineref_list = engineref_list
+        self.categories = categories
+        self.lang = lang
+        self.safesearch = safesearch
+        self.pageno = pageno
+        self.time_range = time_range
+        self.timeout_limit = timeout_limit
+        self.external_bang = external_bang
+
+    def __repr__(self):
+        return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\
+               format(self.query, self.engineref_list, self.categories, self.lang, self.safesearch,
+                      self.pageno, self.time_range, self.timeout_limit, self.external_bang)
+
+    def __eq__(self, other):
+        return self.query == other.query\
+            and self.engineref_list == other.engineref_list\
+            and self.categories == self.categories\
+            and self.lang == other.lang\
+            and self.safesearch == other.safesearch\
+            and self.pageno == other.pageno\
+            and self.time_range == other.time_range\
+            and self.timeout_limit == other.timeout_limit\
+            and self.external_bang == other.external_bang
+
+
+class Search:
+    """Search information container"""
+
+    __slots__ = "search_query", "result_container", "start_time", "actual_timeout"
+
+    def __init__(self, search_query):
+        # init vars
+        super().__init__()
+        self.search_query = search_query
+        self.result_container = ResultContainer()
+        self.start_time = None
+        self.actual_timeout = None
+
+    def search_external_bang(self):
+        """
+        Check if there is a external bang.
+        If yes, update self.result_container and return True
+        """
+        if self.search_query.external_bang:
+            self.result_container.redirect_url = get_bang_url(self.search_query)
+
+            # This means there was a valid bang and the
+            # rest of the search does not need to be continued
+            if isinstance(self.result_container.redirect_url, str):
+                return True
+        return False
+
+    def search_answerers(self):
+        """
+        Check if an answer return a result.
+        If yes, update self.result_container and return True
+        """
+        answerers_results = ask(self.search_query)
+
+        if answerers_results:
+            for results in answerers_results:
+                self.result_container.extend('answer', results)
+            return True
+        return False
+
+    # do search-request
+    def _get_requests(self):
+        # init vars
+        requests = []
+
+        # max of all selected engine timeout
+        default_timeout = 0
+
+        # start search-reqest for all selected engines
+        for engineref in self.search_query.engineref_list:
+            processor = processors[engineref.name]
+
+            # set default request parameters
+            request_params = processor.get_params(self.search_query, engineref.category)
+            if request_params is None:
+                continue
+
+            # append request to list
+            requests.append((engineref.name, self.search_query.query, request_params))
+
+            # update default_timeout
+            default_timeout = max(default_timeout, processor.engine.timeout)
+
+        # adjust timeout
+        actual_timeout = default_timeout
+        query_timeout = self.search_query.timeout_limit
+
+        if max_request_timeout is None and query_timeout is None:
+            # No max, no user query: default_timeout
+            pass
+        elif max_request_timeout is None and query_timeout is not None:
+            # No max, but user query: From user query except if above default
+            actual_timeout = min(default_timeout, query_timeout)
+        elif max_request_timeout is not None and query_timeout is None:
+            # Max, no user query: Default except if above max
+            actual_timeout = min(default_timeout, max_request_timeout)
+        elif max_request_timeout is not None and query_timeout is not None:
+            # Max & user query: From user query except if above max
+            actual_timeout = min(query_timeout, max_request_timeout)
+
+        logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})"
+                     .format(actual_timeout, default_timeout, query_timeout, max_request_timeout))
+
+        return requests, actual_timeout
+
+    def search_multiple_requests(self, requests):
+        search_id = uuid4().__str__()
+
+        for engine_name, query, request_params in requests:
+            th = threading.Thread(
+                target=processors[engine_name].search,
+                args=(query, request_params, self.result_container, self.start_time, self.actual_timeout),
+                name=search_id,
+            )
+            th._timeout = False
+            th._engine_name = engine_name
+            th.start()
+
+        for th in threading.enumerate():
+            if th.name == search_id:
+                remaining_time = max(0.0, self.actual_timeout - (time() - self.start_time))
+                th.join(remaining_time)
+                if th.is_alive():
+                    th._timeout = True
+                    self.result_container.add_unresponsive_engine(th._engine_name, 'timeout')
+                    logger.warning('engine timeout: {0}'.format(th._engine_name))
+
+    def search_standard(self):
+        """
+        Update self.result_container, self.actual_timeout
+        """
+        requests, self.actual_timeout = self._get_requests()
+
+        # send all search-request
+        if requests:
+            self.search_multiple_requests(requests)
+            start_new_thread(gc.collect, tuple())
+
+        # return results, suggestions, answers and infoboxes
+        return True
+
+    # do search-request
+    def search(self):
+        self.start_time = time()
+
+        if not self.search_external_bang():
+            if not self.search_answerers():
+                self.search_standard()
+
+        return self.result_container
+
+
+class SearchWithPlugins(Search):
+    """Similar to the Search class but call the plugins."""
+
+    __slots__ = 'ordered_plugin_list', 'request'
+
+    def __init__(self, search_query, ordered_plugin_list, request):
+        super().__init__(search_query)
+        self.ordered_plugin_list = ordered_plugin_list
+        self.request = request
+
+    def search(self):
+        if plugins.call(self.ordered_plugin_list, 'pre_search', self.request, self):
+            super().search()
+
+        plugins.call(self.ordered_plugin_list, 'post_search', self.request, self)
+
+        results = self.result_container.get_ordered_results()
+
+        for result in results:
+            plugins.call(self.ordered_plugin_list, 'on_result', self.request, self, result)
+
+        return self.result_container
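A hedged end-to-end sketch of the refactored pipeline, using only the API shown in this file (assuming settings are loaded and 'wikipedia' is a configured engine name):

    # hypothetical usage: run one search through the new processor-based pipeline
    from searx.search import initialize, Search, SearchQuery, EngineRef

    initialize()  # builds one processor per configured engine
    search_query = SearchQuery('searx', [EngineRef('wikipedia', 'general')],
                               ['general'], 'en-US', 0, 1, None)
    result_container = Search(search_query).search()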

+ 41 - 0
searx/search/processors/__init__.py

@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+from .online import OnlineProcessor
+from .offline import OfflineProcessor
+from .online_dictionary import OnlineDictionaryProcessor
+from .online_currency import OnlineCurrencyProcessor
+from .abstract import EngineProcessor
+from searx import logger
+import searx.engines as engines
+
+
+__all__ = ['EngineProcessor', 'OfflineProcessor', 'OnlineProcessor',
+           'OnlineDictionaryProcessor', 'OnlineCurrencyProcessor', 'processors']
+logger = logger.getChild('search.processors')
+processors = {}
+
+
+def get_processor_class(engine_type):
+    for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]:
+        if c.engine_type == engine_type:
+            return c
+    return None
+
+
+def get_processor(engine, engine_name):
+    engine_type = getattr(engine, 'engine_type', 'online')
+    processor_class = get_processor_class(engine_type)
+    if processor_class:
+        return processor_class(engine, engine_name)
+    else:
+        return None
+
+
+def initialize(engine_list):
+    engines.initialize_engines(engine_list)
+    for engine_name, engine in engines.engines.items():
+        processor = get_processor(engine, engine_name)
+        if processor is None:
+            logger.error('Error get processor for engine %s', engine_name)
+        else:
+            processors[engine_name] = processor

+ 39 - 0
searx/search/processors/abstract.py

@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+from abc import abstractmethod
+from searx import logger
+
+
+logger = logger.getChild('searx.search.processor')
+
+
+class EngineProcessor:
+
+    def __init__(self, engine, engine_name):
+        self.engine = engine
+        self.engine_name = engine_name
+
+    def get_params(self, search_query, engine_category):
+        # if paging is not supported, skip
+        if search_query.pageno > 1 and not self.engine.paging:
+            return None
+
+        # if time_range is not supported, skip
+        if search_query.time_range and not self.engine.time_range_support:
+            return None
+
+        params = {}
+        params['category'] = engine_category
+        params['pageno'] = search_query.pageno
+        params['safesearch'] = search_query.safesearch
+        params['time_range'] = search_query.time_range
+
+        if hasattr(self.engine, 'language') and self.engine.language:
+            params['language'] = self.engine.language
+        else:
+            params['language'] = search_query.lang
+        return params
+
+    @abstractmethod
+    def search(self, query, params, result_container, start_time, timeout_limit):
+        pass

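For extension, a hedged sketch of a concrete subclass of this abstract base (hypothetical; a new type would also need to be added to the hardcoded list in ``get_processor_class``):

    # hypothetical custom processor built on EngineProcessor
    from searx.search.processors.abstract import EngineProcessor

    class EchoProcessor(EngineProcessor):
        engine_type = 'echo'  # made-up engine_type

        def search(self, query, params, result_container, start_time, timeout_limit):
            # echo the query back as a single result
            result_container.extend(self.engine_name, [{'title': query, 'content': ''}])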
+ 51 - 0
searx/search/processors/offline.py

@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import threading
+from time import time
+from searx import logger
+from searx.metrology.error_recorder import record_exception, record_error
+from searx.search.processors.abstract import EngineProcessor
+
+
+logger = logger.getChild('search.processor.offline')
+
+
+class OfflineProcessor(EngineProcessor):
+
+    engine_type = 'offline'
+
+    def _record_stats_on_error(self, result_container, start_time):
+        engine_time = time() - start_time
+        result_container.add_timing(self.engine_name, engine_time, engine_time)
+
+        with threading.RLock():
+            self.engine.stats['errors'] += 1
+
+    def _search_basic(self, query, params):
+        return self.engine.search(query, params)
+
+    def search(self, query, params, result_container, start_time, timeout_limit):
+        try:
+            search_results = self._search_basic(query, params)
+
+            if search_results:
+                result_container.extend(self.engine_name, search_results)
+
+                engine_time = time() - start_time
+                result_container.add_timing(self.engine_name, engine_time, engine_time)
+                with threading.RLock():
+                    self.engine.stats['engine_time'] += engine_time
+                    self.engine.stats['engine_time_count'] += 1
+
+        except ValueError as e:
+            record_exception(self.engine_name, e)
+            self._record_stats_on_error(result_container, start_time)
+            logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
+        except Exception as e:
+            record_exception(self.engine_name, e)
+            self._record_stats_on_error(result_container, start_time)
+            result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash', str(e))
+            logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
+        else:
+            if getattr(threading.current_thread(), '_timeout', False):
+                record_error(self.engine_name, 'Timeout')

+ 211 - 0
searx/search/processors/online.py

@@ -0,0 +1,211 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+from urllib.parse import urlparse
+from time import time
+import threading
+
+import requests.exceptions
+
+import searx.poolrequests as poolrequests
+from searx.engines import settings
+from searx import logger
+from searx.utils import gen_useragent
+from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
+                              SearxEngineTooManyRequestsException,)
+from searx.metrology.error_recorder import record_exception, record_error
+
+from searx.search.processors.abstract import EngineProcessor
+
+
+logger = logger.getChild('search.processor.online')
+
+DEFAULT_PARAMS = {
+    'method': 'GET',
+    'headers': {},
+    'data': {},
+    'url': '',
+    'cookies': {},
+    'verify': True,
+    'auth': None
+}
+
+
+class OnlineProcessor(EngineProcessor):
+
+    engine_type = 'online'
+
+    def get_params(self, search_query, engine_category):
+        params = super().get_params(search_query, engine_category)
+        if params is None:
+            return None
+
+        # skip suspended engines
+        if self.engine.suspend_end_time >= time():
+            logger.debug('Engine currently suspended: %s', self.engine_name)
+            return None
+
+        # add default params
+        params.update(DEFAULT_PARAMS)
+
+        # add an user agent
+        params['headers']['User-Agent'] = gen_useragent()
+
+        return params
+
+    def _send_http_request(self, params):
+        # create dictionary which contain all
+        # informations about the request
+        request_args = dict(
+            headers=params['headers'],
+            cookies=params['cookies'],
+            verify=params['verify'],
+            auth=params['auth']
+        )
+
+        # setting engine based proxies
+        if hasattr(self.engine, 'proxies'):
+            request_args['proxies'] = poolrequests.get_proxies(self.engine.proxies)
+
+        # max_redirects
+        max_redirects = params.get('max_redirects')
+        if max_redirects:
+            request_args['max_redirects'] = max_redirects
+
+        # soft_max_redirects
+        soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0)
+
+        # raise_for_status
+        request_args['raise_for_httperror'] = params.get('raise_for_httperror', False)
+
+        # specific type of request (GET or POST)
+        if params['method'] == 'GET':
+            req = poolrequests.get
+        else:
+            req = poolrequests.post
+
+        request_args['data'] = params['data']
+
+        # send the request
+        response = req(params['url'], **request_args)
+
+        # check soft limit of the redirect count
+        if len(response.history) > soft_max_redirects:
+            # unexpected redirect : record an error
+            # but the engine might still return valid results.
+            status_code = str(response.status_code or '')
+            reason = response.reason or ''
+            hostname = str(urlparse(response.url or '').netloc)
+            record_error(self.engine_name,
+                         '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
+                         (status_code, reason, hostname))
+
+        return response
+
+    def _search_basic(self, query, params):
+        # update request parameters dependent on
+        # search-engine (contained in engines folder)
+        self.engine.request(query, params)
+
+        # ignore empty URLs
+        if not params['url']:
+            return None
+
+        # send request
+        response = self._send_http_request(params)
+
+        # parse the response
+        response.search_params = params
+        return self.engine.response(response)
+
+    def search(self, query, params, result_container, start_time, timeout_limit):
+        # set timeout for all HTTP requests
+        poolrequests.set_timeout_for_thread(timeout_limit, start_time=start_time)
+        # reset the HTTP total time
+        poolrequests.reset_time_for_thread()
+
+        # assume everything will go well
+        requests_exception = False
+        suspended_time = None
+
+        try:
+            # send requests and parse the results
+            search_results = self._search_basic(query, params)
+
+            # check if the engine accepted the request
+            if search_results is not None:
+                # yes, so add results
+                result_container.extend(self.engine_name, search_results)
+
+                # update engine time when there is no exception
+                engine_time = time() - start_time
+                page_load_time = poolrequests.get_time_for_thread()
+                result_container.add_timing(self.engine_name, engine_time, page_load_time)
+                with STATS_LOCK:
+                    self.engine.stats['engine_time'] += engine_time
+                    self.engine.stats['engine_time_count'] += 1
+                    # update stats with the total HTTP time
+                    self.engine.stats['page_load_time'] += page_load_time
+                    self.engine.stats['page_load_count'] += 1
+        except Exception as e:
+            record_exception(self.engine_name, e)
+
+            # Timing
+            engine_time = time() - start_time
+            page_load_time = poolrequests.get_time_for_thread()
+            result_container.add_timing(self.engine_name, engine_time, page_load_time)
+
+            # Record the errors
+            with STATS_LOCK:
+                self.engine.stats['errors'] += 1
+
+            if isinstance(e, requests.exceptions.Timeout):
+                result_container.add_unresponsive_engine(self.engine_name, 'HTTP timeout')
+                # requests timeout (connect or read)
+                logger.error("engine {0} : HTTP requests timeout "
+                             "(search duration : {1} s, timeout: {2} s) : {3}"
+                             .format(self.engine_name, engine_time, timeout_limit, e.__class__.__name__))
+                requests_exception = True
+            elif isinstance(e, requests.exceptions.RequestException):
+                result_container.add_unresponsive_engine(self.engine_name, 'HTTP error')
+                # other requests exception
+                logger.exception("engine {0} : requests exception "
+                                 "(search duration : {1} s, timeout: {2} s) : {3}"
+                                 .format(self.engine_name, engine_time, timeout_limit, e))
+                requests_exception = True
+            elif isinstance(e, SearxEngineCaptchaException):
+                result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required')
+                logger.exception('engine {0} : CAPTCHA'.format(self.engine_name))
+                suspended_time = e.suspended_time  # pylint: disable=no-member
+            elif isinstance(e, SearxEngineTooManyRequestsException):
+                result_container.add_unresponsive_engine(self.engine_name, 'too many requests')
+                logger.exception('engine {0} : Too many requests'.format(self.engine_name))
+                suspended_time = e.suspended_time  # pylint: disable=no-member
+            elif isinstance(e, SearxEngineAccessDeniedException):
+                result_container.add_unresponsive_engine(self.engine_name, 'blocked')
+                logger.exception('engine {0} : Searx is blocked'.format(self.engine_name))
+                suspended_time = e.suspended_time  # pylint: disable=no-member
+            else:
+                result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash')
+                # other errors
+                logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
+        else:
+            if getattr(threading.current_thread(), '_timeout', False):
+                record_error(self.engine_name, 'Timeout')
+
+        # suspend the engine if there is an HTTP error
+        # or suspended_time is defined
+        with STATS_LOCK:
+            if requests_exception or suspended_time:
+                # update continuous_errors / suspend_end_time
+                self.engine.continuous_errors += 1
+                if suspended_time is None:
+                    suspended_time = min(settings['search']['max_ban_time_on_fail'],
+                                         self.engine.continuous_errors * settings['search']['ban_time_on_fail'])
+                self.engine.suspend_end_time = time() + suspended_time
+            else:
+                # reset the suspend variables
+                self.engine.continuous_errors = 0
+                self.engine.suspend_end_time = 0

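The suspension bookkeeping at the end of ``search()`` backs a failing engine off linearly and caps the delay. A minimal standalone sketch of that arithmetic, with 5 s and 120 s assumed as stand-ins for ``settings['search']['ban_time_on_fail']`` and ``settings['search']['max_ban_time_on_fail']`` (the processor reads the real values from the settings):

# sketch of the back-off used in OnlineProcessor.search(); the constants
# below are assumed defaults standing in for the settings values
BAN_TIME_ON_FAIL = 5          # seconds added per consecutive error
MAX_BAN_TIME_ON_FAIL = 120    # upper bound for a suspension

def suspend_duration(continuous_errors, suspended_time=None):
    # an exception may carry its own suspended_time; otherwise
    # back off linearly: 5 s, 10 s, 15 s, ... capped at 120 s
    if suspended_time is None:
        suspended_time = min(MAX_BAN_TIME_ON_FAIL,
                             continuous_errors * BAN_TIME_ON_FAIL)
    return suspended_time

assert suspend_duration(1) == 5
assert suspend_duration(4) == 20
assert suspend_duration(100) == 120                  # capped
assert suspend_duration(3, suspended_time=60) == 60  # exception value wins
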
+ 57 - 0
searx/search/processors/online_currency.py

@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import unicodedata
+import re
+
+from searx.data import CURRENCIES
+from .online import OnlineProcessor
+
+
+parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
+
+
+def normalize_name(name):
+    name = name.lower().replace('-', ' ').rstrip('s')
+    name = re.sub(' +', ' ', name)
+    return unicodedata.normalize('NFKD', name).lower()
+
+
+def name_to_iso4217(name):
+    name = normalize_name(name)
+    currency = CURRENCIES['names'].get(name, [name])
+    return currency[0]
+
+
+def iso4217_to_name(iso4217, language):
+    return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
+
+
+class OnlineCurrencyProcessor(OnlineProcessor):
+
+    engine_type = 'online_currency'
+
+    def get_params(self, search_query, engine_category):
+        params = super().get_params(search_query, engine_category)
+        if params is None:
+            return None
+
+        m = parser_re.match(search_query.query)
+        if not m:
+            return None
+
+        amount_str, from_currency, to_currency = m.groups()
+        try:
+            amount = float(amount_str)
+        except ValueError:
+            return None
+        from_currency = name_to_iso4217(from_currency.strip())
+        to_currency = name_to_iso4217(to_currency.strip())
+
+        params['amount'] = amount
+        params['from'] = from_currency
+        params['to'] = to_currency
+        params['from_name'] = iso4217_to_name(from_currency, 'en')
+        params['to_name'] = iso4217_to_name(to_currency, 'en')
+        return params

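The accepted currency query shape is ``<amount> <from> in|to <to>``. A quick standalone check of the pattern (regex copied verbatim from the processor above; the ``CURRENCIES``-based name resolution is left out):

import re

# same pattern as in online_currency.py
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)

m = parser_re.match('100 euro in usd')
assert m is not None
amount_str, from_currency, to_currency = m.groups()
assert float(amount_str) == 100.0
assert from_currency.strip() == 'euro'
assert to_currency == 'usd'

# a query that does not look like a conversion yields no match,
# so get_params() returns None and the engine is skipped
assert parser_re.match('what is the weather') is None
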
+ 37 - 0
searx/search/processors/online_dictionary.py

@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import re
+
+from searx.utils import is_valid_lang
+from .online import OnlineProcessor
+
+
+parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+
+
+class OnlineDictionaryProcessor(OnlineProcessor):
+
+    engine_type = 'online_dictionnary'
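+    # (sic) the double-"n" spelling matches the ``engine_type`` value
+    # that dictionary engines declare in their settings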
+
+    def get_params(self, search_query, engine_category):
+        params = super().get_params(search_query, engine_category)
+        if params is None:
+            return None
+
+        m = parser_re.match(search_query.query)
+        if not m:
+            return None
+
+        from_lang, to_lang, query = m.groups()
+
+        from_lang = is_valid_lang(from_lang)
+        to_lang = is_valid_lang(to_lang)
+
+        if not from_lang or not to_lang:
+            return None
+
+        params['from_lang'] = from_lang
+        params['to_lang'] = to_lang
+        params['query'] = query
+
+        return params

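Dictionary queries follow ``<from>-<to> <word>``, e.g. ``en-fr hello``. A standalone check of the pattern (regex copied verbatim from the processor above; ``is_valid_lang``, which validates and normalizes the two language codes, is left out):

import re

# same pattern as in online_dictionary.py
parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)

m = parser_re.match('en-fr hello')
assert m is not None
assert m.groups() == ('en', 'fr', 'hello')

# the word must be the last token: multi-word queries do not match,
# so get_params() returns None and the engine is skipped
assert parser_re.match('en-fr hello world') is None
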
+ 3 - 3
searx/webapp.py

@@ -60,7 +60,7 @@ from searx import brand, static_path
 from searx import settings, searx_dir, searx_debug
 from searx.exceptions import SearxParameterException
 from searx.engines import (
-    categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
+    categories, engines, engine_shortcuts, get_engines_stats
 )
 from searx.webutils import (
     UnicodeWriter, highlight_content, get_resources_directory,
@@ -71,7 +71,7 @@ from searx.webadapter import get_search_query_from_webapp, get_selected_categori
 from searx.utils import html_to_text, gen_useragent, dict_subset, match_language
 from searx.version import VERSION_STRING
 from searx.languages import language_codes as languages
-from searx.search import SearchWithPlugins
+from searx.search import SearchWithPlugins, initialize
 from searx.query import RawTextQuery
 from searx.autocomplete import searx_bang, backends as autocomplete_backends
 from searx.plugins import plugins
@@ -131,7 +131,7 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai
 # initialize the engines except on the first run of the werkzeug server.
 if not werkzeug_reloader\
    or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"):
-    initialize_engines(settings['engines'])
+    initialize()

 babel = Babel(app)

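After this change, ``searx.search.initialize()`` is the single entry point that loads the engines and sets up their processors; callers no longer use ``searx.engines.initialize_engines()`` directly. A minimal sketch of both call styles appearing in this diff, assuming a searx checkout with a readable settings.yml:

import searx.search

# webapp style: take the engine list from settings.yml
searx.search.initialize()

# test style: pass an explicit engine list (as in the unit tests below)
engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}]
searx.search.initialize(engine_list)
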
+ 1 - 2
tests/unit/test_search.py

@@ -3,7 +3,6 @@
 from searx.testing import SearxTestCase
 from searx.search import SearchQuery, EngineRef
 import searx.search
-import searx.engines


 SAFESEARCH = 0
@@ -39,7 +38,7 @@ class SearchTestCase(SearxTestCase):

     @classmethod
     def setUpClass(cls):
-        searx.engines.initialize_engines(TEST_ENGINES)
+        searx.search.initialize(TEST_ENGINES)

     def test_timeout_simple(self):
         searx.search.max_request_timeout = None

+ 2 - 3
tests/unit/test_standalone_searx.py

@@ -8,8 +8,7 @@ import sys
 from mock import Mock, patch
 from nose2.tools import params

-from searx.search import SearchQuery, EngineRef
-from searx.engines import initialize_engines
+from searx.search import SearchQuery, EngineRef, initialize
 from searx.testing import SearxTestCase


@@ -30,7 +29,7 @@ class StandaloneSearx(SearxTestCase):
     def setUpClass(cls):
         engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}]

-        initialize_engines(engine_list)
+        initialize(engine_list)

     def test_parse_argument_no_args(self):
         """Test parse argument without args."""

+ 2 - 2
tests/unit/test_webadapter.py

@@ -17,7 +17,7 @@ TEST_ENGINES = [
         'categories': 'general',
         'shortcut': 'do',
         'timeout': 3.0,
-        'offline': True,
+        'engine_type': 'offline',
         'tokens': ['my-token'],
     },
 ]
@@ -28,7 +28,7 @@ class ValidateQueryCase(SearxTestCase):

     @classmethod
     def setUpClass(cls):
-        searx.engines.initialize_engines(TEST_ENGINES)
+        searx.search.initialize(TEST_ENGINES)

     def test_query_private_engine_without_token(self):
         preferences = Preferences(['oscar'], ['general'], engines, [])

+ 5 - 6
utils/standalone_searx.py

@@ -6,8 +6,8 @@ Getting categories without initiate the engine will only return `['general']`
 >>> import searx.engines
 ... list(searx.engines.categories.keys())
 ['general']
->>> import searx
-... searx.engines.initialize_engines(searx.settings['engines'])
+>>> import searx.search
+... searx.search.initialize()
 ... list(searx.engines.categories.keys())
 ['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']

@@ -22,11 +22,11 @@ Example to run it from python:
 >>> import importlib
 ... import json
 ... import sys
-... import searx
 ... import searx.engines
+... import searx.search
 ... search_query = 'rain'
 ... # initialize engines
-... searx.engines.initialize_engines(searx.settings['engines'])
+... searx.search.initialize()
 ... # load engines categories once instead of each time the function called
 ... engine_cs = list(searx.engines.categories.keys())
 ... # load module
@@ -82,7 +82,6 @@ from json import dumps
 from typing import Any, Dict, List, Optional

 import searx
-import searx.engines
 import searx.preferences
 import searx.query
 import searx.search
@@ -208,7 +207,7 @@ def parse_argument(


 if __name__ == '__main__':
-    searx.engines.initialize_engines(searx.settings['engines'])
+    searx.search.initialize()
     engine_cs = list(searx.engines.categories.keys())
     prog_args = parse_argument(category_choices=engine_cs)
     search_q = get_search_query(prog_args, engine_categories=engine_cs)