
[enh] rewrite and enhance metrics

Alexandre Flament 4 years ago
commit 7acd7ffc02

+ 0 - 114
searx/engines/__init__.py

@@ -21,7 +21,6 @@ import threading
 from os.path import realpath, dirname
 from babel.localedata import locale_identifiers
 from urllib.parse import urlparse
-from flask_babel import gettext
 from operator import itemgetter
 from searx import settings
 from searx import logger
@@ -136,22 +135,6 @@ def load_engine(engine_data):
         setattr(engine, 'fetch_supported_languages',
                 lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)))
 
-    engine.stats = {
-        'sent_search_count': 0,  # sent search
-        'search_count': 0,  # succesful search
-        'result_count': 0,
-        'engine_time': 0,
-        'engine_time_count': 0,
-        'score_count': 0,
-        'errors': 0
-    }
-
-    engine_type = getattr(engine, 'engine_type', 'online')
-
-    if engine_type != 'offline':
-        engine.stats['page_load_time'] = 0
-        engine.stats['page_load_count'] = 0
-
     # tor related settings
     if settings['outgoing'].get('using_tor_proxy'):
         # use onion url if using tor.
@@ -175,103 +158,6 @@ def load_engine(engine_data):
     return engine
 
 
-def to_percentage(stats, maxvalue):
-    for engine_stat in stats:
-        if maxvalue:
-            engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
-        else:
-            engine_stat['percentage'] = 0
-    return stats
-
-
-def get_engines_stats(preferences):
-    # TODO refactor
-    pageloads = []
-    engine_times = []
-    results = []
-    scores = []
-    errors = []
-    scores_per_result = []
-
-    max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0  # noqa
-    for engine in engines.values():
-        if not preferences.validate_token(engine):
-            continue
-
-        if engine.stats['search_count'] == 0:
-            continue
-
-        results_num = \
-            engine.stats['result_count'] / float(engine.stats['search_count'])
-
-        if engine.stats['engine_time_count'] != 0:
-            this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count'])  # noqa
-        else:
-            this_engine_time = 0
-
-        if results_num:
-            score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa
-            score_per_result = score / results_num
-        else:
-            score = score_per_result = 0.0
-
-        if engine.engine_type != 'offline':
-            load_times = 0
-            if engine.stats['page_load_count'] != 0:
-                load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa
-            max_pageload = max(load_times, max_pageload)
-            pageloads.append({'avg': load_times, 'name': engine.name})
-
-        max_engine_times = max(this_engine_time, max_engine_times)
-        max_results = max(results_num, max_results)
-        max_score = max(score, max_score)
-        max_score_per_result = max(score_per_result, max_score_per_result)
-        max_errors = max(max_errors, engine.stats['errors'])
-
-        engine_times.append({'avg': this_engine_time, 'name': engine.name})
-        results.append({'avg': results_num, 'name': engine.name})
-        scores.append({'avg': score, 'name': engine.name})
-        errors.append({'avg': engine.stats['errors'], 'name': engine.name})
-        scores_per_result.append({
-            'avg': score_per_result,
-            'name': engine.name
-        })
-
-    pageloads = to_percentage(pageloads, max_pageload)
-    engine_times = to_percentage(engine_times, max_engine_times)
-    results = to_percentage(results, max_results)
-    scores = to_percentage(scores, max_score)
-    scores_per_result = to_percentage(scores_per_result, max_score_per_result)
-    errors = to_percentage(errors, max_errors)
-
-    return [
-        (
-            gettext('Engine time (sec)'),
-            sorted(engine_times, key=itemgetter('avg'))
-        ),
-        (
-            gettext('Page loads (sec)'),
-            sorted(pageloads, key=itemgetter('avg'))
-        ),
-        (
-            gettext('Number of results'),
-            sorted(results, key=itemgetter('avg'), reverse=True)
-        ),
-        (
-            gettext('Scores'),
-            sorted(scores, key=itemgetter('avg'), reverse=True)
-        ),
-        (
-            gettext('Scores per result'),
-            sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
-        ),
-        (
-            gettext('Errors'),
-            sorted(errors, key=itemgetter('avg'), reverse=True)
-        ),
-    ]
-
-
 def load_engines(engine_list):
     global engines, engine_shortcuts
     engines.clear()

+ 206 - 0
searx/metrics/__init__.py

@@ -0,0 +1,206 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import typing
+import math
+import contextlib
+from timeit import default_timer
+from operator import itemgetter
+
+from searx.engines import engines
+from .models import HistogramStorage, CounterStorage
+from .error_recorder import count_error, count_exception, errors_per_engines
+
+__all__ = ["initialize",
+           "get_engines_stats", "get_engine_errors",
+           "histogram", "histogram_observe", "histogram_observe_time",
+           "counter", "counter_inc", "counter_add",
+           "count_error", "count_exception"]
+
+
+ENDPOINTS = {'search'}
+
+
+histogram_storage: typing.Optional[HistogramStorage] = None
+counter_storage: typing.Optional[CounterStorage] = None
+
+
+@contextlib.contextmanager
+def histogram_observe_time(*args):
+    h = histogram_storage.get(*args)
+    before = default_timer()
+    yield before
+    duration = default_timer() - before
+    if h:
+        h.observe(duration)
+    else:
+        raise ValueError("histogram " + repr((*args,)) + " doesn't exist")
+
+
+def histogram_observe(duration, *args):
+    histogram_storage.get(*args).observe(duration)
+
+
+def histogram(*args, raise_on_not_found=True):
+    h = histogram_storage.get(*args)
+    if raise_on_not_found and h is None:
+        raise ValueError("histogram " + repr((*args,)) + " doesn't exist")
+    return h
+
+
+def counter_inc(*args):
+    counter_storage.add(1, *args)
+
+
+def counter_add(value, *args):
+    counter_storage.add(value, *args)
+
+
+def counter(*args):
+    return counter_storage.get(*args)
+
+
+def initialize(engine_names=None):
+    """
+    Initialize metrics
+    """
+    global counter_storage, histogram_storage
+
+    counter_storage = CounterStorage()
+    histogram_storage = HistogramStorage()
+
+    # max_timeout = max of all the engine.timeout
+    max_timeout = 2
+    for engine_name in (engine_names or engines):
+        if engine_name in engines:
+            max_timeout = max(max_timeout, engines[engine_name].timeout)
+
+    # histogram configuration
+    histogram_width = 0.1
+    histogram_size = int(1.5 * max_timeout / histogram_width)
+
+    # engines
+    for engine_name in (engine_names or engines):
+        # search count
+        counter_storage.configure('engine', engine_name, 'search', 'count', 'sent')
+        counter_storage.configure('engine', engine_name, 'search', 'count', 'successful')
+        # global counter of errors
+        counter_storage.configure('engine', engine_name, 'search', 'count', 'error')
+        # score of the engine
+        counter_storage.configure('engine', engine_name, 'score')
+        # result count per requests
+        histogram_storage.configure(1, 100, 'engine', engine_name, 'result', 'count')
+        # time doing HTTP requests
+        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
+        # total time
+        # .time.request and ...response times may overlap .time.http time.
+        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')
+
+
+def get_engine_errors(engine_list):
+    result = {}
+    engine_names = list(errors_per_engines.keys())
+    engine_names.sort()
+    for engine_name in engine_names:
+        if engine_name not in engine_list:
+            continue
+
+        error_stats = errors_per_engines[engine_name]
+        sent_search_count = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1)
+        sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
+        r = []
+        for context, count in sorted_context_count_list:
+            percentage = round(20 * count / sent_search_count) * 5
+            r.append({
+                'filename': context.filename,
+                'function': context.function,
+                'line_no': context.line_no,
+                'code': context.code,
+                'exception_classname': context.exception_classname,
+                'log_message': context.log_message,
+                'log_parameters': context.log_parameters,
+                'secondary': context.secondary,
+                'percentage': percentage,
+            })
+        result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage'])
+    return result
+
+
+def to_percentage(stats, maxvalue):
+    for engine_stat in stats:
+        if maxvalue:
+            engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
+        else:
+            engine_stat['percentage'] = 0
+    return stats
+
+
+def get_engines_stats(engine_list):
+    global counter_storage, histogram_storage
+
+    assert counter_storage is not None
+    assert histogram_storage is not None
+
+    list_time = []
+    list_time_http = []
+    list_time_total = []
+    list_result_count = []
+    list_error_count = []
+    list_scores = []
+    list_scores_per_result = []
+
+    max_error_count = max_http_time = max_time_total = max_result_count = max_score = None  # noqa
+    for engine_name in engine_list:
+        error_count = counter('engine', engine_name, 'search', 'count', 'error')
+
+        if counter('engine', engine_name, 'search', 'count', 'sent') > 0:
+            list_error_count.append({'avg': error_count, 'name': engine_name})
+            max_error_count = max(error_count, max_error_count or 0)
+
+        successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
+        if successful_count == 0:
+            continue
+
+        result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
+        time_total = histogram('engine', engine_name, 'time', 'total').percentage(50)
+        time_http = histogram('engine', engine_name, 'time', 'http').percentage(50)
+        result_count = result_count_sum / float(successful_count)
+
+        if result_count:
+            score = counter('engine', engine_name, 'score')  # noqa
+            score_per_result = score / float(result_count_sum)
+        else:
+            score = score_per_result = 0.0
+
+        max_time_total = max(time_total, max_time_total or 0)
+        max_http_time = max(time_http, max_http_time or 0)
+        max_result_count = max(result_count, max_result_count or 0)
+        max_score = max(score, max_score or 0)
+
+        list_time.append({'total': round(time_total, 1),
+                          'http': round(time_http, 1),
+                          'name': engine_name,
+                          'processing': round(time_total - time_http, 1)})
+        list_time_total.append({'avg': time_total, 'name': engine_name})
+        list_time_http.append({'avg': time_http, 'name': engine_name})
+        list_result_count.append({'avg': result_count, 'name': engine_name})
+        list_scores.append({'avg': score, 'name': engine_name})
+        list_scores_per_result.append({'avg': score_per_result, 'name': engine_name})
+
+    list_time = sorted(list_time, key=itemgetter('total'))
+    list_time_total = sorted(to_percentage(list_time_total, max_time_total), key=itemgetter('avg'))
+    list_time_http = sorted(to_percentage(list_time_http, max_http_time), key=itemgetter('avg'))
+    list_result_count = sorted(to_percentage(list_result_count, max_result_count), key=itemgetter('avg'), reverse=True)
+    list_scores = sorted(list_scores, key=itemgetter('avg'), reverse=True)
+    list_scores_per_result = sorted(list_scores_per_result, key=itemgetter('avg'), reverse=True)
+    list_error_count = sorted(to_percentage(list_error_count, max_error_count), key=itemgetter('avg'), reverse=True)
+
+    return {
+        'time': list_time,
+        'max_time': math.ceil(max_time_total or 0),
+        'time_total': list_time_total,
+        'time_http': list_time_http,
+        'result_count': list_result_count,
+        'scores': list_scores,
+        'scores_per_result': list_scores_per_result,
+        'error_count': list_error_count,
+    }
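
A minimal usage sketch of the new searx.metrics API (illustrative, not part of the commit; assumes a searx checkout with a loadable settings.yml, and the engine name 'example' is hypothetical):

    from searx.metrics import (initialize, counter, counter_inc,
                               histogram, histogram_observe_time)

    # configure counters and histograms for one engine name
    initialize(['example'])

    # count a sent search, then time a block of work into the total-time histogram
    counter_inc('engine', 'example', 'search', 'count', 'sent')
    with histogram_observe_time('engine', 'example', 'time', 'total'):
        pass  # ... send the request and parse the results ...

    print(counter('engine', 'example', 'search', 'count', 'sent'))  # 1
    print(histogram('engine', 'example', 'time', 'total').count)    # 1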

+ 2 - 5
searx/metrology/error_recorder.py → searx/metrics/error_recorder.py

@@ -1,6 +1,5 @@
 import typing
 import inspect
-import logging
 from json import JSONDecodeError
 from urllib.parse import urlparse
 from httpx import HTTPError, HTTPStatusError
@@ -9,8 +8,6 @@ from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathExcepti
 from searx import logger
 
 
-logging.basicConfig(level=logging.INFO)
-
 errors_per_engines = {}
 
 
@@ -124,7 +121,7 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame
     return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters)
 
 
-def record_exception(engine_name: str, exc: Exception) -> None:
+def count_exception(engine_name: str, exc: Exception) -> None:
     framerecords = inspect.trace()
     try:
         exception_classname = get_exception_classname(exc)
@@ -135,7 +132,7 @@ def record_exception(engine_name: str, exc: Exception) -> None:
         del framerecords
 
 
-def record_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
+def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
     framerecords = list(reversed(inspect.stack()[1:]))
     try:
         error_context = get_error_context(framerecords, None, log_message, log_parameters or ())
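
How the renamed helpers are meant to be called (a sketch, not part of the commit; the engine name and messages are hypothetical):

    from searx.metrics import count_error, count_exception

    try:
        raise ValueError('unexpected response')
    except ValueError as exc:
        # stored in errors_per_engines under the engine name, keyed by an
        # ErrorContext (filename, function, line_no, code, exception class, ...)
        count_exception('example', exc)

    # record an error without an exception, with optional log parameters
    count_error('example', 'unexpected crash: {}', ('details',))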

+ 156 - 0
searx/metrics/models.py

@@ -0,0 +1,156 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import decimal
+import threading
+
+from searx import logger
+
+
+__all__ = ["Histogram", "HistogramStorage", "CounterStorage"]
+
+logger = logger.getChild('searx.metrics')
+
+
+class Histogram:
+
+    __slots__ = '_lock', '_size', '_sum', '_quartiles', '_count', '_width'
+
+    def __init__(self, width=10, size=200):
+        self._lock = threading.Lock()
+        self._width = width
+        self._size = size
+        self._quartiles = [0] * size
+        self._count = 0
+        self._sum = 0
+
+    def observe(self, value):
+        q = int(value / self._width)
+        if q < 0:
+            # a negative value is clamped into the first bucket
+            q = 0
+        if q >= self._size:
+            # a value above the last bucket is clamped into the last bucket
+            q = self._size - 1
+        with self._lock:
+            self._quartiles[q] += 1
+            self._count += 1
+            self._sum += value
+
+    @property
+    def quartiles(self):
+        return list(self._quartiles)
+
+    @property
+    def count(self):
+        return self._count
+
+    @property
+    def sum(self):
+        return self._sum
+
+    @property
+    def average(self):
+        with self._lock:
+            if self._count != 0:
+                return self._sum / self._count
+            else:
+                return 0
+
+    @property
+    def quartile_percentage(self):
+        ''' Quartile in percentage '''
+        with self._lock:
+            if self._count > 0:
+                return [int(q * 100 / self._count) for q in self._quartiles]
+            else:
+                return self._quartiles
+
+    @property
+    def quartile_percentage_map(self):
+        result = {}
+        # use Decimal to avoid rounding errors
+        x = decimal.Decimal(0)
+        width = decimal.Decimal(self._width)
+        width_exponent = -width.as_tuple().exponent
+        with self._lock:
+            if self._count > 0:
+                for y in self._quartiles:
+                    yp = int(y * 100 / self._count)
+                    if yp != 0:
+                        result[round(float(x), width_exponent)] = yp
+                    x += width
+        return result
+
+    def percentage(self, percentage):
+        # use Decimal to avoid rounding errors
+        x = decimal.Decimal(0)
+        width = decimal.Decimal(self._width)
+        stop_at_value = decimal.Decimal(self._count) / 100 * percentage
+        sum_value = 0
+        with self._lock:
+            if self._count > 0:
+                for y in self._quartiles:
+                    sum_value += y
+                    if sum_value >= stop_at_value:
+                        return x
+                    x += width
+        return None
+
+    def __repr__(self):
+        return "Histogram<avg: " + str(self.average) + ", count: " + str(self._count) + ">"
+
+
+class HistogramStorage:
+
+    __slots__ = 'measures'
+
+    def __init__(self):
+        self.clear()
+
+    def clear(self):
+        self.measures = {}
+
+    def configure(self, width, size, *args):
+        measure = Histogram(width, size)
+        self.measures[args] = measure
+        return measure
+
+    def get(self, *args):
+        return self.measures.get(args, None)
+
+    def dump(self):
+        logger.debug("Histograms:")
+        ks = sorted(self.measures.keys(), key='/'.join)
+        for k in ks:
+            logger.debug("- %-60s %s", '|'.join(k), self.measures[k])
+
+
+class CounterStorage:
+
+    __slots__ = 'counters', 'lock'
+
+    def __init__(self):
+        self.lock = threading.Lock()
+        self.clear()
+
+    def clear(self):
+        with self.lock:
+            self.counters = {}
+
+    def configure(self, *args):
+        with self.lock:
+            self.counters[args] = 0
+
+    def get(self, *args):
+        return self.counters[args]
+
+    def add(self, value, *args):
+        with self.lock:
+            self.counters[args] += value
+
+    def dump(self):
+        with self.lock:
+            ks = sorted(self.counters.keys(), key='/'.join)
+        logger.debug("Counters:")
+        for k in ks:
+            logger.debug("- %-60s %s", '|'.join(k), self.counters[k])
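
A short worked example of the bucket arithmetic above (illustrative, not part of the commit), using the same width=1 configuration that initialize() picks for the per-engine result-count histogram: an observed value v is counted in bucket int(v / width), and percentage(p) scans the buckets until p percent of the observations are covered.

    from searx.metrics.models import Histogram

    h = Histogram(width=1, size=100)
    for value in (2, 3, 4, 9):
        h.observe(value)       # counted in buckets int(value / 1) = 2, 3, 4 and 9

    print(h.count)             # 4
    print(h.average)           # 4.5
    print(h.percentage(50))    # Decimal('3'): half of the samples are covered here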

+ 0 - 0
searx/metrology/__init__.py


+ 4 - 4
searx/network/__init__.py

@@ -3,7 +3,7 @@
 import asyncio
 import threading
 import concurrent.futures
-from time import time
+from timeit import default_timer
 
 import httpx
 import h2.exceptions
@@ -65,7 +65,7 @@ def get_context_network():
 
 def request(method, url, **kwargs):
     """same as requests/requests/api.py request(...)"""
-    time_before_request = time()
+    time_before_request = default_timer()
 
     # timeout (httpx)
     if 'timeout' in kwargs:
@@ -82,7 +82,7 @@ def request(method, url, **kwargs):
     timeout += 0.2  # overhead
     start_time = getattr(THREADLOCAL, 'start_time', time_before_request)
     if start_time:
-        timeout -= time() - start_time
+        timeout -= default_timer() - start_time
 
     # raise_for_error
     check_for_httperror = True
@@ -111,7 +111,7 @@ def request(method, url, **kwargs):
     # update total_time.
     # See get_time_for_thread() and reset_time_for_thread()
     if hasattr(THREADLOCAL, 'total_time'):
-        time_after_request = time()
+        time_after_request = default_timer()
         THREADLOCAL.total_time += time_after_request - time_before_request
 
     # raise an exception
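
The time() → default_timer() replacement matters because time() follows the wall clock and can jump when the system clock is adjusted, while timeit.default_timer (time.perf_counter on Python 3) is a high-resolution monotonic clock, so measured durations cannot go negative. The pattern (a sketch, not part of the commit):

    from timeit import default_timer

    time_before_request = default_timer()
    # ... perform the HTTP request ...
    duration = default_timer() - time_before_request  # elapsed seconds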

+ 2 - 0
searx/raise_for_httperror/__init__.py

@@ -0,0 +1,2 @@
+# compatibility with searx/searx
+from searx.network import raise_for_httperror

+ 5 - 8
searx/results.py

@@ -5,7 +5,7 @@ from threading import RLock
 from urllib.parse import urlparse, unquote
 from searx import logger
 from searx.engines import engines
-from searx.metrology.error_recorder import record_error
+from searx.metrics import histogram_observe, counter_add, count_error
 
 
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
@@ -196,12 +196,10 @@ class ResultContainer:
 
         if len(error_msgs) > 0:
             for msg in error_msgs:
-                record_error(engine_name, 'some results are invalids: ' + msg)
+                count_error(engine_name, 'some results are invalids: ' + msg)
 
         if engine_name in engines:
-            with RLock():
-                engines[engine_name].stats['search_count'] += 1
-                engines[engine_name].stats['result_count'] += standard_result_count
+            histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')
 
         if not self.paging and standard_result_count > 0 and engine_name in engines\
            and engines[engine_name].paging:
@@ -301,9 +299,8 @@ class ResultContainer:
         for result in self._merged_results:
             score = result_score(result)
             result['score'] = score
-            with RLock():
-                for result_engine in result['engines']:
-                    engines[result_engine].stats['score_count'] += score
+            for result_engine in result['engines']:
+                counter_add(score, 'engine', result_engine, 'score')
 
         results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)
 

+ 6 - 7
searx/search/__init__.py

@@ -18,7 +18,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 import typing
 import gc
 import threading
-from time import time
+from timeit import default_timer
 from uuid import uuid4
 from _thread import start_new_thread
 
@@ -31,6 +31,7 @@ from searx.plugins import plugins
 from searx.search.models import EngineRef, SearchQuery
 from searx.search.processors import processors, initialize as initialize_processors
 from searx.search.checker import initialize as initialize_checker
+from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
 
 
 logger = logger.getChild('search')
@@ -50,6 +51,7 @@ else:
 def initialize(settings_engines=None, enable_checker=False):
     settings_engines = settings_engines or settings['engines']
     initialize_processors(settings_engines)
+    initialize_metrics([engine['name'] for engine in settings_engines])
     if enable_checker:
         initialize_checker()
 
@@ -115,8 +117,7 @@ class Search:
             if request_params is None:
                 continue
 
-            with processor.lock:
-                processor.engine.stats['sent_search_count'] += 1
+            counter_inc('engine', engineref.name, 'search', 'count', 'sent')
 
             # append request to list
             requests.append((engineref.name, self.search_query.query, request_params))
@@ -161,7 +162,7 @@ class Search:
 
         for th in threading.enumerate():
             if th.name == search_id:
-                remaining_time = max(0.0, self.actual_timeout - (time() - self.start_time))
+                remaining_time = max(0.0, self.actual_timeout - (default_timer() - self.start_time))
                 th.join(remaining_time)
                 if th.is_alive():
                     th._timeout = True
@@ -184,12 +185,10 @@ class Search:
 
     # do search-request
     def search(self):
-        self.start_time = time()
-
+        self.start_time = default_timer()
         if not self.search_external_bang():
             if not self.search_answerers():
                 self.search_standard()
-
         return self.result_container
 
 

+ 2 - 2
searx/search/checker/impl.py

@@ -16,6 +16,7 @@ from searx import network, logger
 from searx.results import ResultContainer
 from searx.search.models import SearchQuery, EngineRef
 from searx.search.processors import EngineProcessor
+from searx.metrics import counter_inc
 
 
 logger = logger.getChild('searx.search.checker')
@@ -384,8 +385,7 @@ class Checker:
         engineref_category = search_query.engineref_list[0].category
         params = self.processor.get_params(search_query, engineref_category)
         if params is not None:
-            with self.processor.lock:
-                self.processor.engine.stats['sent_search_count'] += 1
+            counter_inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent')
             self.processor.search(search_query.query, params, result_container, time(), 5)
         return result_container
 

+ 12 - 17
searx/search/processors/abstract.py

@@ -2,12 +2,12 @@
 
 import threading
 from abc import abstractmethod, ABC
-from time import time
+from timeit import default_timer
 
 from searx import logger
 from searx.engines import settings
 from searx.network import get_time_for_thread, get_network
-from searx.metrology.error_recorder import record_exception, record_error
+from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
 from searx.exceptions import SearxEngineAccessDeniedException
 
 
@@ -27,7 +27,7 @@ class SuspendedStatus:
 
     @property
     def is_suspended(self):
-        return self.suspend_end_time >= time()
+        return self.suspend_end_time >= default_timer()
 
     def suspend(self, suspended_time, suspend_reason):
         with self.lock:
@@ -36,7 +36,7 @@ class SuspendedStatus:
             if suspended_time is None:
                 suspended_time = min(settings['search']['max_ban_time_on_fail'],
                                      self.continuous_errors * settings['search']['ban_time_on_fail'])
-            self.suspend_end_time = time() + suspended_time
+            self.suspend_end_time = default_timer() + suspended_time
             self.suspend_reason = suspend_reason
         logger.debug('Suspend engine for %i seconds', suspended_time)
 
@@ -55,7 +55,6 @@ class EngineProcessor(ABC):
     def __init__(self, engine, engine_name):
         self.engine = engine
         self.engine_name = engine_name
-        self.lock = threading.Lock()
         key = get_network(self.engine_name)
         key = id(key) if key else self.engine_name
         self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
@@ -65,12 +64,11 @@ class EngineProcessor(ABC):
         error_message = str(exception) if display_exception and exception else None
         result_container.add_unresponsive_engine(self.engine_name, reason, error_message)
         # metrics
-        with self.lock:
-            self.engine.stats['errors'] += 1
+        counter_inc('engine', self.engine_name, 'search', 'count', 'error')
         if exception:
-            record_exception(self.engine_name, exception)
+            count_exception(self.engine_name, exception)
         else:
-            record_error(self.engine_name, reason)
+            count_error(self.engine_name, reason)
         # suspend the engine ?
         if suspend:
             suspended_time = None
@@ -81,17 +79,14 @@ class EngineProcessor(ABC):
     def _extend_container_basic(self, result_container, start_time, search_results):
         # update result_container
         result_container.extend(self.engine_name, search_results)
-        engine_time = time() - start_time
+        engine_time = default_timer() - start_time
         page_load_time = get_time_for_thread()
         result_container.add_timing(self.engine_name, engine_time, page_load_time)
         # metrics
-        with self.lock:
-            self.engine.stats['engine_time'] += engine_time
-            self.engine.stats['engine_time_count'] += 1
-            # update stats with the total HTTP time
-            if page_load_time is not None and 'page_load_time' in self.engine.stats:
-                self.engine.stats['page_load_time'] += page_load_time
-                self.engine.stats['page_load_count'] += 1
+        counter_inc('engine', self.engine_name, 'search', 'count', 'successful')
+        histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total')
+        if page_load_time is not None:
+            histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
 
     def extend_container(self, result_container, start_time, search_results):
         if getattr(threading.current_thread(), '_timeout', False):

+ 4 - 4
searx/search/processors/online.py

@@ -10,7 +10,7 @@ from searx import logger
 from searx.utils import gen_useragent
 from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
                               SearxEngineTooManyRequestsException,)
-from searx.metrology.error_recorder import record_error
+from searx.metrics.error_recorder import count_error
 
 from searx.search.processors.abstract import EngineProcessor
 
@@ -90,9 +90,9 @@ class OnlineProcessor(EngineProcessor):
             status_code = str(response.status_code or '')
             reason = response.reason_phrase or ''
             hostname = response.url.host
-            record_error(self.engine_name,
-                         '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
-                         (status_code, reason, hostname))
+            count_error(self.engine_name,
+                        '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
+                        (status_code, reason, hostname))
 
         return response
 

+ 28 - 49
searx/webapp.py

@@ -51,7 +51,7 @@ from searx import logger
 logger = logger.getChild('webapp')
 
 from datetime import datetime, timedelta
-from time import time
+from timeit import default_timer
 from html import escape
 from io import StringIO
 from urllib.parse import urlencode, urlparse
@@ -73,9 +73,7 @@ from flask.json import jsonify
 from searx import brand, static_path
 from searx import settings, searx_dir, searx_debug
 from searx.exceptions import SearxParameterException
-from searx.engines import (
-    categories, engines, engine_shortcuts, get_engines_stats
-)
+from searx.engines import categories, engines, engine_shortcuts
 from searx.webutils import (
     UnicodeWriter, highlight_content, get_resources_directory,
     get_static_files, get_result_templates, get_themes,
@@ -95,7 +93,7 @@ from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
 from searx.network import stream as http_stream
 from searx.answerers import ask
-from searx.metrology.error_recorder import errors_per_engines
+from searx.metrics import get_engines_stats, get_engine_errors, histogram
 
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
@@ -463,7 +461,7 @@ def _get_ordered_categories():
 
 @app.before_request
 def pre_request():
-    request.start_time = time()
+    request.start_time = default_timer()
     request.timings = []
     request.errors = []
 
@@ -521,7 +519,7 @@ def add_default_headers(response):
 
 @app.after_request
 def post_request(response):
-    total_time = time() - request.start_time
+    total_time = default_timer() - request.start_time
     timings_all = ['total;dur=' + str(round(total_time * 1000, 3))]
     if len(request.timings) > 0:
         timings = sorted(request.timings, key=lambda v: v['total'])
@@ -852,29 +850,25 @@ def preferences():
     allowed_plugins = request.preferences.plugins.get_enabled()
 
     # stats for preferences page
-    stats = {}
+    filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
 
     engines_by_category = {}
     for c in categories:
-        engines_by_category[c] = []
-        for e in categories[c]:
-            if not request.preferences.validate_token(e):
-                continue
-
-            stats[e.name] = {'time': None,
-                             'warn_timeout': False,
-                             'warn_time': False}
-            if e.timeout > settings['outgoing']['request_timeout']:
-                stats[e.name]['warn_timeout'] = True
-            stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences)
-            engines_by_category[c].append(e)
+        engines_by_category[c] = [e for e in categories[c] if e.name in filtered_engines]
 
     # get first element [0], the engine time,
     # and then the second element [1] : the time (the first one is the label)
-    for engine_stat in get_engines_stats(request.preferences)[0][1]:
-        stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3)
-        if engine_stat.get('avg') > settings['outgoing']['request_timeout']:
-            stats[engine_stat.get('name')]['warn_time'] = True
+    stats = {}
+    for _, e in filtered_engines.items():
+        h = histogram('engine', e.name, 'time', 'total')
+        median = round(h.percentage(50), 1) if h.count > 0 else None
+
+        stats[e.name] = {
+            'time': median if median else None,
+            'warn_time': median is not None and median > settings['outgoing']['request_timeout'],
+            'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
+            'supports_selected_language': _is_selected_language_supported(e, request.preferences)
+        }
     # end of stats
 
     locked_preferences = list()
     locked_preferences = list()
@@ -976,38 +970,23 @@ def image_proxy():
 @app.route('/stats', methods=['GET'])
 def stats():
     """Render engine statistics page."""
-    stats = get_engines_stats(request.preferences)
+    filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
+    engine_stats = get_engines_stats(filtered_engines)
     return render(
         'stats.html',
-        stats=stats,
+        stats=[(gettext('Engine time (sec)'), engine_stats['time_total']),
+               (gettext('Page loads (sec)'), engine_stats['time_http']),
+               (gettext('Number of results'), engine_stats['result_count']),
+               (gettext('Scores'), engine_stats['scores']),
+               (gettext('Scores per result'), engine_stats['scores_per_result']),
+               (gettext('Errors'), engine_stats['error_count'])]
     )
 
 
 @app.route('/stats/errors', methods=['GET'])
 def stats_errors():
-    result = {}
-    engine_names = list(errors_per_engines.keys())
-    engine_names.sort()
-    for engine_name in engine_names:
-        error_stats = errors_per_engines[engine_name]
-        sent_search_count = max(engines[engine_name].stats['sent_search_count'], 1)
-        sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
-        r = []
-        percentage_sum = 0
-        for context, count in sorted_context_count_list:
-            percentage = round(20 * count / sent_search_count) * 5
-            percentage_sum += percentage
-            r.append({
-                'filename': context.filename,
-                'function': context.function,
-                'line_no': context.line_no,
-                'code': context.code,
-                'exception_classname': context.exception_classname,
-                'log_message': context.log_message,
-                'log_parameters': context.log_parameters,
-                'percentage': percentage,
-            })
-        result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage'])
+    filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
+    result = get_engine_errors(filtered_engines)
     return jsonify(result)