background.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. # pylint: disable=missing-module-docstring
  4. # pyright: basic
  5. import json
  6. import time
  7. import threading
  8. import os
  9. import signal
  10. from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
  11. import redis.exceptions
  12. from searx import logger, settings, searx_debug
  13. from searx.redisdb import client as get_redis_client
  14. from searx.exceptions import SearxSettingsException
  15. from searx.search.processors import PROCESSORS
  16. from searx.search.checker import Checker
  17. from searx.search.checker.scheduler import scheduler_function
  18. REDIS_RESULT_KEY = 'SearXNG_checker_result'
  19. REDIS_LOCK_KEY = 'SearXNG_checker_lock'
  20. CheckerResult = Union['CheckerOk', 'CheckerErr', 'CheckerOther']
  21. class CheckerOk(TypedDict):
  22. """Checking the engines succeeded"""
  23. status: Literal['ok']
  24. engines: Dict[str, 'EngineResult']
  25. timestamp: int
  26. class CheckerErr(TypedDict):
  27. """Checking the engines failed"""
  28. status: Literal['error']
  29. timestamp: int
  30. class CheckerOther(TypedDict):
  31. """The status is unknown or disabled"""
  32. status: Literal['unknown', 'disabled']
  33. EngineResult = Union['EngineOk', 'EngineErr']
  34. class EngineOk(TypedDict):
  35. """Checking the engine succeeded"""
  36. success: Literal[True]
  37. class EngineErr(TypedDict):
  38. """Checking the engine failed"""
  39. success: Literal[False]
  40. errors: Dict[str, List[str]]
  41. def _get_interval(every: Any, error_msg: str) -> Tuple[int, int]:
  42. if isinstance(every, int):
  43. return (every, every)
  44. if (
  45. not isinstance(every, (tuple, list))
  46. or len(every) != 2 # type: ignore
  47. or not isinstance(every[0], int)
  48. or not isinstance(every[1], int)
  49. ):
  50. raise SearxSettingsException(error_msg, None)
  51. return (every[0], every[1])
  52. def get_result() -> CheckerResult:
  53. client = get_redis_client()
  54. if client is None:
  55. # without Redis, the checker is disabled
  56. return {'status': 'disabled'}
  57. serialized_result: Optional[bytes] = client.get(REDIS_RESULT_KEY)
  58. if serialized_result is None:
  59. # the Redis key does not exist
  60. return {'status': 'unknown'}
  61. return json.loads(serialized_result)
  62. def _set_result(result: CheckerResult):
  63. client = get_redis_client()
  64. if client is None:
  65. # without Redis, the function does nothing
  66. return
  67. client.set(REDIS_RESULT_KEY, json.dumps(result))
  68. def _timestamp():
  69. return int(time.time() / 3600) * 3600
  70. def run():
  71. try:
  72. # use a Redis lock to make sure there is no checker running at the same time
  73. # (this should not happen, this is a safety measure)
  74. with get_redis_client().lock(REDIS_LOCK_KEY, blocking_timeout=60, timeout=3600):
  75. logger.info('Starting checker')
  76. result: CheckerOk = {'status': 'ok', 'engines': {}, 'timestamp': _timestamp()}
  77. for name, processor in PROCESSORS.items():
  78. logger.debug('Checking %s engine', name)
  79. checker = Checker(processor)
  80. checker.run()
  81. if checker.test_results.successful:
  82. result['engines'][name] = {'success': True}
  83. else:
  84. result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}
  85. _set_result(result)
  86. logger.info('Check done')
  87. except redis.exceptions.LockError:
  88. _set_result({'status': 'error', 'timestamp': _timestamp()})
  89. logger.exception('Error while running the checker')
  90. except Exception: # pylint: disable=broad-except
  91. _set_result({'status': 'error', 'timestamp': _timestamp()})
  92. logger.exception('Error while running the checker')
  93. def _signal_handler(_signum: int, _frame: Any):
  94. t = threading.Thread(target=run)
  95. t.daemon = True
  96. t.start()
  97. def initialize():
  98. if hasattr(signal, 'SIGUSR1'):
  99. # Windows doesn't support SIGUSR1
  100. logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
  101. signal.signal(signal.SIGUSR1, _signal_handler)
  102. # special case when debug is activate
  103. if searx_debug and settings['checker']['off_when_debug']:
  104. logger.info('debug mode: checker is disabled')
  105. return
  106. # check value of checker.scheduling.every now
  107. scheduling = settings['checker']['scheduling']
  108. if scheduling is None or not scheduling:
  109. logger.info('Checker scheduler is disabled')
  110. return
  111. # make sure there is a Redis connection
  112. if get_redis_client() is None:
  113. logger.error('The checker requires Redis')
  114. return
  115. # start the background scheduler
  116. every_range = _get_interval(scheduling.get('every', (300, 1800)), 'checker.scheduling.every is not a int or list')
  117. start_after_range = _get_interval(
  118. scheduling.get('start_after', (300, 1800)), 'checker.scheduling.start_after is not a int or list'
  119. )
  120. t = threading.Thread(
  121. target=scheduler_function,
  122. args=(start_after_range[0], start_after_range[1], every_range[0], every_range[1], run),
  123. name='checker_scheduler',
  124. )
  125. t.daemon = True
  126. t.start()