error_recorder.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import typing
  2. import inspect
  3. from json import JSONDecodeError
  4. from urllib.parse import urlparse
  5. from httpx import HTTPError, HTTPStatusError
  6. from searx.exceptions import (
  7. SearxXPathSyntaxException,
  8. SearxEngineXPathException,
  9. SearxEngineAPIException,
  10. SearxEngineAccessDeniedException,
  11. )
  12. from searx import searx_parent_dir, settings
  13. from searx.engines import engines
# Module-level error counters: engine name -> {ErrorContext -> number of occurrences}.
# Filled in by add_error_context().
errors_per_engines: typing.Dict[str, typing.Dict['ErrorContext', int]] = {}
  15. class ErrorContext:
  16. __slots__ = (
  17. 'filename',
  18. 'function',
  19. 'line_no',
  20. 'code',
  21. 'exception_classname',
  22. 'log_message',
  23. 'log_parameters',
  24. 'secondary',
  25. )
  26. def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary):
  27. self.filename = filename
  28. self.function = function
  29. self.line_no = line_no
  30. self.code = code
  31. self.exception_classname = exception_classname
  32. self.log_message = log_message
  33. self.log_parameters = log_parameters
  34. self.secondary = secondary
  35. def __eq__(self, o) -> bool:
  36. if not isinstance(o, ErrorContext):
  37. return False
  38. return (
  39. self.filename == o.filename
  40. and self.function == o.function
  41. and self.line_no == o.line_no
  42. and self.code == o.code
  43. and self.exception_classname == o.exception_classname
  44. and self.log_message == o.log_message
  45. and self.log_parameters == o.log_parameters
  46. and self.secondary == o.secondary
  47. )
  48. def __hash__(self):
  49. return hash(
  50. (
  51. self.filename,
  52. self.function,
  53. self.line_no,
  54. self.code,
  55. self.exception_classname,
  56. self.log_message,
  57. self.log_parameters,
  58. self.secondary,
  59. )
  60. )
  61. def __repr__(self):
  62. return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".format(
  63. self.filename,
  64. self.line_no,
  65. self.code,
  66. self.exception_classname,
  67. self.log_message,
  68. self.log_parameters,
  69. self.secondary,
  70. )
  71. def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
  72. errors_for_engine = errors_per_engines.setdefault(engine_name, {})
  73. errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
  74. engines[engine_name].logger.warning('%s', str(error_context))
  75. def get_trace(traces):
  76. for trace in reversed(traces):
  77. split_filename = trace.filename.split('/')
  78. if '/'.join(split_filename[-3:-1]) == 'searx/engines':
  79. return trace
  80. if '/'.join(split_filename[-4:-1]) == 'searx/search/processors':
  81. return trace
  82. return traces[-1]
  83. def get_hostname(exc: HTTPError) -> typing.Optional[None]:
  84. url = exc.request.url
  85. if url is None and exc.response is not None:
  86. url = exc.response.url
  87. return urlparse(url).netloc
  88. def get_request_exception_messages(
  89. exc: HTTPError,
  90. ) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
  91. url = None
  92. status_code = None
  93. reason = None
  94. hostname = None
  95. if hasattr(exc, '_request') and exc._request is not None:
  96. # exc.request is property that raise an RuntimeException
  97. # if exc._request is not defined.
  98. url = exc.request.url
  99. if url is None and hasattr(exc, 'response') and exc.response is not None:
  100. url = exc.response.url
  101. if url is not None:
  102. hostname = url.host
  103. if isinstance(exc, HTTPStatusError):
  104. status_code = str(exc.response.status_code)
  105. reason = exc.response.reason_phrase
  106. return (status_code, reason, hostname)
  107. def get_messages(exc, filename) -> typing.Tuple:
  108. if isinstance(exc, JSONDecodeError):
  109. return (exc.msg,)
  110. if isinstance(exc, TypeError):
  111. return (str(exc),)
  112. if isinstance(exc, ValueError) and 'lxml' in filename:
  113. return (str(exc),)
  114. if isinstance(exc, HTTPError):
  115. return get_request_exception_messages(exc)
  116. if isinstance(exc, SearxXPathSyntaxException):
  117. return (exc.xpath_str, exc.message)
  118. if isinstance(exc, SearxEngineXPathException):
  119. return (exc.xpath_str, exc.message)
  120. if isinstance(exc, SearxEngineAPIException):
  121. return (str(exc.args[0]),)
  122. if isinstance(exc, SearxEngineAccessDeniedException):
  123. return (exc.message,)
  124. return ()
  125. def get_exception_classname(exc: Exception) -> str:
  126. exc_class = exc.__class__
  127. exc_name = exc_class.__qualname__
  128. exc_module = exc_class.__module__
  129. if exc_module is None or exc_module == str.__class__.__module__:
  130. return exc_name
  131. return exc_module + '.' + exc_name
  132. def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
  133. searx_frame = get_trace(framerecords)
  134. filename = searx_frame.filename
  135. if filename.startswith(searx_parent_dir):
  136. filename = filename[len(searx_parent_dir) + 1 :]
  137. function = searx_frame.function
  138. line_no = searx_frame.lineno
  139. code = searx_frame.code_context[0].strip()
  140. del framerecords
  141. return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)
  142. def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None:
  143. if not settings['general']['enable_metrics']:
  144. return
  145. framerecords = inspect.trace()
  146. try:
  147. exception_classname = get_exception_classname(exc)
  148. log_parameters = get_messages(exc, framerecords[-1][1])
  149. error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary)
  150. add_error_context(engine_name, error_context)
  151. finally:
  152. del framerecords
  153. def count_error(
  154. engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False
  155. ) -> None:
  156. if not settings['general']['enable_metrics']:
  157. return
  158. framerecords = list(reversed(inspect.stack()[1:]))
  159. try:
  160. error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary)
  161. add_error_context(engine_name, error_context)
  162. finally:
  163. del framerecords