error_recorder.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. import typing
  2. import inspect
  3. from json import JSONDecodeError
  4. from urllib.parse import urlparse
  5. from httpx import HTTPError, HTTPStatusError
  6. from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
  7. SearxEngineAccessDeniedException)
  8. from searx import searx_parent_dir
  9. from searx.engines import engines
# Module-level error counters: maps an engine name to a dict whose keys are
# ErrorContext instances and whose values are the number of times that exact
# context was recorded (populated by add_error_context).
errors_per_engines: typing.Dict[str, typing.Dict['ErrorContext', int]] = {}
  11. class ErrorContext:
  12. __slots__ = ('filename', 'function', 'line_no', 'code', 'exception_classname',
  13. 'log_message', 'log_parameters', 'secondary')
  14. def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary):
  15. self.filename = filename
  16. self.function = function
  17. self.line_no = line_no
  18. self.code = code
  19. self.exception_classname = exception_classname
  20. self.log_message = log_message
  21. self.log_parameters = log_parameters
  22. self.secondary = secondary
  23. def __eq__(self, o) -> bool:
  24. if not isinstance(o, ErrorContext):
  25. return False
  26. return self.filename == o.filename and self.function == o.function and self.line_no == o.line_no\
  27. and self.code == o.code and self.exception_classname == o.exception_classname\
  28. and self.log_message == o.log_message and self.log_parameters == o.log_parameters \
  29. and self.secondary == o.secondary
  30. def __hash__(self):
  31. return hash((self.filename, self.function, self.line_no, self.code, self.exception_classname, self.log_message,
  32. self.log_parameters, self.secondary))
  33. def __repr__(self):
  34. return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".\
  35. format(self.filename, self.line_no, self.code, self.exception_classname, self.log_message,
  36. self.log_parameters, self.secondary)
  37. def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
  38. errors_for_engine = errors_per_engines.setdefault(engine_name, {})
  39. errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
  40. engines[engine_name].logger.warning('%s', str(error_context))
  41. def get_trace(traces):
  42. for trace in reversed(traces):
  43. split_filename = trace.filename.split('/')
  44. if '/'.join(split_filename[-3:-1]) == 'searx/engines':
  45. return trace
  46. if '/'.join(split_filename[-4:-1]) == 'searx/search/processors':
  47. return trace
  48. return traces[-1]
  49. def get_hostname(exc: HTTPError) -> typing.Optional[None]:
  50. url = exc.request.url
  51. if url is None and exc.response is not None:
  52. url = exc.response.url
  53. return urlparse(url).netloc
  54. def get_request_exception_messages(exc: HTTPError)\
  55. -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
  56. url = None
  57. status_code = None
  58. reason = None
  59. hostname = None
  60. if hasattr(exc, '_request') and exc._request is not None:
  61. # exc.request is property that raise an RuntimeException
  62. # if exc._request is not defined.
  63. url = exc.request.url
  64. if url is None and hasattr(exc, 'response') and exc.response is not None:
  65. url = exc.response.url
  66. if url is not None:
  67. hostname = url.host
  68. if isinstance(exc, HTTPStatusError):
  69. status_code = str(exc.response.status_code)
  70. reason = exc.response.reason_phrase
  71. return (status_code, reason, hostname)
  72. def get_messages(exc, filename) -> typing.Tuple:
  73. if isinstance(exc, JSONDecodeError):
  74. return (exc.msg, )
  75. if isinstance(exc, TypeError):
  76. return (str(exc), )
  77. if isinstance(exc, ValueError) and 'lxml' in filename:
  78. return (str(exc), )
  79. if isinstance(exc, HTTPError):
  80. return get_request_exception_messages(exc)
  81. if isinstance(exc, SearxXPathSyntaxException):
  82. return (exc.xpath_str, exc.message)
  83. if isinstance(exc, SearxEngineXPathException):
  84. return (exc.xpath_str, exc.message)
  85. if isinstance(exc, SearxEngineAPIException):
  86. return (str(exc.args[0]), )
  87. if isinstance(exc, SearxEngineAccessDeniedException):
  88. return (exc.message, )
  89. return ()
  90. def get_exception_classname(exc: Exception) -> str:
  91. exc_class = exc.__class__
  92. exc_name = exc_class.__qualname__
  93. exc_module = exc_class.__module__
  94. if exc_module is None or exc_module == str.__class__.__module__:
  95. return exc_name
  96. return exc_module + '.' + exc_name
  97. def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
  98. searx_frame = get_trace(framerecords)
  99. filename = searx_frame.filename
  100. if filename.startswith(searx_parent_dir):
  101. filename = filename[len(searx_parent_dir) + 1:]
  102. function = searx_frame.function
  103. line_no = searx_frame.lineno
  104. code = searx_frame.code_context[0].strip()
  105. del framerecords
  106. return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)
  107. def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None:
  108. framerecords = inspect.trace()
  109. try:
  110. exception_classname = get_exception_classname(exc)
  111. log_parameters = get_messages(exc, framerecords[-1][1])
  112. error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary)
  113. add_error_context(engine_name, error_context)
  114. finally:
  115. del framerecords
  116. def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None,
  117. secondary: bool = False) -> None:
  118. framerecords = list(reversed(inspect.stack()[1:]))
  119. try:
  120. error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary)
  121. add_error_context(engine_name, error_context)
  122. finally:
  123. del framerecords