error_recorder.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. import typing
  2. import inspect
  3. from json import JSONDecodeError
  4. from urllib.parse import urlparse
  5. from httpx import HTTPError, HTTPStatusError
  6. from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
  7. SearxEngineAccessDeniedException)
  8. from searx import logger
# Error counters grouped by engine: maps an engine name to a dict that maps
# each ErrorContext to the number of times it has been recorded.
errors_per_engines = {}
  10. class ErrorContext:
  11. __slots__ = 'filename', 'function', 'line_no', 'code', 'exception_classname', 'log_message', 'log_parameters'
  12. def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters):
  13. self.filename = filename
  14. self.function = function
  15. self.line_no = line_no
  16. self.code = code
  17. self.exception_classname = exception_classname
  18. self.log_message = log_message
  19. self.log_parameters = log_parameters
  20. def __eq__(self, o) -> bool:
  21. if not isinstance(o, ErrorContext):
  22. return False
  23. return self.filename == o.filename and self.function == o.function and self.line_no == o.line_no\
  24. and self.code == o.code and self.exception_classname == o.exception_classname\
  25. and self.log_message == o.log_message and self.log_parameters == o.log_parameters
  26. def __hash__(self):
  27. return hash((self.filename, self.function, self.line_no, self.code, self.exception_classname, self.log_message,
  28. self.log_parameters))
  29. def __repr__(self):
  30. return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\
  31. format(self.filename, self.line_no, self.code, self.exception_classname, self.log_message,
  32. self.log_parameters)
  33. def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
  34. errors_for_engine = errors_per_engines.setdefault(engine_name, {})
  35. errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
  36. logger.debug('%s: %s', engine_name, str(error_context))
  37. def get_trace(traces):
  38. for trace in reversed(traces):
  39. split_filename = trace.filename.split('/')
  40. if '/'.join(split_filename[-3:-1]) == 'searx/engines':
  41. return trace
  42. if '/'.join(split_filename[-4:-1]) == 'searx/search/processors':
  43. return trace
  44. return traces[-1]
  45. def get_hostname(exc: HTTPError) -> typing.Optional[None]:
  46. url = exc.request.url
  47. if url is None and exc.response is not None:
  48. url = exc.response.url
  49. return urlparse(url).netloc
  50. def get_request_exception_messages(exc: HTTPError)\
  51. -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
  52. url = None
  53. status_code = None
  54. reason = None
  55. hostname = None
  56. if hasattr(exc, 'request') and exc.request is not None:
  57. url = exc.request.url
  58. if url is None and hasattr(exc, 'response') and exc.respones is not None:
  59. url = exc.response.url
  60. if url is not None:
  61. hostname = url.host
  62. if isinstance(exc, HTTPStatusError):
  63. status_code = str(exc.response.status_code)
  64. reason = exc.response.reason_phrase
  65. return (status_code, reason, hostname)
  66. def get_messages(exc, filename) -> typing.Tuple:
  67. if isinstance(exc, JSONDecodeError):
  68. return (exc.msg, )
  69. if isinstance(exc, TypeError):
  70. return (str(exc), )
  71. if isinstance(exc, ValueError) and 'lxml' in filename:
  72. return (str(exc), )
  73. if isinstance(exc, HTTPError):
  74. return get_request_exception_messages(exc)
  75. if isinstance(exc, SearxXPathSyntaxException):
  76. return (exc.xpath_str, exc.message)
  77. if isinstance(exc, SearxEngineXPathException):
  78. return (exc.xpath_str, exc.message)
  79. if isinstance(exc, SearxEngineAPIException):
  80. return (str(exc.args[0]), )
  81. if isinstance(exc, SearxEngineAccessDeniedException):
  82. return (exc.message, )
  83. return ()
  84. def get_exception_classname(exc: Exception) -> str:
  85. exc_class = exc.__class__
  86. exc_name = exc_class.__qualname__
  87. exc_module = exc_class.__module__
  88. if exc_module is None or exc_module == str.__class__.__module__:
  89. return exc_name
  90. return exc_module + '.' + exc_name
  91. def get_error_context(framerecords, exception_classname, log_message, log_parameters) -> ErrorContext:
  92. searx_frame = get_trace(framerecords)
  93. filename = searx_frame.filename
  94. function = searx_frame.function
  95. line_no = searx_frame.lineno
  96. code = searx_frame.code_context[0].strip()
  97. del framerecords
  98. return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters)
  99. def count_exception(engine_name: str, exc: Exception) -> None:
  100. framerecords = inspect.trace()
  101. try:
  102. exception_classname = get_exception_classname(exc)
  103. log_parameters = get_messages(exc, framerecords[-1][1])
  104. error_context = get_error_context(framerecords, exception_classname, None, log_parameters)
  105. add_error_context(engine_name, error_context)
  106. finally:
  107. del framerecords
  108. def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
  109. framerecords = list(reversed(inspect.stack()[1:]))
  110. try:
  111. error_context = get_error_context(framerecords, None, log_message, log_parameters or ())
  112. add_error_context(engine_name, error_context)
  113. finally:
  114. del framerecords