error_recorder.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. import typing
  2. import inspect
  3. from json import JSONDecodeError
  4. from urllib.parse import urlparse
  5. from httpx import HTTPError, HTTPStatusError
  6. from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
  7. SearxEngineAccessDeniedException)
  8. from searx import logger, searx_parent_dir
  9. errors_per_engines = {}
  10. class ErrorContext:
  11. __slots__ = ('filename', 'function', 'line_no', 'code', 'exception_classname',
  12. 'log_message', 'log_parameters', 'secondary')
  13. def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary):
  14. self.filename = filename
  15. self.function = function
  16. self.line_no = line_no
  17. self.code = code
  18. self.exception_classname = exception_classname
  19. self.log_message = log_message
  20. self.log_parameters = log_parameters
  21. self.secondary = secondary
  22. def __eq__(self, o) -> bool:
  23. if not isinstance(o, ErrorContext):
  24. return False
  25. return self.filename == o.filename and self.function == o.function and self.line_no == o.line_no\
  26. and self.code == o.code and self.exception_classname == o.exception_classname\
  27. and self.log_message == o.log_message and self.log_parameters == o.log_parameters \
  28. and self.secondary == o.secondary
  29. def __hash__(self):
  30. return hash((self.filename, self.function, self.line_no, self.code, self.exception_classname, self.log_message,
  31. self.log_parameters, self.secondary))
  32. def __repr__(self):
  33. return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".\
  34. format(self.filename, self.line_no, self.code, self.exception_classname, self.log_message,
  35. self.log_parameters, self.secondary)
  36. def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
  37. errors_for_engine = errors_per_engines.setdefault(engine_name, {})
  38. errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
  39. logger.debug('%s: %s', engine_name, str(error_context))
  40. def get_trace(traces):
  41. for trace in reversed(traces):
  42. split_filename = trace.filename.split('/')
  43. if '/'.join(split_filename[-3:-1]) == 'searx/engines':
  44. return trace
  45. if '/'.join(split_filename[-4:-1]) == 'searx/search/processors':
  46. return trace
  47. return traces[-1]
  48. def get_hostname(exc: HTTPError) -> typing.Optional[None]:
  49. url = exc.request.url
  50. if url is None and exc.response is not None:
  51. url = exc.response.url
  52. return urlparse(url).netloc
  53. def get_request_exception_messages(exc: HTTPError)\
  54. -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
  55. url = None
  56. status_code = None
  57. reason = None
  58. hostname = None
  59. if hasattr(exc, 'request') and exc.request is not None:
  60. url = exc.request.url
  61. if url is None and hasattr(exc, 'response') and exc.respones is not None:
  62. url = exc.response.url
  63. if url is not None:
  64. hostname = url.host
  65. if isinstance(exc, HTTPStatusError):
  66. status_code = str(exc.response.status_code)
  67. reason = exc.response.reason_phrase
  68. return (status_code, reason, hostname)
  69. def get_messages(exc, filename) -> typing.Tuple:
  70. if isinstance(exc, JSONDecodeError):
  71. return (exc.msg, )
  72. if isinstance(exc, TypeError):
  73. return (str(exc), )
  74. if isinstance(exc, ValueError) and 'lxml' in filename:
  75. return (str(exc), )
  76. if isinstance(exc, HTTPError):
  77. return get_request_exception_messages(exc)
  78. if isinstance(exc, SearxXPathSyntaxException):
  79. return (exc.xpath_str, exc.message)
  80. if isinstance(exc, SearxEngineXPathException):
  81. return (exc.xpath_str, exc.message)
  82. if isinstance(exc, SearxEngineAPIException):
  83. return (str(exc.args[0]), )
  84. if isinstance(exc, SearxEngineAccessDeniedException):
  85. return (exc.message, )
  86. return ()
  87. def get_exception_classname(exc: Exception) -> str:
  88. exc_class = exc.__class__
  89. exc_name = exc_class.__qualname__
  90. exc_module = exc_class.__module__
  91. if exc_module is None or exc_module == str.__class__.__module__:
  92. return exc_name
  93. return exc_module + '.' + exc_name
  94. def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
  95. searx_frame = get_trace(framerecords)
  96. filename = searx_frame.filename
  97. if filename.startswith(searx_parent_dir):
  98. filename = filename[len(searx_parent_dir) + 1:]
  99. function = searx_frame.function
  100. line_no = searx_frame.lineno
  101. code = searx_frame.code_context[0].strip()
  102. del framerecords
  103. return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)
  104. def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None:
  105. framerecords = inspect.trace()
  106. try:
  107. exception_classname = get_exception_classname(exc)
  108. log_parameters = get_messages(exc, framerecords[-1][1])
  109. error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary)
  110. add_error_context(engine_name, error_context)
  111. finally:
  112. del framerecords
  113. def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None,
  114. secondary: bool = False) -> None:
  115. framerecords = list(reversed(inspect.stack()[1:]))
  116. try:
  117. error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary)
  118. add_error_context(engine_name, error_context)
  119. finally:
  120. del framerecords