error_recorder.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. import typing
  2. import inspect
  3. import logging
  4. from json import JSONDecodeError
  5. from urllib.parse import urlparse
  6. from requests.exceptions import RequestException
  7. from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
  8. SearxEngineAccessDeniedException)
  9. from searx import logger
  # NOTE(review): this configures the root logger as a side effect of importing this module.
  logging.basicConfig(level=logging.INFO)
  # Error counters, filled by add_error_context():
  # engine name -> {ErrorContext: number of times this context was recorded}
  errors_per_engines = {}
  class ErrorContext:
      """Hashable description of one error location and its details.

      Two instances are equal (and hash alike) when all seven fields are equal, which lets an
      instance be used as a dictionary key to count identical errors.

      Fields:
          filename, function, line_no, code: source location the error is attributed to.
          exception_classname: name of the exception class, or None.
          log_message: free-form message, or None.
          log_parameters: extra details; must be hashable because it takes part in ``__hash__``.
      """

      __slots__ = 'filename', 'function', 'line_no', 'code', 'exception_classname', 'log_message', 'log_parameters'

      def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters):
          # location
          self.filename = filename
          self.function = function
          self.line_no = line_no
          self.code = code
          # details
          self.exception_classname = exception_classname
          self.log_message = log_message
          self.log_parameters = log_parameters

      def _key(self):
          """Return every field, in declaration order, as one tuple (basis of equality and hashing)."""
          return (
              self.filename,
              self.function,
              self.line_no,
              self.code,
              self.exception_classname,
              self.log_message,
              self.log_parameters,
          )

      def __eq__(self, o) -> bool:
          """Return True only when ``o`` is an ErrorContext whose fields all match."""
          if isinstance(o, ErrorContext):
              return self._key() == o._key()
          return False

      def __hash__(self):
          """Hash over the same fields that ``__eq__`` compares."""
          return hash(self._key())

      def __repr__(self):
          """Return a debug representation; note that ``function`` is not included."""
          template = "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r})"
          return template.format(
              self.filename,
              self.line_no,
              self.code,
              self.exception_classname,
              self.log_message,
              self.log_parameters,
          )
  def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
      """Count one more occurrence of ``error_context`` for ``engine_name`` and log it at debug level.

      The counters live in the module-level ``errors_per_engines`` mapping; the per-engine
      dictionary is created on first use.
      """
      counters = errors_per_engines.setdefault(engine_name, {})
      seen_so_far = counters.get(error_context, 0)
      counters[error_context] = seen_so_far + 1
      logger.debug('⚠️ %s: %s', engine_name, str(error_context))
  def get_trace(traces):
      """Select the frame record an error should be attributed to.

      ``traces`` is scanned from its last element back to its first. When a record whose
      filename ends with ``searx/search.py`` is reached:

      * if the record scanned just before it (the one that follows it in ``traces``) belongs to
        ``searx/poolrequests.py`` or ``requests/models.py``, the ``searx/search.py`` record
        itself is returned;
      * otherwise that following record is returned.

      When no ``searx/search.py`` record exists, the last record of ``traces`` is returned.
      Each record only needs a ``filename`` attribute.
      """
      last = traces[-1]
      following = last
      for candidate in reversed(traces):
          if not candidate.filename.endswith('searx/search.py'):
              following = candidate
              continue
          # str.endswith accepts a tuple of suffixes: True when any of them matches
          if following.filename.endswith(('searx/poolrequests.py', 'requests/models.py')):
              return candidate
          return following
      return last
  def get_hostname(exc: RequestException) -> typing.Optional[str]:
      """Return the network location (host[:port]) of the URL involved in ``exc``.

      The URL is taken from ``exc.request`` when a request is attached and has one, otherwise
      from ``exc.response``. Returns None when neither provides a URL.

      Both ``exc.request`` and ``exc.response`` may be None, so each is checked before use
      (same lookup order as get_request_exception_messages).
      """
      url = None
      if exc.request is not None:
          url = exc.request.url
      if url is None and exc.response is not None:
          url = exc.response.url
      if url is None:
          # Do not hand None to urlparse(): it would not raise but return an empty bytes netloc.
          return None
      return urlparse(url).netloc
  def get_request_exception_messages(exc: RequestException)\
          -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
      """Extract ``(status_code, reason, hostname)`` from a requests exception.

      * ``status_code`` (as a string) and ``reason`` come from ``exc.response``; both are None
        when there is no response.
      * ``hostname`` is the network location of the request URL, falling back to the response
        URL; None when no URL is available.
      """
      request = exc.request
      response = exc.response

      # Prefer the URL of the request, fall back to the one of the response.
      url = request.url if request is not None else None
      if url is None and response is not None:
          url = response.url
      hostname = None if url is None else str(urlparse(url).netloc)

      if response is None:
          return (None, None, hostname)
      return (str(response.status_code), response.reason, hostname)
  def get_messages(exc, filename) -> typing.Tuple:
      """Return a tuple of detail strings describing ``exc``; empty for unhandled exception types.

      ``filename`` is only used to recognise ValueError raised from a file whose path
      contains ``lxml``. The checks are ordered: the first matching one wins.
      """
      # JSONDecodeError is a ValueError subclass, so it has to be tested before ValueError.
      if isinstance(exc, JSONDecodeError):
          return (exc.msg, )
      if isinstance(exc, TypeError) or (isinstance(exc, ValueError) and 'lxml' in filename):
          return (str(exc), )
      if isinstance(exc, RequestException):
          return get_request_exception_messages(exc)
      # Both XPath exception types are reported the same way.
      if isinstance(exc, (SearxXPathSyntaxException, SearxEngineXPathException)):
          return (exc.xpath_str, exc.message)
      if isinstance(exc, SearxEngineAPIException):
          return (str(exc.args[0]), )
      if isinstance(exc, SearxEngineAccessDeniedException):
          return (exc.message, )
      return ()
  def get_exception_classname(exc: Exception) -> str:
      """Return the class name of ``exc``, qualified with its module.

      The module prefix is omitted when the class has no module (``__module__`` is None) or
      when it lives in the same module as the built-in types.
      """
      klass = exc.__class__
      qualified_name = klass.__qualname__
      module_name = klass.__module__
      # str.__class__ is ``type``; its module is the module of the built-ins
      builtin_module = str.__class__.__module__
      if module_name is not None and module_name != builtin_module:
          return module_name + '.' + qualified_name
      return qualified_name
  def get_error_context(framerecords, exception_classname, log_message, log_parameters) -> ErrorContext:
      """Build an ErrorContext from frame records and error details.

      Args:
          framerecords: sequence of frame records (objects with ``filename``, ``function``,
              ``lineno`` and ``code_context``); get_trace() picks the one to report.
          exception_classname: name of the exception class, or None.
          log_message: free-form message, or None.
          log_parameters: extra details stored as-is in the context.

      Returns:
          The ErrorContext for the selected frame. ``code`` is the stripped first line of the
          frame's ``code_context``, or an empty string when no source line is available.
      """
      searx_frame = get_trace(framerecords)
      filename = searx_frame.filename
      function = searx_frame.function
      line_no = searx_frame.lineno
      # code_context is None when inspect cannot read the source of the frame:
      # recording an error must not fail because of that.
      code_context = searx_frame.code_context
      code = code_context[0].strip() if code_context else ''
      # Drop the local reference to the frame records as soon as they are no longer needed.
      del framerecords
      return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters)
  def record_exception(engine_name: str, exc: Exception) -> None:
      """Record ``exc`` in the error counters of ``engine_name``.

      The location is taken from ``inspect.trace()``, so this is meant to be called while
      ``exc`` is being handled (inside the ``except`` block). The filename of the last trace
      record is passed to get_messages() together with the exception.
      """
      trace_records = inspect.trace()
      try:
          classname = get_exception_classname(exc)
          raising_filename = trace_records[-1].filename
          details = get_messages(exc, raising_filename)
          add_error_context(engine_name, get_error_context(trace_records, classname, None, details))
      finally:
          # Frame records reference frames: release them whatever happens.
          del trace_records
  def record_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
      """Record an error that is not an exception in the error counters of ``engine_name``.

      The location comes from the current call stack: this function's own frame is skipped
      and the remaining records are reversed before being handed to get_error_context().
      A missing or empty ``log_parameters`` is stored as an empty tuple.
      """
      caller_records = inspect.stack()[1:]
      caller_records.reverse()
      try:
          parameters = log_parameters if log_parameters else ()
          context = get_error_context(caller_records, None, log_message, parameters)
          add_error_context(engine_name, context)
      finally:
          # Frame records reference frames: release them whatever happens.
          del caller_records