__init__.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
  13. '''
  14. import sys
  15. import threading
  16. from os.path import realpath, dirname
  17. from io import open
  18. from flask_babel import gettext
  19. from operator import itemgetter
  20. from json import loads
  21. from requests import get
  22. from searx import settings
  23. from searx import logger
  24. from searx.utils import load_module
  # Child logger so that messages emitted by this package are tagged 'engines'.
  logger = logger.getChild('engines')

  # Directory of this package; engine modules and the language data file are
  # located relative to it.
  engine_dir = dirname(realpath(__file__))

  # Registry of loaded engine modules, keyed by engine name (see load_engines).
  engines = {}

  # Category name -> list of engine modules registered in that category.
  categories = {'general': []}

  # Supported-language data keyed by engine name. The file is read inside a
  # context manager so the handle is closed immediately instead of being left
  # for the garbage collector.
  with open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8') as _languages_file:
      languages = loads(_languages_file.read())

  # Shortcut -> engine name; load_engine refuses duplicate shortcuts.
  engine_shortcuts = {}

  # Attributes set on an engine module when neither the module itself nor its
  # settings entry defines them (applied in load_engine).
  engine_default_args = {
      'paging': False,
      'categories': ['general'],
      'language_support': True,
      'supported_languages': [],
      'safesearch': False,
      'timeout': settings['outgoing']['request_timeout'],
      'shortcut': '-',
      'disabled': False,
      'suspend_end_time': 0,
      'continuous_errors': 0,
      'time_range_support': False,
  }
  def load_engine(engine_data):
      """Load one engine module and configure it from its settings entry.

      ``engine_data`` is a mapping that must contain ``name`` and ``engine``
      (the module file name without the ``.py`` suffix).  Every other key is
      copied onto the loaded module as an attribute; ``categories`` is given
      as a comma separated string, or the literal ``'none'`` for no category.

      Returns the configured module, or ``None`` when the module cannot be
      loaded or is flagged ``inactive``.

      Terminates the process with ``sys.exit(1)`` when the engine name contains
      an underscore, when a public module attribute is ``None``, or when the
      engine's shortcut is already registered.

      Side effects: appends the engine to the module-level ``categories`` lists
      and records its shortcut in ``engine_shortcuts``.
      """
      if '_' in engine_data['name']:
          logger.error('Engine name conains underscore: "{}"'.format(engine_data['name']))
          sys.exit(1)

      engine_module = engine_data['engine']

      try:
          engine = load_module(engine_module + '.py', engine_dir)
      except Exception:
          # Deliberately not a bare ``except``: SystemExit and
          # KeyboardInterrupt must propagate instead of being logged as a
          # broken engine.
          logger.exception('Cannot load engine "{}"'.format(engine_module))
          return None

      # Copy the settings entry onto the module.
      for param_name in engine_data:
          if param_name == 'engine':
              continue
          if param_name == 'categories':
              if engine_data['categories'] == 'none':
                  engine.categories = []
              else:
                  engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
              continue
          setattr(engine, param_name, engine_data[param_name])

      # Fill in defaults for anything neither the module nor the settings set.
      for arg_name, arg_value in engine_default_args.items():
          if not hasattr(engine, arg_name):
              setattr(engine, arg_name, arg_value)

      # checking required variables
      for engine_attr in dir(engine):
          if engine_attr.startswith('_'):
              continue
          if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
              return None
          if getattr(engine, engine_attr) is None:
              logger.error('Missing engine config attribute: "{0}.{1}"'
                           .format(engine.name, engine_attr))
              sys.exit(1)

      # assign supported languages from json file
      if engine_data['name'] in languages:
          engine.supported_languages = languages[engine_data['name']]

      # assign language fetching method if auxiliary method exists
      if hasattr(engine, '_fetch_supported_languages'):
          engine.fetch_supported_languages = \
              lambda: engine._fetch_supported_languages(get(engine.supported_languages_url))

      engine.stats = {
          'result_count': 0,
          'search_count': 0,
          'page_load_time': 0,
          'page_load_count': 0,
          'engine_time': 0,
          'engine_time_count': 0,
          'score_count': 0,
          'errors': 0
      }

      for category_name in engine.categories:
          categories.setdefault(category_name, []).append(engine)

      if engine.shortcut in engine_shortcuts:
          logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
          sys.exit(1)

      engine_shortcuts[engine.shortcut] = engine.name

      return engine
  def to_percentage(stats, maxvalue):
      """Annotate every entry of ``stats`` with a ``'percentage'`` key.

      The percentage is the entry's ``'avg'`` relative to ``maxvalue``,
      truncated to an int; it is 0 for every entry when ``maxvalue`` is falsy.
      The dicts are modified in place and the same list is returned.
      """
      for entry in stats:
          entry['percentage'] = int(entry['avg'] / maxvalue * 100) if maxvalue else 0
      return stats
  def get_engines_stats():
      """Build the per-engine statistics tables from ``engine.stats``.

      Engines whose ``search_count`` is 0 are skipped.  For every remaining
      engine six figures are computed: average engine time, average page load
      time, average number of results, average score, score per result and
      the error count.  Each figure is additionally expressed as a percentage
      of the largest value seen for that figure.

      Returns a list of ``(translated label, entries)`` tuples, where entries
      is a list of ``{'avg', 'name', 'percentage'}`` dicts.  Time tables are
      sorted ascending by ``'avg'``, all other tables descending.
      """
      # TODO refactor
      pageloads = []
      engine_times = []
      results = []
      scores = []
      errors = []
      scores_per_result = []

      max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0  # noqa
      for engine in engines.values():
          if engine.stats['search_count'] == 0:
              continue
          results_num = \
              engine.stats['result_count'] / float(engine.stats['search_count'])

          # Guard each average against a zero denominator.
          if engine.stats['page_load_count'] != 0:
              load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa
          else:
              load_times = 0

          if engine.stats['engine_time_count'] != 0:
              this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count'])  # noqa
          else:
              this_engine_time = 0

          if results_num:
              score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa
              score_per_result = score / results_num
          else:
              score = score_per_result = 0.0

          max_pageload = max(load_times, max_pageload)
          max_engine_times = max(this_engine_time, max_engine_times)
          max_results = max(results_num, max_results)
          max_score = max(score, max_score)
          max_score_per_result = max(score_per_result, max_score_per_result)
          max_errors = max(max_errors, engine.stats['errors'])

          pageloads.append({'avg': load_times, 'name': engine.name})
          engine_times.append({'avg': this_engine_time, 'name': engine.name})
          results.append({'avg': results_num, 'name': engine.name})
          scores.append({'avg': score, 'name': engine.name})
          errors.append({'avg': engine.stats['errors'], 'name': engine.name})
          scores_per_result.append({
              'avg': score_per_result,
              'name': engine.name
          })

      pageloads = to_percentage(pageloads, max_pageload)
      engine_times = to_percentage(engine_times, max_engine_times)
      results = to_percentage(results, max_results)
      scores = to_percentage(scores, max_score)
      scores_per_result = to_percentage(scores_per_result, max_score_per_result)
      # Previously assigned to a misspelled, never-read name ('erros').
      errors = to_percentage(errors, max_errors)

      return [
          (
              gettext('Engine time (sec)'),
              sorted(engine_times, key=itemgetter('avg'))
          ),
          (
              gettext('Page loads (sec)'),
              sorted(pageloads, key=itemgetter('avg'))
          ),
          (
              gettext('Number of results'),
              sorted(results, key=itemgetter('avg'), reverse=True)
          ),
          (
              gettext('Scores'),
              sorted(scores, key=itemgetter('avg'), reverse=True)
          ),
          (
              gettext('Scores per result'),
              sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
          ),
          (
              gettext('Errors'),
              sorted(errors, key=itemgetter('avg'), reverse=True)
          ),
      ]
  def load_engines(engine_list):
      """(Re)load every engine described in ``engine_list``.

      All module-level registries are reset in place first, so the function
      can be called more than once: without clearing ``engine_shortcuts`` a
      second call would make ``load_engine`` report every shortcut as
      ambiguous and exit, and without resetting ``categories`` each engine
      would be appended to its category lists again.

      Entries for which ``load_engine`` returns ``None`` are skipped.
      Returns the module-level ``engines`` dict (engine name -> module).
      """
      # Mutate the existing objects rather than rebinding them so that other
      # modules holding a reference keep seeing the current state.
      engines.clear()
      engine_shortcuts.clear()
      categories.clear()
      categories['general'] = []

      for engine_data in engine_list:
          engine = load_engine(engine_data)
          if engine is not None:
              engines[engine.name] = engine
      return engines
  def initialize_engines(engine_list):
      """Load the engines and run each engine's ``init`` hook in the background.

      After ``load_engines(engine_list)``, one thread is started for every
      loaded engine that has an ``init`` attribute; the thread calls that hook
      and logs completion.  The threads are not joined.
      """
      load_engines(engine_list)

      def engine_init(engine_name, init_fn):
          init_fn()
          logger.debug('%s engine initialized', engine_name)

      for engine_name, engine in engines.items():
          if hasattr(engine, 'init'):
              init_fn = getattr(engine, 'init')
              logger.debug('Starting background initialization of %s engine', engine_name)
              # Hand the per-engine values to the thread as arguments.  A
              # closure over the loop variables is late-binding: a thread that
              # starts running after the loop has advanced would call another
              # engine's init and log the wrong name.
              threading.Thread(target=engine_init, args=(engine_name, init_fn)).start()