__init__.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
  13. '''
  14. import sys
  15. import threading
  16. from os.path import realpath, dirname
  17. from io import open
  18. from babel.localedata import locale_identifiers
  19. from flask_babel import gettext
  20. from operator import itemgetter
  21. from json import loads
  22. from requests import get
  23. from searx import settings
  24. from searx import logger
  25. from searx.utils import load_module, match_language, get_engine_from_settings
  26. logger = logger.getChild('engines')
  27. engine_dir = dirname(realpath(__file__))
  28. engines = {}
  29. categories = {'general': []}
  30. languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
  31. babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
  32. for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
  33. engine_shortcuts = {}
  34. engine_default_args = {'paging': False,
  35. 'categories': ['general'],
  36. 'language_support': True,
  37. 'supported_languages': [],
  38. 'safesearch': False,
  39. 'timeout': settings['outgoing']['request_timeout'],
  40. 'shortcut': '-',
  41. 'disabled': False,
  42. 'suspend_end_time': 0,
  43. 'continuous_errors': 0,
  44. 'time_range_support': False,
  45. 'offline': False,
  46. 'display_error_messages': True,
  47. 'tokens': []}
  48. def load_engine(engine_data):
  49. engine_name = engine_data['name']
  50. if '_' in engine_name:
  51. logger.error('Engine name contains underscore: "{}"'.format(engine_name))
  52. sys.exit(1)
  53. if engine_name.lower() != engine_name:
  54. logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
  55. engine_name = engine_name.lower()
  56. engine_data['name'] = engine_name
  57. engine_module = engine_data['engine']
  58. try:
  59. engine = load_module(engine_module + '.py', engine_dir)
  60. except:
  61. logger.exception('Cannot load engine "{}"'.format(engine_module))
  62. return None
  63. for param_name in engine_data:
  64. if param_name == 'engine':
  65. continue
  66. if param_name == 'categories':
  67. if engine_data['categories'] == 'none':
  68. engine.categories = []
  69. else:
  70. engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
  71. continue
  72. setattr(engine, param_name, engine_data[param_name])
  73. for arg_name, arg_value in engine_default_args.items():
  74. if not hasattr(engine, arg_name):
  75. setattr(engine, arg_name, arg_value)
  76. # checking required variables
  77. for engine_attr in dir(engine):
  78. if engine_attr.startswith('_'):
  79. continue
  80. if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
  81. return None
  82. if getattr(engine, engine_attr) is None:
  83. logger.error('Missing engine config attribute: "{0}.{1}"'
  84. .format(engine.name, engine_attr))
  85. sys.exit(1)
  86. # assign supported languages from json file
  87. if engine_data['name'] in languages:
  88. setattr(engine, 'supported_languages', languages[engine_data['name']])
  89. # find custom aliases for non standard language codes
  90. if hasattr(engine, 'supported_languages'):
  91. if hasattr(engine, 'language_aliases'):
  92. language_aliases = getattr(engine, 'language_aliases')
  93. else:
  94. language_aliases = {}
  95. for engine_lang in getattr(engine, 'supported_languages'):
  96. iso_lang = match_language(engine_lang, babel_langs, fallback=None)
  97. if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
  98. iso_lang not in getattr(engine, 'supported_languages'):
  99. language_aliases[iso_lang] = engine_lang
  100. setattr(engine, 'language_aliases', language_aliases)
  101. # assign language fetching method if auxiliary method exists
  102. if hasattr(engine, '_fetch_supported_languages'):
  103. setattr(engine, 'fetch_supported_languages',
  104. lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
  105. engine.stats = {
  106. 'result_count': 0,
  107. 'search_count': 0,
  108. 'engine_time': 0,
  109. 'engine_time_count': 0,
  110. 'score_count': 0,
  111. 'errors': 0
  112. }
  113. if not engine.offline:
  114. engine.stats['page_load_time'] = 0
  115. engine.stats['page_load_count'] = 0
  116. for category_name in engine.categories:
  117. categories.setdefault(category_name, []).append(engine)
  118. if engine.shortcut in engine_shortcuts:
  119. logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
  120. sys.exit(1)
  121. engine_shortcuts[engine.shortcut] = engine.name
  122. return engine
  123. def to_percentage(stats, maxvalue):
  124. for engine_stat in stats:
  125. if maxvalue:
  126. engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
  127. else:
  128. engine_stat['percentage'] = 0
  129. return stats
  130. def get_engines_stats(preferences):
  131. # TODO refactor
  132. pageloads = []
  133. engine_times = []
  134. results = []
  135. scores = []
  136. errors = []
  137. scores_per_result = []
  138. max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0 # noqa
  139. for engine in engines.values():
  140. if not preferences.validate_token(engine):
  141. continue
  142. if engine.stats['search_count'] == 0:
  143. continue
  144. results_num = \
  145. engine.stats['result_count'] / float(engine.stats['search_count'])
  146. if engine.stats['engine_time_count'] != 0:
  147. this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count']) # noqa
  148. else:
  149. this_engine_time = 0
  150. if results_num:
  151. score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa
  152. score_per_result = score / results_num
  153. else:
  154. score = score_per_result = 0.0
  155. if not engine.offline:
  156. load_times = 0
  157. if engine.stats['page_load_count'] != 0:
  158. load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count']) # noqa
  159. max_pageload = max(load_times, max_pageload)
  160. pageloads.append({'avg': load_times, 'name': engine.name})
  161. max_engine_times = max(this_engine_time, max_engine_times)
  162. max_results = max(results_num, max_results)
  163. max_score = max(score, max_score)
  164. max_score_per_result = max(score_per_result, max_score_per_result)
  165. max_errors = max(max_errors, engine.stats['errors'])
  166. engine_times.append({'avg': this_engine_time, 'name': engine.name})
  167. results.append({'avg': results_num, 'name': engine.name})
  168. scores.append({'avg': score, 'name': engine.name})
  169. errors.append({'avg': engine.stats['errors'], 'name': engine.name})
  170. scores_per_result.append({
  171. 'avg': score_per_result,
  172. 'name': engine.name
  173. })
  174. pageloads = to_percentage(pageloads, max_pageload)
  175. engine_times = to_percentage(engine_times, max_engine_times)
  176. results = to_percentage(results, max_results)
  177. scores = to_percentage(scores, max_score)
  178. scores_per_result = to_percentage(scores_per_result, max_score_per_result)
  179. erros = to_percentage(errors, max_errors)
  180. return [
  181. (
  182. gettext('Engine time (sec)'),
  183. sorted(engine_times, key=itemgetter('avg'))
  184. ),
  185. (
  186. gettext('Page loads (sec)'),
  187. sorted(pageloads, key=itemgetter('avg'))
  188. ),
  189. (
  190. gettext('Number of results'),
  191. sorted(results, key=itemgetter('avg'), reverse=True)
  192. ),
  193. (
  194. gettext('Scores'),
  195. sorted(scores, key=itemgetter('avg'), reverse=True)
  196. ),
  197. (
  198. gettext('Scores per result'),
  199. sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
  200. ),
  201. (
  202. gettext('Errors'),
  203. sorted(errors, key=itemgetter('avg'), reverse=True)
  204. ),
  205. ]
  206. def load_engines(engine_list):
  207. global engines
  208. engines.clear()
  209. for engine_data in engine_list:
  210. engine = load_engine(engine_data)
  211. if engine is not None:
  212. engines[engine.name] = engine
  213. return engines
  214. def initialize_engines(engine_list):
  215. load_engines(engine_list)
  216. def engine_init(engine_name, init_fn):
  217. init_fn(get_engine_from_settings(engine_name))
  218. logger.debug('%s engine: Initialized', engine_name)
  219. for engine_name, engine in engines.items():
  220. if hasattr(engine, 'init'):
  221. init_fn = getattr(engine, 'init')
  222. if init_fn:
  223. logger.debug('%s engine: Starting background initialization', engine_name)
  224. threading.Thread(target=engine_init, args=(engine_name, init_fn)).start()