__init__.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. # pylint: disable=missing-function-docstring
  4. """
  5. Engine loader:
  6. call load_engines(settings['engines'])
  7. to initialize categories, engines, engine_shortcuts
  8. """
  9. import sys
  10. import copy
  11. from os.path import realpath, dirname
  12. from babel.localedata import locale_identifiers
  13. from searx import logger, settings
  14. from searx.data import ENGINES_LANGUAGES
  15. from searx.network import get
  16. from searx.utils import load_module, match_language, gen_useragent
  17. logger = logger.getChild('engines')
  18. ENGINE_DIR = dirname(realpath(__file__))
  19. BABEL_LANGS = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
  20. for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
  21. ENGINE_DEFAULT_ARGS = {
  22. "engine_type": "online",
  23. "inactive": False,
  24. "disabled": False,
  25. "timeout": settings["outgoing"]["request_timeout"],
  26. "shortcut": "-",
  27. "categories": ["general"],
  28. "supported_languages": [],
  29. "language_aliases": {},
  30. "paging": False,
  31. "safesearch": False,
  32. "time_range_support": False,
  33. "enable_http": False,
  34. "display_error_messages": True,
  35. "tokens": [],
  36. }
  37. categories = {'general': []}
  38. engines = {}
  39. engine_shortcuts = {}
  40. def load_engine(engine_data):
  41. engine_name = engine_data['name']
  42. if '_' in engine_name:
  43. logger.error('Engine name contains underscore: "{}"'.format(engine_name))
  44. sys.exit(1)
  45. if engine_name.lower() != engine_name:
  46. logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
  47. engine_name = engine_name.lower()
  48. engine_data['name'] = engine_name
  49. # load_module
  50. engine_module = engine_data['engine']
  51. try:
  52. engine = load_module(engine_module + '.py', ENGINE_DIR)
  53. except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
  54. logger.exception('Fatal exception in engine "{}"'.format(engine_module))
  55. sys.exit(1)
  56. except BaseException:
  57. logger.exception('Cannot load engine "{}"'.format(engine_module))
  58. return None
  59. update_engine_attributes(engine, engine_data)
  60. set_language_attributes(engine)
  61. update_attributes_for_tor(engine)
  62. if is_missing_required_attributes(engine):
  63. sys.exit(1)
  64. if not is_engine_active(engine):
  65. return None
  66. return engine
  67. def update_engine_attributes(engine, engine_data):
  68. # set engine attributes from engine_data
  69. for param_name, param_value in engine_data.items():
  70. if param_name == 'categories':
  71. if isinstance(param_value, str):
  72. param_value = list(map(str.strip, param_value.split(',')))
  73. engine.categories = param_value
  74. elif param_name != 'engine':
  75. setattr(engine, param_name, param_value)
  76. # set default attributes
  77. for arg_name, arg_value in ENGINE_DEFAULT_ARGS.items():
  78. if not hasattr(engine, arg_name):
  79. setattr(engine, arg_name, copy.deepcopy(arg_value))
  80. def set_language_attributes(engine):
  81. # pylint: disable=protected-access
  82. # assign supported languages from json file
  83. if engine.name in ENGINES_LANGUAGES:
  84. engine.supported_languages = ENGINES_LANGUAGES[engine.name]
  85. # find custom aliases for non standard language codes
  86. for engine_lang in engine.supported_languages:
  87. iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None)
  88. if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
  89. iso_lang not in engine.supported_languages:
  90. engine.language_aliases[iso_lang] = engine_lang
  91. # language_support
  92. engine.language_support = len(engine.supported_languages) > 0
  93. # assign language fetching method if auxiliary method exists
  94. if hasattr(engine, '_fetch_supported_languages'):
  95. headers = {
  96. 'User-Agent': gen_useragent(),
  97. 'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3', # bing needs a non-English language
  98. }
  99. engine.fetch_supported_languages =\
  100. lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
  101. def update_attributes_for_tor(engine):
  102. if settings['outgoing'].get('using_tor_proxy') and hasattr(engine, 'onion_url'):
  103. engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
  104. engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)
  105. def is_missing_required_attributes(engine):
  106. """an attribute is required when its name doesn't start with '_'.
  107. Required attributes must not be None
  108. """
  109. missing = False
  110. for engine_attr in dir(engine):
  111. if not engine_attr.startswith('_') and getattr(engine, engine_attr) is None:
  112. logger.error('Missing engine config attribute: "{0}.{1}"'
  113. .format(engine.name, engine_attr))
  114. missing = True
  115. return missing
  116. def is_engine_active(engine):
  117. # check if engine is inactive
  118. if engine.inactive is True:
  119. return False
  120. # exclude onion engines if not using tor
  121. if 'onions' in engine.categories and not settings['outgoing'].get('using_tor_proxy'):
  122. return False
  123. return True
  124. def register_engine(engine):
  125. engines[engine.name] = engine
  126. if engine.shortcut in engine_shortcuts:
  127. logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
  128. sys.exit(1)
  129. engine_shortcuts[engine.shortcut] = engine.name
  130. for category_name in engine.categories:
  131. categories.setdefault(category_name, []).append(engine)
  132. def load_engines(engine_list):
  133. """Use case: engine_list = settings['engines']
  134. """
  135. engines.clear()
  136. engine_shortcuts.clear()
  137. categories.clear()
  138. categories['general'] = []
  139. for engine_data in engine_list:
  140. engine = load_engine(engine_data)
  141. if engine:
  142. register_engine(engine)
  143. return engines