command.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. '''
  13. import re
  14. from os.path import expanduser, isabs, realpath, commonprefix
  15. from shlex import split as shlex_split
  16. from subprocess import Popen, PIPE
  17. from time import time
  18. from threading import Thread
  19. from searx import logger
  20. offline = True
  21. paging = True
  22. command = []
  23. delimiter = {}
  24. parse_regex = {}
  25. query_type = ''
  26. query_enum = []
  27. environment_variables = {}
  28. working_dir = realpath('.')
  29. result_separator = '\n'
  30. result_template = 'key-value.html'
  31. timeout = 4.0
  32. _command_logger = logger.getChild('command')
  33. _compiled_parse_regex = {}
  34. def init(engine_settings):
  35. check_parsing_options(engine_settings)
  36. if 'command' not in engine_settings:
  37. raise ValueError('engine command : missing configuration key: command')
  38. global command, working_dir, result_template, delimiter, parse_regex, timeout, environment_variables
  39. command = engine_settings['command']
  40. if 'working_dir' in engine_settings:
  41. working_dir = engine_settings['working_dir']
  42. if not isabs(engine_settings['working_dir']):
  43. working_dir = realpath(working_dir)
  44. if 'parse_regex' in engine_settings:
  45. parse_regex = engine_settings['parse_regex']
  46. for result_key, regex in parse_regex.items():
  47. _compiled_parse_regex[result_key] = re.compile(regex, flags=re.MULTILINE)
  48. if 'delimiter' in engine_settings:
  49. delimiter = engine_settings['delimiter']
  50. if 'environment_variables' in engine_settings:
  51. environment_variables = engine_settings['environment_variables']
  52. def search(query, params):
  53. cmd = _get_command_to_run(query)
  54. if not cmd:
  55. return []
  56. results = []
  57. reader_thread = Thread(target=_get_results_from_process, args=(results, cmd, params['pageno']))
  58. reader_thread.start()
  59. reader_thread.join(timeout=timeout)
  60. return results
  61. def _get_command_to_run(query):
  62. params = shlex_split(query.decode('utf-8'))
  63. __check_query_params(params)
  64. cmd = []
  65. for c in command:
  66. if c == '{{QUERY}}':
  67. cmd.extend(params)
  68. else:
  69. cmd.append(c)
  70. return cmd
  71. def _get_results_from_process(results, cmd, pageno):
  72. leftover = ''
  73. count = 0
  74. start, end = __get_results_limits(pageno)
  75. with Popen(cmd, stdout=PIPE, stderr=PIPE, env=environment_variables) as process:
  76. line = process.stdout.readline()
  77. while line:
  78. buf = leftover + line.decode('utf-8')
  79. raw_results = buf.split(result_separator)
  80. if raw_results[-1]:
  81. leftover = raw_results[-1]
  82. raw_results = raw_results[:-1]
  83. for raw_result in raw_results:
  84. result = __parse_single_result(raw_result)
  85. if result is None:
  86. _command_logger.debug('skipped result:', raw_result)
  87. continue
  88. if start <= count and count <= end:
  89. result['template'] = result_template
  90. results.append(result)
  91. count += 1
  92. if end < count:
  93. return results
  94. line = process.stdout.readline()
  95. return_code = process.wait(timeout=timeout)
  96. if return_code != 0:
  97. raise RuntimeError('non-zero return code when running command', cmd, return_code)
  98. def __get_results_limits(pageno):
  99. start = (pageno - 1) * 10
  100. end = start + 9
  101. return start, end
  102. def __check_query_params(params):
  103. if not query_type:
  104. return
  105. if query_type == 'path':
  106. query_path = params[-1]
  107. query_path = expanduser(query_path)
  108. if commonprefix([realpath(query_path), working_dir]) != working_dir:
  109. raise ValueError('requested path is outside of configured working directory')
  110. elif query_type == 'enum' and len(query_enum) > 0:
  111. for param in params:
  112. if param not in query_enum:
  113. raise ValueError('submitted query params is not allowed', param, 'allowed params:', query_enum)
  114. def check_parsing_options(engine_settings):
  115. """ Checks if delimiter based parsing or regex parsing is configured correctly """
  116. if 'delimiter' not in engine_settings and 'parse_regex' not in engine_settings:
  117. raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex')
  118. if 'delimiter' in engine_settings and 'parse_regex' in engine_settings:
  119. raise ValueError('failed to init settings for parsing lines: too many settings')
  120. if 'delimiter' in engine_settings:
  121. if 'chars' not in engine_settings['delimiter'] or 'keys' not in engine_settings['delimiter']:
  122. raise ValueError
  123. def __parse_single_result(raw_result):
  124. """ Parses command line output based on configuration """
  125. result = {}
  126. if delimiter:
  127. elements = raw_result.split(delimiter['chars'], maxsplit=len(delimiter['keys']) - 1)
  128. if len(elements) != len(delimiter['keys']):
  129. return {}
  130. for i in range(len(elements)):
  131. result[delimiter['keys'][i]] = elements[i]
  132. if parse_regex:
  133. for result_key, regex in _compiled_parse_regex.items():
  134. found = regex.search(raw_result)
  135. if not found:
  136. return {}
  137. result[result_key] = raw_result[found.start():found.end()]
  138. return result