standalone_searx.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. #!/usr/bin/env python
  2. # lint: pylint
  3. # SPDX-License-Identifier: AGPL-3.0-or-later
  4. # (C) Copyright Contributors to the SearXNG project.
  5. # (C) Copyright Contributors to the searx project (2014 - 2021)
  6. """Script to run SearXNG from terminal.
  7. Getting categories without initializing the engines will only return `['general']`
  8. >>> import searx.engines
  9. ... list(searx.engines.categories.keys())
  10. ['general']
  11. >>> import searx.search
  12. ... searx.search.initialize()
  13. ... list(searx.engines.categories.keys())
  14. ['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
  15. Example to use this script:
  16. .. code:: bash
  17. $ python3 searxng_extra/standalone_searx.py rain
  18. .. danger::
  19. Be warned, using the ``standalone_searx.py`` won't give you privacy!
  20. On the contrary, this script behaves like a SearXNG server: your IP is
  21. exposed and tracked by all active engines (google, bing, qwant, ... ), with
  22. every query!
  23. Example to run it from python:
  24. >>> import importlib
  25. ... import json
  26. ... import sys
  27. ... import searx.engines
  28. ... import searx.search
  29. ... search_query = 'rain'
  30. ... # initialize engines
  31. ... searx.search.initialize()
  32. ... # load engine categories once instead of each time the function is called
  33. ... engine_cs = list(searx.engines.categories.keys())
  34. ... # load module
  35. ... spec = importlib.util.spec_from_file_location(
  36. ... 'utils.standalone_searx', 'searxng_extra/standalone_searx.py')
  37. ... sas = importlib.util.module_from_spec(spec)
  38. ... spec.loader.exec_module(sas)
  39. ... # use function from module
  40. ... prog_args = sas.parse_argument([search_query], category_choices=engine_cs)
  41. ... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs)
  42. ... res_dict = sas.to_dict(search_q)
  43. ... sys.stdout.write(json.dumps(
  44. ... res_dict, sort_keys=True, indent=4, ensure_ascii=False,
  45. ... default=sas.json_serial))
  46. {
  47. "answers": [],
  48. "infoboxes": [ {...} ],
  49. "paging": true,
  50. "results": [... ],
  51. "results_number": 820000000.0,
  52. "search": {
  53. "lang": "all",
  54. "pageno": 1,
  55. "q": "rain",
  56. "safesearch": 0,
  57. "timerange": null
  58. },
  59. "suggestions": [...]
  60. }
  61. """ # pylint: disable=line-too-long
  62. import argparse
  63. import sys
  64. from datetime import datetime
  65. from json import dumps
  66. from typing import Any, Dict, List, Optional
  67. import searx
  68. import searx.preferences
  69. import searx.query
  70. import searx.search
  71. import searx.webadapter
# Type of the optional "engine categories" parameters in this module: a list
# of category names, or ``None`` when the caller did not supply one.
EngineCategoriesVar = Optional[List[str]]
  73. def get_search_query(
  74. args: argparse.Namespace, engine_categories: EngineCategoriesVar = None
  75. ) -> searx.search.SearchQuery:
  76. """Get search results for the query"""
  77. if engine_categories is None:
  78. engine_categories = list(searx.engines.categories.keys())
  79. try:
  80. category = args.category.decode('utf-8')
  81. except AttributeError:
  82. category = args.category
  83. form = {
  84. "q": args.query,
  85. "categories": category,
  86. "pageno": str(args.pageno),
  87. "language": args.lang,
  88. "time_range": args.timerange,
  89. }
  90. preferences = searx.preferences.Preferences(['oscar'], engine_categories, searx.engines.engines, [])
  91. preferences.key_value_settings['safesearch'].parse(args.safesearch)
  92. search_query = searx.webadapter.get_search_query_from_webapp(preferences, form)[0]
  93. return search_query
  94. def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  95. """Remove parsed url from dict."""
  96. for result in results:
  97. del result['parsed_url']
  98. return results
  99. def json_serial(obj: Any) -> Any:
  100. """JSON serializer for objects not serializable by default json code.
  101. :raise TypeError: raised when **obj** is not serializable
  102. """
  103. if isinstance(obj, datetime):
  104. serial = obj.isoformat()
  105. return serial
  106. if isinstance(obj, bytes):
  107. return obj.decode('utf8')
  108. if isinstance(obj, set):
  109. return list(obj)
  110. raise TypeError("Type ({}) not serializable".format(type(obj)))
  111. def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
  112. """Get result from parsed arguments."""
  113. result_container = searx.search.Search(search_query).search()
  114. result_container_json = {
  115. "search": {
  116. "q": search_query.query,
  117. "pageno": search_query.pageno,
  118. "lang": search_query.lang,
  119. "safesearch": search_query.safesearch,
  120. "timerange": search_query.time_range,
  121. },
  122. "results": no_parsed_url(result_container.get_ordered_results()),
  123. "infoboxes": result_container.infoboxes,
  124. "suggestions": list(result_container.suggestions),
  125. "answers": list(result_container.answers),
  126. "paging": result_container.paging,
  127. "results_number": result_container.results_number(),
  128. }
  129. return result_container_json
  130. def parse_argument(
  131. args: Optional[List[str]] = None, category_choices: EngineCategoriesVar = None
  132. ) -> argparse.Namespace:
  133. """Parse command line.
  134. :raise SystemExit: Query argument required on `args`
  135. Examples:
  136. >>> import importlib
  137. ... # load module
  138. ... spec = importlib.util.spec_from_file_location(
  139. ... 'utils.standalone_searx', 'utils/standalone_searx.py')
  140. ... sas = importlib.util.module_from_spec(spec)
  141. ... spec.loader.exec_module(sas)
  142. ... sas.parse_argument()
  143. usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]]
  144. query
  145. SystemExit: 2
  146. >>> sas.parse_argument(['rain'])
  147. Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None)
  148. """ # noqa: E501
  149. if not category_choices:
  150. category_choices = list(searx.engines.categories.keys())
  151. parser = argparse.ArgumentParser(description='Standalone searx.')
  152. parser.add_argument('query', type=str, help='Text query')
  153. parser.add_argument(
  154. '--category', type=str, nargs='?', choices=category_choices, default='general', help='Search category'
  155. )
  156. parser.add_argument('--lang', type=str, nargs='?', default='all', help='Search language')
  157. parser.add_argument('--pageno', type=int, nargs='?', default=1, help='Page number starting from 1')
  158. parser.add_argument(
  159. '--safesearch',
  160. type=str,
  161. nargs='?',
  162. choices=['0', '1', '2'],
  163. default='0',
  164. help='Safe content filter from none to strict',
  165. )
  166. parser.add_argument(
  167. '--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], help='Filter by time range'
  168. )
  169. return parser.parse_args(args)
  170. if __name__ == '__main__':
  171. settings_engines = searx.settings['engines']
  172. searx.search.load_engines(settings_engines)
  173. engine_cs = list(searx.engines.categories.keys())
  174. prog_args = parse_argument(category_choices=engine_cs)
  175. searx.search.initialize_network(settings_engines, searx.settings['outgoing'])
  176. searx.search.check_network_configuration()
  177. searx.search.initialize_metrics([engine['name'] for engine in settings_engines])
  178. searx.search.initialize_processors(settings_engines)
  179. search_q = get_search_query(prog_args, engine_categories=engine_cs)
  180. res_dict = to_dict(search_q)
  181. sys.stdout.write(dumps(res_dict, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial))