standalone_searx.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. #!/usr/bin/env python
  2. # lint: pylint
  3. # SPDX-License-Identifier: AGPL-3.0-or-later
  4. # (C) Copyright Contributors to the SearXNG project.
  5. # (C) Copyright Contributors to the searx project (2014 - 2021)
  6. """Script to run SearXNG from terminal.
  7. Getting categories without initializing the engines will only return `['general']`
  8. >>> import searx.engines
  9. ... list(searx.engines.categories.keys())
  10. ['general']
  11. >>> import searx.search
  12. ... searx.search.initialize()
  13. ... list(searx.engines.categories.keys())
  14. ['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
  15. Example to use this script:
  16. .. code:: bash
  17. $ python3 searxng_extra/standalone_searx.py rain
  18. Example to run it from python:
  19. >>> import importlib
  20. ... import json
  21. ... import sys
  22. ... import searx.engines
  23. ... import searx.search
  24. ... search_query = 'rain'
  25. ... # initialize engines
  26. ... searx.search.initialize()
  27. ... # load engines categories once instead of each time the function called
  28. ... engine_cs = list(searx.engines.categories.keys())
  29. ... # load module
  30. ... spec = importlib.util.spec_from_file_location(
  31. ... 'utils.standalone_searx', 'searxng_extra/standalone_searx.py')
  32. ... sas = importlib.util.module_from_spec(spec)
  33. ... spec.loader.exec_module(sas)
  34. ... # use function from module
  35. ... prog_args = sas.parse_argument([search_query], category_choices=engine_cs)
  36. ... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs)
  37. ... res_dict = sas.to_dict(search_q)
  38. ... sys.stdout.write(json.dumps(
  39. ... res_dict, sort_keys=True, indent=4, ensure_ascii=False,
  40. ... default=sas.json_serial))
  41. {
  42. "answers": [],
  43. "infoboxes": [ {...} ],
  44. "paging": true,
  45. "results": [... ],
  46. "results_number": 820000000.0,
  47. "search": {
  48. "lang": "all",
  49. "pageno": 1,
  50. "q": "rain",
  51. "safesearch": 0,
  52. "timerange": null
  53. },
  54. "suggestions": [...]
  55. }
  56. """ # pylint: disable=line-too-long
  57. import argparse
  58. import sys
  59. from datetime import datetime
  60. from json import dumps
  61. from typing import Any, Dict, List, Optional
  62. import searx
  63. import searx.preferences
  64. import searx.query
  65. import searx.search
  66. import searx.webadapter
# Type alias for an optional list of engine category names.  The functions in
# this module that accept it fall back to the keys of
# ``searx.engines.categories`` when no list is supplied.
EngineCategoriesVar = Optional[List[str]]
  68. def get_search_query(
  69. args: argparse.Namespace, engine_categories: EngineCategoriesVar = None
  70. ) -> searx.search.SearchQuery:
  71. """Get search results for the query"""
  72. if engine_categories is None:
  73. engine_categories = list(searx.engines.categories.keys())
  74. try:
  75. category = args.category.decode('utf-8')
  76. except AttributeError:
  77. category = args.category
  78. form = {
  79. "q": args.query,
  80. "categories": category,
  81. "pageno": str(args.pageno),
  82. "language": args.lang,
  83. "time_range": args.timerange
  84. }
  85. preferences = searx.preferences.Preferences(
  86. ['oscar'], engine_categories, searx.engines.engines, [])
  87. preferences.key_value_settings['safesearch'].parse(args.safesearch)
  88. search_query = searx.webadapter.get_search_query_from_webapp(
  89. preferences, form)[0]
  90. return search_query
  91. def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  92. """Remove parsed url from dict."""
  93. for result in results:
  94. del result['parsed_url']
  95. return results
  96. def json_serial(obj: Any) -> Any:
  97. """JSON serializer for objects not serializable by default json code.
  98. :raise TypeError: raised when **obj** is not serializable
  99. """
  100. if isinstance(obj, datetime):
  101. serial = obj.isoformat()
  102. return serial
  103. if isinstance(obj, bytes):
  104. return obj.decode('utf8')
  105. if isinstance(obj, set):
  106. return list(obj)
  107. raise TypeError("Type ({}) not serializable".format(type(obj)))
  108. def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
  109. """Get result from parsed arguments."""
  110. result_container = searx.search.Search(search_query).search()
  111. result_container_json = {
  112. "search": {
  113. "q": search_query.query,
  114. "pageno": search_query.pageno,
  115. "lang": search_query.lang,
  116. "safesearch": search_query.safesearch,
  117. "timerange": search_query.time_range,
  118. },
  119. "results": no_parsed_url(result_container.get_ordered_results()),
  120. "infoboxes": result_container.infoboxes,
  121. "suggestions": list(result_container.suggestions),
  122. "answers": list(result_container.answers),
  123. "paging": result_container.paging,
  124. "results_number": result_container.results_number()
  125. }
  126. return result_container_json
  127. def parse_argument(
  128. args: Optional[List[str]]=None,
  129. category_choices: EngineCategoriesVar=None
  130. ) -> argparse.Namespace:
  131. """Parse command line.
  132. :raise SystemExit: Query argument required on `args`
  133. Examples:
  134. >>> import importlib
  135. ... # load module
  136. ... spec = importlib.util.spec_from_file_location(
  137. ... 'utils.standalone_searx', 'utils/standalone_searx.py')
  138. ... sas = importlib.util.module_from_spec(spec)
  139. ... spec.loader.exec_module(sas)
  140. ... sas.parse_argument()
  141. usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]]
  142. query
  143. SystemExit: 2
  144. >>> sas.parse_argument(['rain'])
  145. Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None)
  146. """ # noqa: E501
  147. if not category_choices:
  148. category_choices = list(searx.engines.categories.keys())
  149. parser = argparse.ArgumentParser(description='Standalone searx.')
  150. parser.add_argument('query', type=str,
  151. help='Text query')
  152. parser.add_argument('--category', type=str, nargs='?',
  153. choices=category_choices,
  154. default='general',
  155. help='Search category')
  156. parser.add_argument('--lang', type=str, nargs='?', default='all',
  157. help='Search language')
  158. parser.add_argument('--pageno', type=int, nargs='?', default=1,
  159. help='Page number starting from 1')
  160. parser.add_argument(
  161. '--safesearch', type=str, nargs='?',
  162. choices=['0', '1', '2'], default='0',
  163. help='Safe content filter from none to strict')
  164. parser.add_argument(
  165. '--timerange', type=str,
  166. nargs='?', choices=['day', 'week', 'month', 'year'],
  167. help='Filter by time range')
  168. return parser.parse_args(args)
  169. if __name__ == '__main__':
  170. searx.search.initialize()
  171. engine_cs = list(searx.engines.categories.keys())
  172. prog_args = parse_argument(category_choices=engine_cs)
  173. search_q = get_search_query(prog_args, engine_categories=engine_cs)
  174. res_dict = to_dict(search_q)
  175. sys.stdout.write(dumps(
  176. res_dict, sort_keys=True, indent=4, ensure_ascii=False,
  177. default=json_serial))