Browse Source

[enh] standalone_searx.py is a command-line interface to searx with JSON output.

Alexandre Flament 8 years ago
parent
commit
369ec017b7
2 changed files with 101 additions and 0 deletions
  1. 0 0
      utils/google_search.py
  2. 101 0
      utils/standalone_searx.py

+ 0 - 0
utils/standalone_search.py → utils/google_search.py


+ 101 - 0
utils/standalone_searx.py

@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2016- by Alexandre Flament, <alex@al-f.net>
+'''
+
+# set path
+from sys import path
+from os.path import realpath, dirname
+# append the parent of this script's directory to the module search path
+# (presumably the repository root, so that the "searx" package imported
+# below can be resolved when the script is run in place)
+script_dir = dirname(realpath(__file__))
+path.append(realpath(script_dir + '/../'))
+
+# initialization
+from json import dumps
+from searx import settings
+import searx.query
+import searx.search
+import searx.engines
+import searx.preferences
+import argparse
+
+# load the engines declared in the settings before building the parser:
+# the --category choices below are read from searx.engines.categories
+# (presumably filled in by this call -- confirm in searx.engines)
+searx.engines.initialize_engines(settings['engines'])
+
+# command line parsing
+parser = argparse.ArgumentParser(description='Standalone searx.')
+parser.add_argument('query', type=str, help='Text query')
+
+# optional flags, registered in this order; every one takes at most one value
+optional_arguments = (
+    ('--category', dict(type=str, nargs='?',
+                        choices=searx.engines.categories.keys(),
+                        default='general',
+                        help='Search category')),
+    ('--lang', dict(type=str, nargs='?', default='all',
+                    help='Search language')),
+    ('--pageno', dict(type=int, nargs='?', default=1,
+                      help='Page number starting from 1')),
+    ('--safesearch', dict(type=str, nargs='?', choices=['0', '1', '2'], default='0',
+                          help='Safe content filter from none to strict')),
+    ('--timerange', dict(type=str, nargs='?', choices=['day', 'week', 'month', 'year'],
+                         help='Filter by time range')),
+)
+for flag, options in optional_arguments:
+    parser.add_argument(flag, **options)
+
+args = parser.parse_args()
+
+# search results for the query
+
+# argparse hands back a byte string on Python 2 and a text string on
+# Python 3; only a byte string can (and has to) be decoded, since the
+# Python 3 str type has no decode() method
+category = args.category
+if isinstance(category, bytes):
+    category = category.decode('utf-8')
+
+# emulate the form that the web application would submit for this search
+form = {
+    "q": args.query,
+    "categories": category,
+    "pageno": str(args.pageno),
+    "language": args.lang,
+    "time_range": args.timerange
+}
+
+# preferences built from the loaded categories and engines
+# NOTE(review): ['oscar'] is presumably the list of available themes and the
+# trailing [] the list of plugins -- confirm against searx.preferences
+preferences = searx.preferences.Preferences(['oscar'], searx.engines.categories.keys(), searx.engines.engines, [])
+preferences.key_value_settings['safesearch'].parse(args.safesearch)
+
+# build the query object from the preferences and the form, then run the search
+search_query = searx.search.get_search_query_from_webapp(preferences, form)
+search = searx.search.Search(search_query)
+result_container = search.search()
+
+# output
+from datetime import datetime
+
+def no_parsed_url(results):
+    """Strip the 'parsed_url' entry from every result.
+
+    The dictionaries are modified in place and the same ``results`` object
+    is returned. A result that has no 'parsed_url' key is left untouched
+    instead of raising KeyError.
+    """
+    for result in results:
+        # presumably dropped because the parsed URL object cannot be dumped
+        # as JSON -- confirm against the result container
+        result.pop('parsed_url', None)
+    return results
+
+def json_serial(obj):
+    """Fallback serializer for values the json module cannot encode itself.
+
+    A datetime instance is rendered through its isoformat() method; any
+    other object raises TypeError.
+    """
+    if not isinstance(obj, datetime):
+        raise TypeError ("Type not serializable")
+    return obj.isoformat()
+
+# assemble the JSON document: the effective search parameters first, then
+# everything collected in the result container
+result_container_json = {
+    "search": {
+        "q": search_query.query,
+        "pageno": search_query.pageno,
+        "lang": search_query.lang,
+        "safesearch": search_query.safesearch,
+        "timerange": search_query.time_range,
+        "engines": search_query.engines
+    },
+    # the 'parsed_url' entries are stripped from the results before dumping
+    "results": no_parsed_url(result_container.get_ordered_results()),
+    "infoboxes": result_container.infoboxes,
+    # suggestions and answers are copied into lists before being dumped
+    "suggestions": list(result_container.suggestions),
+    "answers": list(result_container.answers),
+    "paging": result_container.paging,
+    "results_number": result_container.results_number()
+}
+
+# no "encoding" argument: json.dumps() rejects it on Python 3, and on
+# Python 2 UTF-8 is already the default, so the output is unchanged there
+print(dumps(result_container_json, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial))