Browse Source

Feature/standalone searx update (#1591)

* chg: dev: update standalone_searx

parent d8a5df721b33dd8a7cc9e21dba4060f21d629f69
author rachmadaniHaryono <foreturiga@gmail.com> 1603896594 +0800
committer rachmadaniHaryono <foreturiga@gmail.com> 1603896619 +0800

chg: dev: debug engine_shortcuts
chg: dev: only initialize if engine is given
chg: dev: split main
chg: dev: standalone_searx
chg: dev: update standalone_searx
chg: doc: remove unnecessary log
chg: test: differentiate travis
chg: test: disable shortcut
chg: test: use default engine settings
fix: dev: category choices
fix: dev: duplicate engine shortcut
fix: dev: travis python3
fix: test:  use empty string as shortcut
fix: test: apkm
fix: test: engine shortcut
fix: test: mypy
fix: test: parameter
fix: test: pep8
fix: test: py2 compatibilities
fix: test: searx settings
fix: test: travis engines
new: dev: deduplicate engine
new: dev: main receive engines parameter
new: dev: parse_argument accept engines parameter
new: dev: split search query from get_result func
new: test: basic result case
Suggestions: use RawTextQuery to make the suggestions URLs. Update all themes accordingly.

* new: doc: searx import and init

* chg: dev: parse_argument

- doc
- run on __main__
- simple parse_args

* chg: doc: module

* chg: dev: import section

- remove unused python path modification
- new required package

* chg: dev: script run

- parse_argument func return directly parsed results
- main func return dict instead json text
- dump directly on sys.stdout.write

* chg: dev: get_search_query and get_search_query func

* chg: dev: main func

- move inner function outside
- return dict instead of json text

* new: dev: add utils to doc sys path

* new: doc: standalone_searx

* fix: doc: run script

* chg: dev: mypy type hint

* chg: dev: SearchQuery doesn't have attr engines

* chg: dev: reset engines __init__

* chg: test: unit test update

* chg: dev: pylint and flake8

* new: test: standalone_searx

* chg: dev: main func and doc

* chg: dev: import and type hint

* new: dev: main func

- remove get_result func
- single func which just translate dict

* chg: test: put mypy on dev requirement

* chg: doc: update

* new: doc: add standalone_searx module member

* chg: doc: shell command line

* chg: dev: remove mypy

* chg: doc: module docstring
rachmadani haryono 4 years ago
parent
commit
c03e4c86bc

+ 1 - 0
docs/conf.py

@@ -87,6 +87,7 @@ issues_github_path = "searx/searx"
 # HTML -----------------------------------------------------------------
 # HTML -----------------------------------------------------------------
 
 
 sys.path.append(os.path.abspath('_themes'))
 sys.path.append(os.path.abspath('_themes'))
+sys.path.insert(0, os.path.abspath("../utils/"))
 html_theme_path = ['_themes']
 html_theme_path = ['_themes']
 html_theme = "searx"
 html_theme = "searx"
 
 

+ 1 - 0
docs/utils/index.rst

@@ -16,6 +16,7 @@ developers.
    filtron.sh
    filtron.sh
    morty.sh
    morty.sh
    lxc.sh
    lxc.sh
+   standalone_searx.py
 
 
 .. _toolboxing common:
 .. _toolboxing common:
 
 

+ 11 - 0
docs/utils/standalone_searx.py.rst

@@ -0,0 +1,11 @@
+
+.. _standalone_searx.py:
+
+=============================
+``utils/standalone_searx.py``
+=============================
+
+.. automodule:: standalone_searx
+  :members:
+
+

+ 118 - 0
tests/unit/test_standalone_searx.py

@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+"""Test utils/standalone_searx.py"""
+import datetime
+import importlib.util
+import sys
+
+from mock import Mock, patch
+from nose2.tools import params
+
+from searx.testing import SearxTestCase
+
+
+def get_standalone_searx_module():
+    """Get standalone_searx module."""
+    module_name = 'utils.standalone_searx'
+    filename = 'utils/standalone_searx.py'
+    spec = importlib.util.spec_from_file_location(module_name, filename)
+    sas = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(sas)
+    return sas
+
+
+class StandaloneSearx(SearxTestCase):
+    """Unit test for standalone_searx."""
+
+    def test_parse_argument_no_args(self):
+        """Test parse argument without args."""
+        sas = get_standalone_searx_module()
+        with patch.object(sys, 'argv', ['standalone_searx']), \
+                self.assertRaises(SystemExit):
+            sas.parse_argument()
+
+    def test_parse_argument_basic_args(self):
+        """Test parse argument with basic args."""
+        sas = get_standalone_searx_module()
+        query = 'red box'
+        exp_dict = {
+            'query': query, 'category': 'general', 'lang': 'all', 'pageno': 1,
+            'safesearch': '0', 'timerange': None}
+        args = ['standalone_searx', query]
+        with patch.object(sys, 'argv', args):
+            res = sas.parse_argument()
+            self.assertEqual(exp_dict, vars(res))
+        res2 = sas.parse_argument(args[1:])
+        self.assertEqual(exp_dict, vars(res2))
+
+    def test_to_dict(self):
+        """test to_dict."""
+        sas = get_standalone_searx_module()
+        self.assertEqual(
+            sas.to_dict(
+                sas.get_search_query(sas.parse_argument(['red box']))),
+            {
+                'search': {
+                    'q': 'red box', 'pageno': 1, 'lang': 'all',
+                    'safesearch': 0, 'timerange': None
+                },
+                'results': [], 'infoboxes': [], 'suggestions': [],
+                'answers': [], 'paging': False, 'results_number': 0
+            }
+        )
+
+    def test_to_dict_with_mock(self):
+        """test to dict."""
+        sas = get_standalone_searx_module()
+        with patch.object(sas.searx.search, 'Search') as mock_s:
+            m_search = mock_s().search()
+            m_sq = Mock()
+            self.assertEqual(
+                sas.to_dict(m_sq),
+                {
+                    'answers': [],
+                    'infoboxes': m_search.infoboxes,
+                    'paging': m_search.paging,
+                    'results': m_search.get_ordered_results(),
+                    'results_number': m_search.results_number(),
+                    'search': {
+                        'lang': m_sq.lang,
+                        'pageno': m_sq.pageno,
+                        'q': m_sq.query,
+                        'safesearch': m_sq.safesearch,
+                        'timerange': m_sq.time_range,
+                    },
+                    'suggestions': []
+                }
+            )
+
+    def test_get_search_query(self):
+        """test get_search_query."""
+        sas = get_standalone_searx_module()
+        args = sas.parse_argument(['rain', ])
+        search_q = sas.get_search_query(args)
+        self.assertTrue(search_q)
+        self.assertEqual(str(search_q), 'rain;[]')
+
+    def test_no_parsed_url(self):
+        """test no_parsed_url func"""
+        sas = get_standalone_searx_module()
+        self.assertEqual(
+            sas.no_parsed_url([{'parsed_url': 'http://example.com'}]),
+            [{}]
+        )
+
+    @params(
+        (datetime.datetime(2020, 1, 1), '2020-01-01T00:00:00'),
+        ('a'.encode('utf8'), 'a'),
+        (set([1]), [1])
+    )
+    def test_json_serial(self, arg, exp_res):
+        """test json_serial func"""
+        sas = get_standalone_searx_module()
+        self.assertEqual(sas.json_serial(arg), exp_res)
+
+    def test_json_serial_error(self):
+        """test error on json_serial."""
+        sas = get_standalone_searx_module()
+        with self.assertRaises(TypeError):
+            sas.json_serial('a')

+ 184 - 71
utils/standalone_searx.py

@@ -1,5 +1,63 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
+"""Script to run searx from terminal.
 
 
+Getting categories without initializing the engines will only return `['general']`
+
+>>> import searx.engines
+... list(searx.engines.categories.keys())
+['general']
+>>> import searx
+... searx.engines.initialize_engines(searx.settings['engines'])
+... list(searx.engines.categories.keys())
+['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
+
+Example to use this script:
+
+.. code::  bash
+
+    $ SEARX_DEBUG=1 python3 utils/standalone_searx.py rain
+
+Example to run it from python:
+
+>>> import importlib
+... import json
+... import sys
+... import searx
+... import searx.engines
+... search_query = 'rain'
+... # initialize engines
+... searx.engines.initialize_engines(searx.settings['engines'])
+... # load engine categories once instead of each time the function is called
+... engine_cs = list(searx.engines.categories.keys())
+... # load module
+... spec = importlib.util.spec_from_file_location(
+...     'utils.standalone_searx', 'utils/standalone_searx.py')
+... sas = importlib.util.module_from_spec(spec)
+... spec.loader.exec_module(sas)
+... # use function from module
+... prog_args = sas.parse_argument([search_query], category_choices=engine_cs)
+... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs)
+... res_dict = sas.to_dict(search_q)
+... sys.stdout.write(json.dumps(
+...     res_dict, sort_keys=True, indent=4, ensure_ascii=False,
+...     default=sas.json_serial))
+{
+    "answers": [],
+    "infoboxes": [ {...} ],
+    "paging": true,
+    "results": [... ],
+    "results_number": 820000000.0,
+    "search": {
+        "lang": "all",
+        "pageno": 1,
+        "q": "rain",
+        "safesearch": 0,
+        "timerange": null
+    },
+    "suggestions": [...]
+}
+"""  # noqa: E501
+# pylint: disable=pointless-string-statement
 '''
 '''
 searx is free software: you can redistribute it and/or modify
 searx is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 it under the terms of the GNU Affero General Public License as published by
@@ -16,90 +74,145 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 
 (C) 2016- by Alexandre Flament, <alex@al-f.net>
 (C) 2016- by Alexandre Flament, <alex@al-f.net>
 '''
 '''
-
-# set path
-from sys import path
-from os.path import realpath, dirname
-path.append(realpath(dirname(realpath(__file__)) + '/../'))
-
-# initialization
-from json import dumps
-from searx import settings
+# pylint: disable=wrong-import-position
+import argparse
 import sys
 import sys
-import codecs
-import searx.query
-import searx.search
+from datetime import datetime
+from json import dumps
+from typing import Any, Dict, List, Optional
+
+import searx
 import searx.engines
 import searx.engines
-import searx.webapdater
 import searx.preferences
 import searx.preferences
+import searx.query
+import searx.search
 import searx.webadapter
 import searx.webadapter
-import argparse
 
 
-searx.engines.initialize_engines(settings['engines'])
-
-# command line parsing
-parser = argparse.ArgumentParser(description='Standalone searx.')
-parser.add_argument('query', type=str,
-                    help='Text query')
-parser.add_argument('--category', type=str, nargs='?',
-                    choices=searx.engines.categories.keys(),
-                    default='general',
-                    help='Search category')
-parser.add_argument('--lang', type=str, nargs='?',default='all',
-                    help='Search language')
-parser.add_argument('--pageno', type=int, nargs='?', default=1,
-                    help='Page number starting from 1')
-parser.add_argument('--safesearch', type=str, nargs='?', choices=['0', '1', '2'], default='0',
-                    help='Safe content filter from none to strict')
-parser.add_argument('--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'],
-                    help='Filter by time range')
-args = parser.parse_args()
-
-# search results for the query
-form = {
-    "q":args.query,
-    "categories":args.category.decode(),
-    "pageno":str(args.pageno),
-    "language":args.lang,
-    "time_range":args.timerange
-}
-preferences = searx.preferences.Preferences(['oscar'], searx.engines.categories.keys(), searx.engines.engines, [])
-preferences.key_value_settings['safesearch'].parse(args.safesearch)
+EngineCategoriesVar = Optional[List[str]]
 
 
-search_query, raw_text_query, _, _ = searx.webadapter.get_search_query_from_webapp(preferences, form)
-search = searx.search.Search(search_query)
-result_container = search.search()
 
 
-# output
-from datetime import datetime
+def get_search_query(
+        args: argparse.Namespace, engine_categories: EngineCategoriesVar = None
+) -> searx.search.SearchQuery:
+    """Get the search query built from the parsed arguments."""
+    if engine_categories is None:
+        engine_categories = list(searx.engines.categories.keys())
+    try:
+        category = args.category.decode('utf-8')
+    except AttributeError:
+        category = args.category
+    form = {
+        "q": args.query,
+        "categories": category,
+        "pageno": str(args.pageno),
+        "language": args.lang,
+        "time_range": args.timerange
+    }
+    preferences = searx.preferences.Preferences(
+        ['oscar'], engine_categories, searx.engines.engines, [])
+    preferences.key_value_settings['safesearch'].parse(args.safesearch)
+
+    search_query = searx.webadapter.get_search_query_from_webapp(
+        preferences, form)[0]
+    return search_query
 
 
-def no_parsed_url(results):
+
+def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Remove parsed url from dict."""
     for result in results:
     for result in results:
         del result['parsed_url']
         del result['parsed_url']
     return results
     return results
 
 
-def json_serial(obj):
-    """JSON serializer for objects not serializable by default json code"""
+
+def json_serial(obj: Any) -> Any:
+    """JSON serializer for objects not serializable by default json code.
+
+    :raise TypeError: raised when **obj** is not serializable
+    """
     if isinstance(obj, datetime):
     if isinstance(obj, datetime):
         serial = obj.isoformat()
         serial = obj.isoformat()
         return serial
         return serial
-    raise TypeError ("Type not serializable")
+    if isinstance(obj, bytes):
+        return obj.decode('utf8')
+    if isinstance(obj, set):
+        return list(obj)
+    raise TypeError("Type ({}) not serializable".format(type(obj)))
 
 
-result_container_json = {
-    "search": {
-        "q": search_query.query,
-        "pageno": search_query.pageno,
-        "lang": search_query.lang,
-        "safesearch": search_query.safesearch,
-        "timerange": search_query.time_range,
-        "engines": search_query.engines  
-    },
-    "results": no_parsed_url(result_container.get_ordered_results()),
-    "infoboxes": result_container.infoboxes,
-    "suggestions": list(result_container.suggestions),
-    "answers": list(result_container.answers),
-    "paging": result_container.paging,
-    "results_number": result_container.results_number()
-}
-sys.stdout = codecs.getwriter("UTF-8")(sys.stdout)
-sys.stdout.write(dumps(result_container_json, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8", default=json_serial))
+
+def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
+    """Get result from parsed arguments."""
+    result_container = searx.search.Search(search_query).search()
+    result_container_json = {
+        "search": {
+            "q": search_query.query,
+            "pageno": search_query.pageno,
+            "lang": search_query.lang,
+            "safesearch": search_query.safesearch,
+            "timerange": search_query.time_range,
+        },
+        "results": no_parsed_url(result_container.get_ordered_results()),
+        "infoboxes": result_container.infoboxes,
+        "suggestions": list(result_container.suggestions),
+        "answers": list(result_container.answers),
+        "paging": result_container.paging,
+        "results_number": result_container.results_number()
+    }
+    return result_container_json
+
+
+def parse_argument(
+        args: Optional[List[str]]=None,
+        category_choices: EngineCategoriesVar=None
+) -> argparse.Namespace:
+    """Parse command line.
+
+    :raise SystemExit: Query argument required on `args`
+
+    Examples:
+
+    >>> import importlib
+    ... # load module
+    ... spec = importlib.util.spec_from_file_location(
+    ...     'utils.standalone_searx', 'utils/standalone_searx.py')
+    ... sas = importlib.util.module_from_spec(spec)
+    ... spec.loader.exec_module(sas)
+    ... sas.parse_argument()
+    usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]]
+                     query
+    SystemExit: 2
+    >>> sas.parse_argument(['rain'])
+    Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None)
+    """  # noqa: E501
+    if not category_choices:
+        category_choices = list(searx.engines.categories.keys())
+    parser = argparse.ArgumentParser(description='Standalone searx.')
+    parser.add_argument('query', type=str,
+                        help='Text query')
+    parser.add_argument('--category', type=str, nargs='?',
+                        choices=category_choices,
+                        default='general',
+                        help='Search category')
+    parser.add_argument('--lang', type=str, nargs='?', default='all',
+                        help='Search language')
+    parser.add_argument('--pageno', type=int, nargs='?', default=1,
+                        help='Page number starting from 1')
+    parser.add_argument(
+        '--safesearch', type=str, nargs='?',
+        choices=['0', '1', '2'], default='0',
+        help='Safe content filter from none to strict')
+    parser.add_argument(
+        '--timerange', type=str,
+        nargs='?', choices=['day', 'week', 'month', 'year'],
+        help='Filter by time range')
+    return parser.parse_args(args)
+
+
+if __name__ == '__main__':
+    searx.engines.initialize_engines(searx.settings['engines'])
+    engine_cs = list(searx.engines.categories.keys())
+    prog_args = parse_argument(category_choices=engine_cs)
+    search_q = get_search_query(prog_args, engine_categories=engine_cs)
+    res_dict = to_dict(search_q)
+    sys.stdout.write(dumps(
+        res_dict, sort_keys=True, indent=4, ensure_ascii=False,
+        default=json_serial))