Browse Source

Created new plugin type custom_results. Added new plugin bang_redirect (#2027)

* Made first attempt at the bangs redirects plugin.

* It redirects. But in a messy way via javascript.

* First version with custom plugin

* Added a help page and an operator to see all the bangs available.

* Changed to .format because of support

* Changed to .format because of support

* Removed : in params

* Fixed path to json file and changed bang operator

* Changed bang operator back to &

* Made first attempt at the bangs redirects plugin.

* It redirects. But in a messy way via javascript.

* First version with custom plugin

* Added a help page and an operator to see all the bangs available.

* Changed to .format because of support

* Changed to .format because of support

* Removed : in params

* Fixed path to json file and changed bang operator

* Changed bang operator back to &

* Refactored getting search query. Also changed bang operator to ! and is now working.

* Removed prints

* Removed temporary bangs_redirect.js file. Updated plugin documentation

* Added unit test for the bangs plugin

* Fixed a unit test and added 2 more for bangs plugin

* Changed back to default settings.yml

* Added myself to AUTHORS.rst

* Refactored working of custom plugin.

* Refactored _get_bangs_data from list to dict to improve search speed.

* Decoupled bangs plugin from webserver with redirect_url

* Refactored bangs unit tests

* Fixed unit test bangs. Removed double parsing in bangs.py

* Removed a dumb print statement

* Refactored bangs plugin to core engine.

* Removed bangs plugin.

* Refactored external bangs unit tests from plugin to core.

* Removed custom_results/bangs documentation from plugins.rst

* Added newline in settings.yml so the PR stays clean.

* Changed searx/plugins/__init__.py back to the old file

* Removed newline search.py

* Refactored get_external_bang_operator from utils to external_bang.py

* Removed unnecessary import from test_plugins.py

* Removed _parseExternalBang and _isExternalBang from query.py

* Removed get_external_bang_operator since it was not necessary

* Simplified external_bang.py

* Simplified external_bang.py

* Moved external_bangs unit tests to test_webapp.py. Fixed return in search with external_bang

* Refactored query parsing to unicode to support python2

* Refactored query parsing to unicode to support python2

* Refactored bangs plugin to core engine.

* Refactored search parameter to search_query in external_bang.py
Lukas van den Berk 4 years ago
parent
commit
4829a76aae
10 changed files with 64044 additions and 6 deletions
  1. 1 0
      AUTHORS.rst
  2. 8 0
      docs/dev/plugins.rst
  3. 63937 0
      searx/data/bangs.json
  4. 43 0
      searx/external_bang.py
  5. 9 2
      searx/query.py
  6. 1 0
      searx/results.py
  7. 15 3
      searx/search.py
  8. 7 0
      searx/webapp.py
  9. 21 0
      tests/unit/test_search.py
  10. 2 1
      tests/unit/test_webapp.py

+ 1 - 0
AUTHORS.rst

@@ -124,3 +124,4 @@ generally made searx better:
 - @CaffeinatedTech
 - Robin Schneider @ypid
 - @splintah
+- Lukas van den Berk @lukasvdberk

+ 8 - 0
docs/dev/plugins.rst

@@ -30,6 +30,14 @@ Example plugin
        ctx['search'].suggestions.add('example')
        return True
 
+Register your plugin
+====================
+
+To enable your plugin, register it in
+``searx/plugins/__init__.py``:
+at the bottom of the file, add a line like
+``plugins.register(name_of_python_file)``
+
 Plugin entry points
 ===================
 

File diff suppressed because it is too large
+ 63937 - 0
searx/data/bangs.json


+ 43 - 0
searx/external_bang.py

@@ -0,0 +1,43 @@
+import json
+from os.path import join
+
+from searx import searx_dir
+
+# The bangs data originates from the jivesearch project:
+# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
+# It was converted to JSON with https://pseitz.github.io/toml-to-json-online-converter/
+# NOTE: get_bang_url is the only function meant to be used from outside this module.
+
+bangs_data = {}
+with open(join(searx_dir, 'data/bangs.json')) as json_file:
+    for bang in json.load(json_file)['bang']:
+        for trigger in bang["triggers"]:
+            bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
+
+
+def get_bang_url(search_query):
+    """
+    Builds the redirect url for an external bang search. The redirect itself is left to the caller.
+    :param search_query: A search_query object; only its external_bang and query attributes are read here.
+    :return: None if no bang was given, the bang is unknown or the query is empty, else the redirect url string.
+    """
+
+    if search_query.external_bang:
+        query = search_query.query.decode('utf-8', 'ignore')
+        bang = _get_bang(search_query.external_bang)
+
+        if bang and query:
+            # TODO add region support. NOTE(review): the query is substituted as-is, without url-encoding.
+            bang_url = bang["regions"]["default"]
+
+            return bang_url.replace("{{{term}}}", query)
+    return None
+
+
+def _get_bang(user_bang):
+    """
+    Looks up the supplied user bang in bangs_data, which is keyed by trigger.
+    :param user_bang: The parsed user bang. For example yt
+    :return: A dict with the bang's data (see searx/data/bangs.json for the structure), or None if not found.
+    """
+    return bangs_data.get(user_bang)

+ 9 - 2
searx/query.py

@@ -44,10 +44,11 @@ class RawTextQuery(object):
         self.engines = []
         self.languages = []
         self.timeout_limit = None
+        self.external_bang = None
         self.specific = False
 
     # parse query, if tags are set, which
-    # change the serch engine or search-language
+    # change the search engine or search-language
     def parse_query(self):
         self.query_parts = []
 
@@ -120,6 +121,11 @@ class RawTextQuery(object):
                         self.languages.append(lang)
                         parse_next = True
 
+            # external bang
+            if query_part[0:2] == "!!":
+                self.external_bang = query_part[2:]
+                parse_next = True
+                continue
             # this force a engine or category
             if query_part[0] == '!' or query_part[0] == '?':
                 prefix = query_part[1:].replace('-', ' ').replace('_', ' ')
@@ -178,7 +184,7 @@ class SearchQuery(object):
     """container for all the search parameters (query, language, etc...)"""
 
     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
-                 timeout_limit=None, preferences=None):
+                 timeout_limit=None, preferences=None, external_bang=None):
         self.query = query.encode('utf-8')
         self.engines = engines
         self.categories = categories
@@ -188,6 +194,7 @@ class SearchQuery(object):
         self.time_range = None if time_range in ('', 'None', None) else time_range
         self.timeout_limit = timeout_limit
         self.preferences = preferences
+        self.external_bang = external_bang
 
     def __str__(self):
         return str(self.query) + ";" + str(self.engines)

+ 1 - 0
searx/results.py

@@ -138,6 +138,7 @@ class ResultContainer(object):
         self.paging = False
         self.unresponsive_engines = set()
         self.timings = []
+        self.redirect_url = None
 
     def extend(self, engine_name, results):
         for result in list(results):

+ 15 - 3
searx/search.py

@@ -20,6 +20,8 @@ import sys
 import threading
 from time import time
 from uuid import uuid4
+
+import six
 from flask_babel import gettext
 import requests.exceptions
 import searx.poolrequests as requests_lib
@@ -27,6 +29,7 @@ from searx.engines import (
     categories, engines, settings
 )
 from searx.answerers import ask
+from searx.external_bang import get_bang_url
 from searx.utils import gen_useragent
 from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
 from searx.results import ResultContainer
@@ -54,6 +57,7 @@ else:
     else:
         logger.critical('outgoing.max_request_timeout if defined has to be float')
         from sys import exit
+
         exit(1)
 
 
@@ -397,15 +401,16 @@ def get_search_query_from_webapp(preferences, form):
                                      if (engine.name, categ) not in disabled_engines)
 
     query_engines = deduplicate_query_engines(query_engines)
+    external_bang = raw_text_query.external_bang
 
     return (SearchQuery(query, query_engines, query_categories,
                         query_lang, query_safesearch, query_pageno,
-                        query_time_range, query_timeout, preferences),
+                        query_time_range, query_timeout, preferences,
+                        external_bang=external_bang),
             raw_text_query)
 
 
 class Search(object):
-
     """Search information container"""
 
     def __init__(self, search_query):
@@ -419,6 +424,14 @@ class Search(object):
     def search(self):
         global number_of_searches
 
+        # Check if there is an external bang. If it resolves to a redirect url, the search stops here.
+        if self.search_query.external_bang:
+            self.result_container.redirect_url = get_bang_url(self.search_query)
+
+            # This means there was a valid bang and the
+            # rest of the search does not need to be continued
+            if isinstance(self.result_container.redirect_url, six.string_types):
+                return self.result_container
         # start time
         start_time = time()
 
@@ -521,7 +534,6 @@ class Search(object):
 
 
 class SearchWithPlugins(Search):
-
     """Similar to the Search class but call the plugins."""
 
     def __init__(self, search_query, ordered_plugin_list, request):

+ 7 - 0
searx/webapp.py

@@ -575,7 +575,9 @@ def index():
         search_query, raw_text_query = get_search_query_from_webapp(request.preferences, request.form)
         # search = Search(search_query) #  without plugins
         search = SearchWithPlugins(search_query, request.user_plugins, request)
+
         result_container = search.search()
+
     except Exception as e:
         # log exception
         logger.exception('search error')
@@ -592,6 +594,10 @@ def index():
     if number_of_results < result_container.results_length():
         number_of_results = 0
 
+    # check for an external bang redirect
+    if result_container.redirect_url:
+        return redirect(result_container.redirect_url)
+
     # UI
     advanced_search = request.form.get('advanced_search', None)
 
@@ -665,6 +671,7 @@ def index():
         cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
         response.headers.add('Content-Disposition', cont_disp)
         return response
+
     elif output_format == 'rss':
         response_rss = render(
             'opensearch_response_rss.xml',

+ 21 - 0
tests/unit/test_search.py

@@ -110,3 +110,24 @@ class SearchTestCase(SearxTestCase):
         search = searx.search.Search(search_query)
         results = search.search()
         self.assertEquals(results.results_length(), 1)
+
+    def test_external_bang(self):
+        search_query = searx.query.SearchQuery('yes yes',
+                                               [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
+                                               ['general'], 'en-US', SAFESEARCH, PAGENO, None, None,
+                                               preferences=Preferences(['oscar'], ['general'], engines, [],),
+                                               external_bang="yt")
+        search = searx.search.Search(search_query)
+        results = search.search()
+        # With the youtube external bang (yt) the search must yield a redirect url
+        self.assertTrue(results.redirect_url is not None)
+
+        search_query = searx.query.SearchQuery('youtube never gonna give you up',
+                                               [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}],
+                                               ['general'], 'en-US', SAFESEARCH, PAGENO, None, None,
+                                               preferences=Preferences(['oscar'], ['general'], engines, []),)
+
+        search = searx.search.Search(search_query)
+        results = search.search()
+        # Without an external bang there must be no redirect url
+        self.assertTrue(results.redirect_url is None)

+ 2 - 1
tests/unit/test_webapp.py

@@ -56,7 +56,8 @@ class ViewsTestCase(SearxTestCase):
                                                 results=test_results,
                                                 results_number=lambda: 3,
                                                 results_length=lambda: len(test_results),
-                                                get_timings=lambda: timings)
+                                                get_timings=lambda: timings,
+                                                redirect_url=None)
 
         self.setattr4test(Search, 'search', search_mock)
 

Some files were not shown because too many files changed in this diff