Browse Source

[fix] external bangs: don't overwrite Bangs in data trie

Bangs with a `*` suffix (e.g. `!!d*`) overwrite Bangs with the same
prefix (e.g. `!!d`) [1].  This can be avoided when a non-printable character is
used to tag a LEAF_KEY.

[1] https://github.com/searxng/searxng/pull/740#issuecomment-1010411888

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
7cdd31440e

+ 4 - 2
searx/external_bang.py

@@ -2,6 +2,8 @@
 
 from searx.data import EXTERNAL_BANGS
 
+LEAF_KEY = chr(16)
+
 
 def get_node(external_bangs_db, bang):
     node = external_bangs_db['trie']
@@ -26,8 +28,8 @@ def get_bang_definition_and_ac(external_bangs_db, bang):
             if k.startswith(after):
                 bang_ac_list.append(before + k)
     elif isinstance(node, dict):
-        bang_definition = node.get('*')
-        bang_ac_list = [before + k for k in node.keys() if k != '*']
+        bang_definition = node.get(LEAF_KEY)
+        bang_ac_list = [before + k for k in node.keys() if k != LEAF_KEY]
     elif isinstance(node, str):
         bang_definition = node
         bang_ac_list = []

+ 20 - 16
searxng_extra/update/update_external_bangs.py

@@ -25,7 +25,7 @@ from os.path import join
 import httpx
 
 from searx import searx_dir  # pylint: disable=E0401 C0413
-
+from searx.external_bang import LEAF_KEY
 
 # from https://duckduckgo.com/newbang
 URL_BV1 = 'https://duckduckgo.com/bv1.js'
@@ -51,18 +51,22 @@ def fetch_ddg_bangs(url):
 def merge_when_no_leaf(node):
     """Minimize the number of nodes
 
-    A -> B -> C
-    B is child of A
-    C is child of B
+    ``A -> B -> C``
+
+    - ``B`` is child of ``A``
+    - ``C`` is child of ``B``
+
+    If no ``C`` equals ``<LEAF_KEY>``, then each ``C`` is merged
+    into ``A``.  For example (5 nodes)::
+
+      d -> d -> g -> <LEAF_KEY> (ddg)
+        -> i -> g -> <LEAF_KEY> (dig)
+
+    becomes (3 nodes)::
 
-    If there are no C equals to '*', then each C are merged into A
+      d -> dg -> <LEAF_KEY>
+        -> ig -> <LEAF_KEY>
 
-    For example:
-      d -> d -> g -> * (ddg*)
-        -> i -> g -> * (dig*)
-    becomes
-      d -> dg -> *
-        -> ig -> *
     """
     restart = False
     if not isinstance(node, dict):
@@ -72,12 +76,12 @@ def merge_when_no_leaf(node):
     keys = list(node.keys())
 
     for key in keys:
-        if key == '*':
+        if key == LEAF_KEY:
             continue
 
         value = node[key]
         value_keys = list(value.keys())
-        if '*' not in value_keys:
+        if LEAF_KEY not in value_keys:
             for value_key in value_keys:
                 node[key + value_key] = value[value_key]
                 merge_when_no_leaf(node[key + value_key])
@@ -94,8 +98,8 @@ def optimize_leaf(parent, parent_key, node):
     if not isinstance(node, dict):
         return
 
-    if len(node) == 1 and '*' in node and parent is not None:
-        parent[parent_key] = node['*']
+    if len(node) == 1 and LEAF_KEY in node and parent is not None:
+        parent[parent_key] = node[LEAF_KEY]
     else:
         for key, value in node.items():
             optimize_leaf(node, key, value)
@@ -138,7 +142,7 @@ def parse_ddg_bangs(ddg_bangs):
         t = bang_trie
         for bang_letter in bang:
             t = t.setdefault(bang_letter, {})
-        t = t.setdefault('*', bang_def_output)
+        t = t.setdefault(LEAF_KEY, bang_def_output)
 
     # optimize the trie
     merge_when_no_leaf(bang_trie)

+ 13 - 7
tests/unit/test_external_bangs.py

@@ -1,4 +1,10 @@
-from searx.external_bang import get_node, resolve_bang_definition, get_bang_url, get_bang_definition_and_autocomplete
+from searx.external_bang import (
+    get_node,
+    resolve_bang_definition,
+    get_bang_url,
+    get_bang_definition_and_autocomplete,
+    LEAF_KEY,
+)
 from searx.search import SearchQuery, EngineRef
 from tests import SearxTestCase
 
@@ -7,12 +13,12 @@ TEST_DB = {
     'trie': {
         'exam': {
             'ple': '//example.com/' + chr(2) + chr(1) + '0',
-            '*': '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0',
+            LEAF_KEY: '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0',
         },
         'sea': {
-            '*': 'sea' + chr(2) + chr(1) + '0',
+            LEAF_KEY: 'sea' + chr(2) + chr(1) + '0',
             'rch': {
-                '*': 'search' + chr(2) + chr(1) + '0',
+                LEAF_KEY: 'search' + chr(2) + chr(1) + '0',
                 'ing': 'searching' + chr(2) + chr(1) + '0',
             },
             's': {
@@ -31,7 +37,7 @@ class TestGetNode(SearxTestCase):
         'trie': {
             'exam': {
                 'ple': 'test',
-                '*': 'not used',
+                LEAF_KEY: 'not used',
             }
         }
     }
@@ -71,7 +77,7 @@ class TestResolveBangDefinition(SearxTestCase):
 class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
     def test_found(self):
         bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB)
-        self.assertEqual(bang_definition, TEST_DB['trie']['exam']['*'])
+        self.assertEqual(bang_definition, TEST_DB['trie']['exam'][LEAF_KEY])
         self.assertEqual(new_autocomplete, ['example'])
 
     def test_found_optimized(self):
@@ -86,7 +92,7 @@ class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
 
     def test_partial2(self):
         bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('sea', external_bangs_db=TEST_DB)
-        self.assertEqual(bang_definition, TEST_DB['trie']['sea']['*'])
+        self.assertEqual(bang_definition, TEST_DB['trie']['sea'][LEAF_KEY])
         self.assertEqual(new_autocomplete, ['search', 'searching', 'seascapes', 'season'])
 
     def test_error(self):