Browse Source

Updated webutils.highlight_content to ignore double-quotes when highlighting query parts

Daniel Hones 4 years ago
parent
commit
138f32471c
3 changed files with 27 additions and 2 deletions
  1. 1 1
      AUTHORS.rst
  2. 4 1
      searx/webutils.py
  3. 22 0
      tests/unit/test_webutils.py

+ 1 - 1
AUTHORS.rst

@@ -154,6 +154,6 @@ generally made searx better:
 - @mrwormo
 - Xiaoyu WEI @xywei
 - @joshu9h
-
+- Daniel Hones
 
 

+ 4 - 1
searx/webutils.py

@@ -119,7 +119,10 @@ def highlight_content(content, query):
     else:
         regex_parts = []
         for chunk in query.split():
-            if len(chunk) == 1:
+            chunk = chunk.replace('"', '')
+            if len(chunk) == 0:
+                continue
+            elif len(chunk) == 1:
                 regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
             else:
                 regex_parts.append('{0}'.format(re.escape(chunk)))

+ 22 - 0
tests/unit/test_webutils.py

@@ -34,6 +34,28 @@ class TestWebUtils(SearxTestCase):
         query = 'a test'
         self.assertEqual(webutils.highlight_content(content, query), content)
 
+        data = (
+            ('" test "',
+             'a test string',
+             'a <span class="highlight">test</span> string'),
+            ('"a"',
+             'this is a test string',
+             'this is<span class="highlight"> a </span>test string'),
+            ('a test',
+             'this is a test string that matches entire query',
+             'this is <span class="highlight">a test</span> string that matches entire query'),
+            ('this a test',
+             'this is a string to test.',
+             ('<span class="highlight">this</span> is<span class="highlight"> a </span>'
+              'string to <span class="highlight">test</span>.')),
+            ('match this "exact phrase"',
+             'this string contains the exact phrase we want to match',
+             ('<span class="highlight">this</span> string contains the <span class="highlight">exact</span>'
+              ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>'))
+        )
+        for query, content, expected in data:
+            self.assertEqual(webutils.highlight_content(content, query), expected)
+
 
 class TestUnicodeWriter(SearxTestCase):