Browse Source

A small fix and updated tests for content highlighting

ahmad-alkadri 2 years ago
parent
commit
99b5272d9a
2 changed files with 21 additions and 17 deletions
  1. 11 8
      searx/webutils.py
  2. 10 9
      tests/unit/test_webutils.py

+ 11 - 8
searx/webutils.py

@@ -124,13 +124,14 @@ def contains_cjko(s: str) -> bool:
     Returns:
         bool: True if the input s contains the characters and False otherwise.
     """
-    unicode_ranges = ('\u4e00-\u9fff' # Chinese characters
-                      '\u3040-\u309f' # Japanese hiragana
-                      '\u30a0-\u30ff' # Japanese katakana
-                      '\u4e00-\u9faf' # Japanese kanji
-                      '\uac00-\ud7af' # Korean hangul syllables
-                      '\u1100-\u11ff' # Korean hangul jamo
-                      )
+    unicode_ranges = (
+        '\u4e00-\u9fff'  # Chinese characters
+        '\u3040-\u309f'  # Japanese hiragana
+        '\u30a0-\u30ff'  # Japanese katakana
+        '\u4e00-\u9faf'  # Japanese kanji
+        '\uac00-\ud7af'  # Korean hangul syllables
+        '\u1100-\u11ff'  # Korean hangul jamo
+    )
     return bool(re.search(fr'[{unicode_ranges}]', s))
 
 
@@ -168,7 +169,9 @@ def highlight_content(content, query):
     querysplit = query.split()
     queries = []
     for qs in querysplit:
-        queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
+        qs = qs.replace("'", "").replace('"', '').replace(" ", "")
+        if len(qs) > 0:
+            queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
     if len(queries) > 0:
         for q in set(queries):
             content = re.sub(regex_highlight_cjk(q), f'<span class="highlight">{q}</span>', content)

+ 10 - 9
tests/unit/test_webutils.py

@@ -28,32 +28,33 @@ class TestWebUtils(SearxTestCase):
 
         content = 'a'
         query = 'test'
-        self.assertEqual(webutils.highlight_content(content, query), content)
+        self.assertEqual(webutils.highlight_content(content, query), 'a')
         query = 'a test'
-        self.assertEqual(webutils.highlight_content(content, query), content)
+        self.assertEqual(webutils.highlight_content(content, query), '<span class="highlight">a</span>')
 
         data = (
             ('" test "', 'a test string', 'a <span class="highlight">test</span> string'),
-            ('"a"', 'this is a test string', 'this is<span class="highlight"> a </span>test string'),
+            ('"a"', 'this is a test string', 'this is <span class="highlight">a</span> test string'),
             (
                 'a test',
                 'this is a test string that matches entire query',
-                'this is <span class="highlight">a test</span> string that matches entire query',
+                'this is <span class="highlight">a</span> <span class="highlight">test</span> string that matches entire query',
             ),
             (
                 'this a test',
                 'this is a string to test.',
                 (
-                    '<span class="highlight">this</span> is<span class="highlight"> a </span>'
-                    'string to <span class="highlight">test</span>.'
+                    '<span class="highlight">this</span> is <span class="highlight">a</span> string to <span class="highlight">test</span>.'
                 ),
             ),
             (
                 'match this "exact phrase"',
                 'this string contains the exact phrase we want to match',
-                (
-                    '<span class="highlight">this</span> string contains the <span class="highlight">exact</span>'
-                    ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>'
+                ''.join(
+                    [
+                        '<span class="highlight">this</span> string contains the <span class="highlight">exact</span> ',
+                        '<span class="highlight">phrase</span> we want to <span class="highlight">match</span>',
+                    ]
                 ),
             ),
         )