Browse Source

A bit of utils unit tests

Cqoicebordel 10 years ago
parent
commit
cfe81d741c
2 changed files with 23 additions and 1 deletion
  1. 22 0
      searx/tests/test_utils.py
  2. 1 1
      searx/utils.py

+ 22 - 0
searx/tests/test_utils.py

@@ -10,6 +10,11 @@ class TestUtils(SearxTestCase):
         self.assertIsNotNone(utils.gen_useragent())
         self.assertTrue(utils.gen_useragent().startswith('Mozilla'))
 
+    def test_searx_useragent(self):
+        self.assertIsInstance(utils.searx_useragent(), str)
+        self.assertIsNotNone(utils.searx_useragent())
+        self.assertTrue(utils.searx_useragent().startswith('searx'))
+
     def test_highlight_content(self):
         self.assertEqual(utils.highlight_content(0, None), None)
         self.assertEqual(utils.highlight_content(None, None), None)
@@ -29,6 +34,23 @@ class TestUtils(SearxTestCase):
         query = 'a test'
         self.assertEqual(utils.highlight_content(content, query), content)
 
+    def test_html_to_text(self):
+        html = """
+        <a href="/testlink" class="link_access_account">
+            <span class="toto">
+                <span>
+                    <img src="test.jpg" />
+                </span>
+            </span>
+            <span class="titi">
+                            Test text
+            </span>
+        </a>
+        """
+        self.assertIsInstance(utils.html_to_text(html), unicode)
+        self.assertIsNotNone(utils.html_to_text(html))
+        self.assertEqual(utils.html_to_text(html), "Test text")
+
 
 class TestHTMLTextExtractor(SearxTestCase):
 

+ 1 - 1
searx/utils.py

@@ -115,7 +115,7 @@ class HTMLTextExtractor(HTMLParser):
         self.result.append(name)
 
     def get_text(self):
-        return u''.join(self.result)
+        return u''.join(self.result).strip()
 
 
 def html_to_text(html):