Browse Source

Startpage's unit test

Cqoicebordel 10 years ago
parent
commit
f1c10f4fe4
3 changed files with 146 additions and 8 deletions
  1. 5 8
      searx/engines/startpage.py
  2. 140 0
      searx/tests/engines/test_startpage.py
  3. 1 0
      searx/tests/test_engines.py

+ 5 - 8
searx/engines/startpage.py

@@ -13,6 +13,7 @@
 from lxml import html
 from lxml import html
 from cgi import escape
 from cgi import escape
 import re
 import re
+from searx.engines.xpath import extract_text
 
 
 # engine dependent config
 # engine dependent config
 categories = ['general']
 categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):
 
 
     # set language if specified
     # set language if specified
     if params['language'] != 'all':
     if params['language'] != 'all':
-        params['data']['with_language'] = ('lang_' +
-                                           params['language'].split('_')[0])
+        params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
 
 
     return params
     return params
 
 
@@ -64,18 +64,15 @@ def response(resp):
             continue
             continue
         link = links[0]
         link = links[0]
         url = link.attrib.get('href')
         url = link.attrib.get('href')
-        try:
-            title = escape(link.text_content())
-        except UnicodeDecodeError:
-            continue
 
 
         # block google-ad URLs
         # block google-ad URLs
         if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
         if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
             continue
             continue
 
 
+        title = escape(extract_text(link))
+
         if result.xpath('./p[@class="desc"]'):
         if result.xpath('./p[@class="desc"]'):
-            content = escape(result.xpath('./p[@class="desc"]')[0]
-                             .text_content())
+            content = escape(extract_text(result.xpath('./p[@class="desc"]')))
         else:
         else:
             content = ''
             content = ''
 
 

+ 140 - 0
searx/tests/engines/test_startpage.py

@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import startpage
+from searx.testing import SearxTestCase
+
+
+class TestStartpageEngine(SearxTestCase):
+
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['pageno'] = 1
+        dicto['language'] = 'fr_FR'
+        params = startpage.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertIn('startpage.com', params['url'])
+        self.assertIn('data', params)
+        self.assertIn('query', params['data'])
+        self.assertIn(query, params['data']['query'])
+        self.assertIn('with_language', params['data'])
+        self.assertIn('lang_fr', params['data']['with_language'])
+
+        dicto['language'] = 'all'
+        params = startpage.request(query, dicto)
+        self.assertNotIn('with_language', params['data'])
+
+    def test_response(self):
+        self.assertRaises(AttributeError, startpage.response, None)
+        self.assertRaises(AttributeError, startpage.response, [])
+        self.assertRaises(AttributeError, startpage.response, '')
+        self.assertRaises(AttributeError, startpage.response, '[]')
+
+        response = mock.Mock(content='<html></html>')
+        self.assertEqual(startpage.response(response), [])
+
+        html = """
+        <div class='result' style=' *width : auto; *margin-right : 10%;'>
+            <h3>
+                <a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
+                    This should be the title
+                </a>
+                <span id='title_stars_2' name='title_stars_2'>  </span>
+            </h3>
+            <p class='desc'>
+                This should be the content.
+            </p>
+            <p>
+                <span class='url'>www.speed<b>test</b>.net/fr/
+                </span>
+                  -
+                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
+                    class='proxy'>
+                    Navigation avec Ixquick Proxy
+                </A>
+                    -
+                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
+                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
+                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
+                    Mis en surbrillance
+                </A>
+            </p>
+        </div>
+        """
+        response = mock.Mock(content=html)
+        results = startpage.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'This should be the title')
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
+        self.assertEqual(results[0]['content'], 'This should be the content.')
+
+        html = """
+        <div class='result' style=' *width : auto; *margin-right : 10%;'>
+            <h3>
+                <a href='http://www.google.com/aclk?sa=l&ai=C' id='title_2' name='title_2' >
+                    This should be the title
+                </a>
+                <span id='title_stars_2' name='title_stars_2'>  </span>
+            </h3>
+            <p class='desc'>
+                This should be the content.
+            </p>
+            <p>
+                <span class='url'>www.speed<b>test</b>.net/fr/
+                </span>
+                  -
+                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
+                    class='proxy'>
+                    Navigation avec Ixquick Proxy
+                </A>
+                    -
+                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
+                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
+                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
+                    Mis en surbrillance
+                </A>
+            </p>
+        </div>
+        <div class='result' style=' *width : auto; *margin-right : 10%;'>
+            <h3>
+                <span id='title_stars_2' name='title_stars_2'>  </span>
+            </h3>
+            <p class='desc'>
+                This should be the content.
+            </p>
+            <p>
+                <span class='url'>www.speed<b>test</b>.net/fr/
+                </span>
+            </p>
+        </div>
+        <div class='result' style=' *width : auto; *margin-right : 10%;'>
+            <h3>
+                <a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
+                    This should be the title
+                </a>
+                <span id='title_stars_2' name='title_stars_2'>  </span>
+            </h3>
+            <p>
+                <span class='url'>www.speed<b>test</b>.net/fr/
+                </span>
+                  -
+                <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
+                    class='proxy'>
+                    Navigation avec Ixquick Proxy
+                </A>
+                    -
+                <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
+                    &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
+                    &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
+                    Mis en surbrillance
+                </A>
+            </p>
+        </div>
+        """
+        response = mock.Mock(content=html)
+        results = startpage.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['content'], '')

+ 1 - 0
searx/tests/test_engines.py

@@ -23,6 +23,7 @@ from searx.tests.engines.test_searchcode_code import *  # noqa
 from searx.tests.engines.test_searchcode_doc import *  # noqa
 from searx.tests.engines.test_searchcode_doc import *  # noqa
 from searx.tests.engines.test_soundcloud import *  # noqa
 from searx.tests.engines.test_soundcloud import *  # noqa
 from searx.tests.engines.test_stackoverflow import *  # noqa
 from searx.tests.engines.test_stackoverflow import *  # noqa
+from searx.tests.engines.test_startpage import *  # noqa
 from searx.tests.engines.test_subtitleseeker import *  # noqa
 from searx.tests.engines.test_subtitleseeker import *  # noqa
 from searx.tests.engines.test_twitter import *  # noqa
 from searx.tests.engines.test_twitter import *  # noqa
 from searx.tests.engines.test_vimeo import *  # noqa
 from searx.tests.engines.test_vimeo import *  # noqa