# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
# Import the submodule explicitly: a bare ``import lxml`` does not load
# ``lxml.html``, which test_parse_images uses directly.
import lxml.html
from searx.engines import google
from searx.testing import SearxTestCase


class TestGoogleEngine(SearxTestCase):
    """Unit tests for the request/response helpers of the google engine."""

    # URL reported by the mocked response when a test does not supply one.
    _DEFAULT_URL = 'https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr'

    def mock_response(self, text, url=None):
        """Build a mocked HTTP response to feed into the engine.

        :param text: value exposed as ``response.text``.
        :param url: value exposed as ``response.url``; falls back to
            ``_DEFAULT_URL`` when omitted.
        :returns: a ``mock.Mock`` whose ``search_params.get(...)`` always
            returns ``'www.google.com'``.
        """
        if url is None:
            url = self._DEFAULT_URL
        response = mock.Mock(text=text, url=url)
        response.search_params = mock.Mock()
        response.search_params.get = mock.Mock(return_value='www.google.com')
        return response

    def test_request(self):
        """google.request reflects the language in the URL and the header."""
        # NOTE: this assigns a module-level attribute on the engine and is
        # not restored afterwards.
        google.supported_languages = ['en', 'fr', 'zh-CN']

        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        dicto['language'] = 'fr-FR'
        dicto['time_range'] = ''
        params = google.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('google.fr', params['url'])
        self.assertIn('fr', params['url'])
        self.assertIn('fr', params['headers']['Accept-Language'])

        dicto['language'] = 'en-US'
        params = google.request(query, dicto)
        self.assertIn('google.com', params['url'])
        self.assertIn('en', params['url'])
        self.assertIn('en', params['headers']['Accept-Language'])

        # A bare 'zh' is expected to come out as 'zh-CN'.
        dicto['language'] = 'zh'
        params = google.request(query, dicto)
        self.assertIn('google.com', params['url'])
        self.assertIn('zh-CN', params['url'])
        self.assertIn('zh-CN', params['headers']['Accept-Language'])

    def test_response(self):
        """google.response parses results and suggestions from the HTML."""
        # Objects that are not response-like must fail with AttributeError.
        self.assertRaises(AttributeError, google.response, None)
        self.assertRaises(AttributeError, google.response, [])
        self.assertRaises(AttributeError, google.response, '')
        self.assertRaises(AttributeError, google.response, '[]')

        response = self.mock_response('<html></html>')
        self.assertEqual(google.response(response), [])

        html = """
        <div class="g">
            <h3 class="r">
                <a href="http://this.should.be.the.link/">
                    <b>This</b> is <b>the</b> title
                </a>
            </h3>
            <div class="s">
                <div class="kv" style="margin-bottom:2px">
                    <cite>
                        <b>test</b>.psychologies.com/
                    </cite>
                    <div class="_nBb">
                        <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
                            aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
                            <span class="_O0">
                            </span>
                        </div>
                        <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
                            <ul>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
                                        .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
                                        En cache
                                    </a>
                                </li>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="/search?safe=off&q=related:test.psy.com/">
                                        Pages similaires
                                    </a>
                                </li>
                            </ul>
                        </div>
                    </div>
                </div>
                <span class="st">
                    This should be the content.
                </span>
                <br>
                <div class="osl">
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
                        Test Personnalité
                    </a> - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
                        Tests - Moi
                    </a> - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
                        Test Couple
                    </a>
                    - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
                        Test Amour
                    </a>
                </div>
            </div>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="http://www.google.com/images?q=toto">
                    <b>This</b>
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="http://www.google.com/search?q=toto">
                    <b>This</b> is
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="€">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </div>
        <div class="g">
            <h3 class="r">
                <a href="/url?q=url">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </div>
        <p class="_Bmc" style="margin:3px 8px">
            <a href="/search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved=">
                suggestion <b>title</b>
            </a>
        </p>
        """
        response = self.mock_response(html)
        results = google.response(response)
        self.assertIsInstance(results, list)
        # Of everything in the fixture, exactly one regular result and one
        # suggestion are expected to come back.
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'This is the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
        self.assertEqual(results[1]['suggestion'], 'suggestion title')

        # Markup without any "g" result block must produce no results.
        html = """
        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
        </li>
        """
        response = self.mock_response(html)
        results = google.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 0)

        # Both "sorry" URL shapes must raise RuntimeWarning.
        response = self.mock_response('<html></html>', url='https://sorry.google.com')
        self.assertRaises(RuntimeWarning, google.response, response)

        response = self.mock_response('<html></html>', url='https://www.google.com/sorry/IndexRedirect')
        self.assertRaises(RuntimeWarning, google.response, response)

    def test_parse_images(self):
        """google.parse_images extracts url and img_src from an image item."""
        html = """
        <li>
            <div>
                <a href="http://www.google.com/url?q=http://this.is.the.url/">
                    <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
                        src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
                </a>
            </div>
        </li>
        """
        dom = lxml.html.fromstring(html)
        results = google.parse_images(dom, 'www.google.com')
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
        self.assertEqual(results[0]['title'], '')
        self.assertEqual(results[0]['content'], '')
        self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')

    def test_fetch_supported_languages(self):
        """google._fetch_supported_languages maps language codes to names."""
        html = """<html></html>"""
        response = mock.Mock(text=html)
        languages = google._fetch_supported_languages(response)
        self.assertIsInstance(languages, dict)
        self.assertEqual(len(languages), 0)

        # The expected codes are the span ids without their leading "t".
        html = u"""
        <html>
            <body>
                <table>
                    <tbody>
                        <tr>
                            <td>
                                <font>
                                    <label>
                                        <span id="ten">English</span>
                                    </label>
                                </font>
                            </td>
                            <td>
                                <font>
                                    <label>
                                        <span id="tzh-CN">中文 (简体)</span>
                                    </label>
                                    <label>
                                        <span id="tzh-TW">中文 (繁體)</span>
                                    </label>
                                </font>
                            </td>
                        </tr>
                    </tbody>
                </table>
            </body>
        </html>
        """
        response = mock.Mock(text=html)
        languages = google._fetch_supported_languages(response)
        self.assertIsInstance(languages, dict)
        self.assertEqual(len(languages), 3)

        self.assertIn('en', languages)
        self.assertIn('zh-CN', languages)
        self.assertIn('zh-TW', languages)

        self.assertIsInstance(languages['en'], dict)
        self.assertIsInstance(languages['zh-CN'], dict)
        self.assertIsInstance(languages['zh-TW'], dict)

        self.assertIn('name', languages['en'])
        self.assertIn('name', languages['zh-CN'])
        self.assertIn('name', languages['zh-TW'])

        self.assertEqual(languages['en']['name'], 'English')
        self.assertEqual(languages['zh-CN']['name'], u'中文 (简体)')
        self.assertEqual(languages['zh-TW']['name'], u'中文 (繁體)')