# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
# Import the submodule explicitly: a bare ``import lxml`` does not load
# ``lxml.html``, so ``lxml.html.fromstring`` would otherwise only work when
# some other imported module happened to load it first.
import lxml.html
from searx.engines import google
from searx.testing import SearxTestCase


class TestGoogleEngine(SearxTestCase):
    """Unit tests for the request/response handling of the google engine."""

    def mock_response(self, text, url='https://www.google.com/search?q=test&start=0&gbv=1'):
        """Build a mock HTTP response to feed to ``google.response``.

        :param text: body exposed as ``response.text``.
        :param url: value exposed as ``response.url``; defaults to a regular
            google search URL.
        :returns: a ``mock.Mock`` whose ``search_params.get`` always returns
            ``'www.google.com'``.
        """
        response = mock.Mock(text=text, url=url)
        response.search_params = mock.Mock()
        response.search_params.get = mock.Mock(return_value='www.google.com')
        return response

    def test_request(self):
        """Check URL, cookies and headers built by ``google.request``."""
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1

        # A specific language: localized domain, no PREF cookie.
        dicto['language'] = 'fr_FR'
        params = google.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('google.fr', params['url'])
        self.assertNotIn('PREF', params['cookies'])
        self.assertIn('NID', params['cookies'])
        self.assertIn('fr', params['headers']['Accept-Language'])

        # 'all' languages: google.com, English headers and a PREF cookie.
        dicto['language'] = 'all'
        params = google.request(query, dicto)
        self.assertIn('google.com', params['url'])
        self.assertIn('en', params['headers']['Accept-Language'])
        self.assertIn('PREF', params['cookies'])
        self.assertIn('NID', params['cookies'])

    def test_response(self):
        """Check parsing of result pages and handling of "sorry" redirects."""
        # Anything that is not a response object is rejected.
        self.assertRaises(AttributeError, google.response, None)
        self.assertRaises(AttributeError, google.response, [])
        self.assertRaises(AttributeError, google.response, '')
        self.assertRaises(AttributeError, google.response, '[]')

        response = self.mock_response('<html></html>')
        self.assertEqual(google.response(response), [])

        # Five result entries plus one suggestion; only the first entry and
        # the suggestion are expected in the parsed output (asserted below).
        html = """
        <li class="g">
            <h3 class="r">
                <a href="http://this.should.be.the.link/">
                    <b>This</b> is <b>the</b> title
                </a>
            </h3>
            <div class="s">
                <div class="kv" style="margin-bottom:2px">
                    <cite>
                        <b>test</b>.psychologies.com/
                    </cite>
                    <div class="_nBb">
                        <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
                            aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
                            <span class="_O0">
                            </span>
                        </div>
                        <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
                            <ul>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
                                        .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
                                        En cache
                                    </a>
                                </li>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="/search?safe=off&q=related:test.psy.com/">
                                        Pages similaires
                                    </a>
                                </li>
                            </ul>
                        </div>
                    </div>
                </div>
                <span class="st">
                    This should be the content.
                </span>
                <br>
                <div class="osl">
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
                        Test Personnalité
                    </a> - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
                        Tests - Moi
                    </a> - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
                        Test Couple
                    </a>
                    - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
                        Test Amour
                    </a>
                </div>
            </div>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="http://www.google.com/images?q=toto">
                    <b>This</b>
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="http://www.google.com/search?q=toto">
                    <b>This</b> is
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="€">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="/url?q=url">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </li>
        <p class="_Bmc" style="margin:3px 8px">
            <a href="/search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved=">
                suggestion <b>title</b>
            </a>
        </p>
        """
        response = self.mock_response(html)
        results = google.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'This is the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
        self.assertEqual(results[1]['suggestion'], 'suggestion title')

        # Markup using a different result class yields no results.
        html = """
        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
        </li>
        """
        response = self.mock_response(html)
        results = google.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 0)

        # Being sent to a "sorry" host or path must raise RuntimeWarning.
        response = self.mock_response('<html></html>', url='https://sorry.google.com')
        self.assertRaises(RuntimeWarning, google.response, response)

        response = self.mock_response('<html></html>', url='https://www.google.com/sorry/IndexRedirect')
        self.assertRaises(RuntimeWarning, google.response, response)

    def test_parse_images(self):
        """Check that ``google.parse_images`` extracts an image result."""
        html = """
        <li>
            <div>
                <a href="http://www.google.com/url?q=http://this.is.the.url/">
                    <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
                        src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
                </a>
            </div>
        </li>
        """
        dom = lxml.html.fromstring(html)
        results = google.parse_images(dom, 'www.google.com')
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        # The target URL is taken from the ``q`` parameter of the redirect link.
        self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
        self.assertEqual(results[0]['title'], '')
        self.assertEqual(results[0]['content'], '')
        self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')