# -*- coding: utf-8 -*-
# test_google.py
from collections import defaultdict

import lxml
import lxml.html
import mock

from searx.engines import google
from searx.testing import SearxTestCase


  7. class TestGoogleEngine(SearxTestCase):
  8. def mock_response(self, text):
  9. response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr')
  10. response.search_params = mock.Mock()
  11. response.search_params.get = mock.Mock(return_value='www.google.com')
  12. return response
  13. def test_request(self):
  14. google.supported_languages = ['en', 'fr', 'zh-CN', 'iw']
  15. google.language_aliases = {'he': 'iw'}
  16. query = 'test_query'
  17. dicto = defaultdict(dict)
  18. dicto['pageno'] = 1
  19. dicto['language'] = 'fr-FR'
  20. dicto['time_range'] = ''
  21. params = google.request(query, dicto)
  22. self.assertIn('url', params)
  23. self.assertIn(query, params['url'])
  24. self.assertIn('google.fr', params['url'])
  25. self.assertIn('fr', params['url'])
  26. self.assertIn('fr', params['headers']['Accept-Language'])
  27. dicto['language'] = 'en-US'
  28. params = google.request(query, dicto)
  29. self.assertIn('google.com', params['url'])
  30. self.assertIn('en', params['url'])
  31. self.assertIn('en', params['headers']['Accept-Language'])
  32. dicto['language'] = 'zh'
  33. params = google.request(query, dicto)
  34. self.assertIn('google.com', params['url'])
  35. self.assertIn('zh-CN', params['url'])
  36. self.assertIn('zh-CN', params['headers']['Accept-Language'])
  37. dicto['language'] = 'he'
  38. params = google.request(query, dicto)
  39. self.assertIn('google.com', params['url'])
  40. self.assertIn('iw', params['url'])
  41. self.assertIn('iw', params['headers']['Accept-Language'])
  42. def test_response(self):
  43. self.assertRaises(AttributeError, google.response, None)
  44. self.assertRaises(AttributeError, google.response, [])
  45. self.assertRaises(AttributeError, google.response, '')
  46. self.assertRaises(AttributeError, google.response, '[]')
  47. response = self.mock_response('<html></html>')
  48. self.assertEqual(google.response(response), [])
  49. html = """
  50. <div class="ZINbbc xpd O9g5cc uUPGi">
  51. <div>
  52. <div class="kCrYT">
  53. <a href="/url?q=http://this.should.be.the.link/">
  54. <div class="BNeawe">
  55. <b>This</b> is <b>the</b> title
  56. </div>
  57. <div class="BNeawe">
  58. http://website
  59. </div>
  60. </a>
  61. </div>
  62. <div class="kCrYT">
  63. <div>
  64. <div class="BNeawe">
  65. <div>
  66. <div class="BNeawe">
  67. This should be the content.
  68. </div>
  69. </div>
  70. </div>
  71. </div>
  72. </div>
  73. </div>
  74. </p>
  75. <div class="ZINbbc xpd O9g5cc uUPGi">
  76. <div>
  77. <div class="kCrYT">
  78. <span>
  79. <div class="BNeawe">
  80. Related searches
  81. </div>
  82. </span>
  83. </div>
  84. <div class="rVLSBd">
  85. <a>
  86. <div>
  87. <div class="BNeawe">
  88. suggestion title
  89. </div>
  90. </div>
  91. </a>
  92. </div>
  93. </div>
  94. </p>
  95. """
  96. response = self.mock_response(html)
  97. results = google.response(response)
  98. self.assertEqual(type(results), list)
  99. self.assertEqual(len(results), 2)
  100. self.assertEqual(results[0]['title'], 'This is the title')
  101. self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
  102. self.assertEqual(results[0]['content'], 'This should be the content.')
  103. self.assertEqual(results[1]['suggestion'], 'suggestion title')
  104. html = """
  105. <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
  106. </li>
  107. """
  108. response = self.mock_response(html)
  109. results = google.response(response)
  110. self.assertEqual(type(results), list)
  111. self.assertEqual(len(results), 0)
  112. response = mock.Mock(text='<html></html>', url='https://sorry.google.com')
  113. response.search_params = mock.Mock()
  114. response.search_params.get = mock.Mock(return_value='www.google.com')
  115. self.assertRaises(RuntimeWarning, google.response, response)
  116. response = mock.Mock(text='<html></html>', url='https://www.google.com/sorry/IndexRedirect')
  117. response.search_params = mock.Mock()
  118. response.search_params.get = mock.Mock(return_value='www.google.com')
  119. self.assertRaises(RuntimeWarning, google.response, response)
  120. def test_parse_images(self):
  121. html = """
  122. <li>
  123. <div>
  124. <a href="http://www.google.com/url?q=http://this.is.the.url/">
  125. <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
  126. src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
  127. </a>
  128. </div>
  129. </li>
  130. """
  131. dom = lxml.html.fromstring(html)
  132. results = google.parse_images(dom, 'www.google.com')
  133. self.assertEqual(type(results), list)
  134. self.assertEqual(len(results), 1)
  135. self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
  136. self.assertEqual(results[0]['title'], '')
  137. self.assertEqual(results[0]['content'], '')
  138. self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
  139. def test_fetch_supported_languages(self):
  140. html = """<html></html>"""
  141. response = mock.Mock(text=html)
  142. languages = google._fetch_supported_languages(response)
  143. self.assertEqual(type(languages), dict)
  144. self.assertEqual(len(languages), 0)
  145. html = u"""
  146. <html>
  147. <body>
  148. <div id="langSec">
  149. <div>
  150. <input name="lr" data-name="english" value="lang_en" />
  151. <input name="lr" data-name="中文 (简体)" value="lang_zh-CN" />
  152. <input name="lr" data-name="中文 (繁體)" value="lang_zh-TW" />
  153. </div>
  154. </div>
  155. </body>
  156. </html>
  157. """
  158. response = mock.Mock(text=html)
  159. languages = google._fetch_supported_languages(response)
  160. self.assertEqual(type(languages), dict)
  161. self.assertEqual(len(languages), 3)
  162. self.assertIn('en', languages)
  163. self.assertIn('zh-CN', languages)
  164. self.assertIn('zh-TW', languages)
  165. self.assertEquals(type(languages['en']), dict)
  166. self.assertEquals(type(languages['zh-CN']), dict)
  167. self.assertEquals(type(languages['zh-TW']), dict)
  168. self.assertIn('name', languages['en'])
  169. self.assertIn('name', languages['zh-CN'])
  170. self.assertIn('name', languages['zh-TW'])
  171. self.assertEquals(languages['en']['name'], 'English')
  172. self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
  173. self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')