# -*- coding: utf-8 -*-
from collections import defaultdict

import lxml
import lxml.html
import mock

from searx.engines import google
from searx.testing import SearxTestCase


  7. class TestGoogleEngine(SearxTestCase):
  8. def mock_response(self, text):
  9. response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr')
  10. response.search_params = mock.Mock()
  11. response.search_params.get = mock.Mock(return_value='www.google.com')
  12. return response
  13. def test_request(self):
  14. google.supported_languages = ['en', 'fr', 'zh-CN', 'iw']
  15. google.language_aliases = {'he': 'iw'}
  16. query = 'test_query'
  17. dicto = defaultdict(dict)
  18. dicto['pageno'] = 1
  19. dicto['language'] = 'fr-FR'
  20. dicto['time_range'] = ''
  21. params = google.request(query, dicto)
  22. self.assertIn('url', params)
  23. self.assertIn(query, params['url'])
  24. self.assertIn('google.fr', params['url'])
  25. self.assertIn('fr', params['url'])
  26. self.assertIn('fr', params['headers']['Accept-Language'])
  27. dicto['language'] = 'en-US'
  28. params = google.request(query, dicto)
  29. self.assertIn('google.com', params['url'])
  30. self.assertIn('en', params['url'])
  31. self.assertIn('en', params['headers']['Accept-Language'])
  32. dicto['language'] = 'zh'
  33. params = google.request(query, dicto)
  34. self.assertIn('google.com', params['url'])
  35. self.assertIn('zh-CN', params['url'])
  36. self.assertIn('zh-CN', params['headers']['Accept-Language'])
  37. dicto['language'] = 'he'
  38. params = google.request(query, dicto)
  39. self.assertIn('google.com', params['url'])
  40. self.assertIn('iw', params['url'])
  41. self.assertIn('iw', params['headers']['Accept-Language'])
  42. def test_response(self):
  43. self.assertRaises(AttributeError, google.response, None)
  44. self.assertRaises(AttributeError, google.response, [])
  45. self.assertRaises(AttributeError, google.response, '')
  46. self.assertRaises(AttributeError, google.response, '[]')
  47. response = self.mock_response('<html></html>')
  48. self.assertEqual(google.response(response), [])
  49. html = """
  50. <div class="g">
  51. <h3 class="r">
  52. <a href="http://this.should.be.the.link/">
  53. <b>This</b> is <b>the</b> title
  54. </a>
  55. </h3>
  56. <div class="s">
  57. <div class="kv" style="margin-bottom:2px">
  58. <cite>
  59. <b>test</b>.psychologies.com/
  60. </cite>
  61. <div class="_nBb">‎
  62. <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
  63. aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
  64. <span class="_O0">
  65. </span>
  66. </div>
  67. <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
  68. <ul>
  69. <li class="_Ykb">
  70. <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
  71. .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
  72. En cache
  73. </a>
  74. </li>
  75. <li class="_Ykb">
  76. <a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/">
  77. Pages similaires
  78. </a>
  79. </li>
  80. </ul>
  81. </div>
  82. </div>
  83. </div>
  84. <span class="st">
  85. This should be the content.
  86. </span>
  87. <br>
  88. <div class="osl">‎
  89. <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
  90. Test Personnalité
  91. </a> - ‎
  92. <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
  93. Tests - Moi
  94. </a> - ‎
  95. <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
  96. Test Couple
  97. </a>
  98. - ‎
  99. <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
  100. Test Amour
  101. </a>
  102. </div>
  103. </div>
  104. </div>
  105. <div class="g">
  106. <h3 class="r">
  107. <a href="http://www.google.com/images?q=toto">
  108. <b>This</b>
  109. </a>
  110. </h3>
  111. </div>
  112. <div class="g">
  113. <h3 class="r">
  114. <a href="http://www.google.com/search?q=toto">
  115. <b>This</b> is
  116. </a>
  117. </h3>
  118. </div>
  119. <div class="g">
  120. <h3 class="r">
  121. <a href="€">
  122. <b>This</b> is <b>the</b>
  123. </a>
  124. </h3>
  125. </div>
  126. <div class="g">
  127. <h3 class="r">
  128. <a href="/url?q=url">
  129. <b>This</b> is <b>the</b>
  130. </a>
  131. </h3>
  132. </div>
  133. <p class="_Bmc" style="margin:3px 8px">
  134. <a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved=">
  135. suggestion <b>title</b>
  136. </a>
  137. </p>
  138. """
  139. response = self.mock_response(html)
  140. results = google.response(response)
  141. self.assertEqual(type(results), list)
  142. self.assertEqual(len(results), 2)
  143. self.assertEqual(results[0]['title'], 'This is the title')
  144. self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
  145. self.assertEqual(results[0]['content'], 'This should be the content.')
  146. self.assertEqual(results[1]['suggestion'], 'suggestion title')
  147. html = """
  148. <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
  149. </li>
  150. """
  151. response = self.mock_response(html)
  152. results = google.response(response)
  153. self.assertEqual(type(results), list)
  154. self.assertEqual(len(results), 0)
  155. response = mock.Mock(text='<html></html>', url='https://sorry.google.com')
  156. response.search_params = mock.Mock()
  157. response.search_params.get = mock.Mock(return_value='www.google.com')
  158. self.assertRaises(RuntimeWarning, google.response, response)
  159. response = mock.Mock(text='<html></html>', url='https://www.google.com/sorry/IndexRedirect')
  160. response.search_params = mock.Mock()
  161. response.search_params.get = mock.Mock(return_value='www.google.com')
  162. self.assertRaises(RuntimeWarning, google.response, response)
  163. def test_parse_images(self):
  164. html = """
  165. <li>
  166. <div>
  167. <a href="http://www.google.com/url?q=http://this.is.the.url/">
  168. <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
  169. src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
  170. </a>
  171. </div>
  172. </li>
  173. """
  174. dom = lxml.html.fromstring(html)
  175. results = google.parse_images(dom, 'www.google.com')
  176. self.assertEqual(type(results), list)
  177. self.assertEqual(len(results), 1)
  178. self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
  179. self.assertEqual(results[0]['title'], '')
  180. self.assertEqual(results[0]['content'], '')
  181. self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
  182. def test_fetch_supported_languages(self):
  183. html = """<html></html>"""
  184. response = mock.Mock(text=html)
  185. languages = google._fetch_supported_languages(response)
  186. self.assertEqual(type(languages), dict)
  187. self.assertEqual(len(languages), 0)
  188. html = u"""
  189. <html>
  190. <body>
  191. <div id="langSec">
  192. <div>
  193. <input name="lr" data-name="english" value="lang_en" />
  194. <input name="lr" data-name="中文 (简体)" value="lang_zh-CN" />
  195. <input name="lr" data-name="中文 (繁體)" value="lang_zh-TW" />
  196. </div>
  197. </div>
  198. </body>
  199. </html>
  200. """
  201. response = mock.Mock(text=html)
  202. languages = google._fetch_supported_languages(response)
  203. self.assertEqual(type(languages), dict)
  204. self.assertEqual(len(languages), 3)
  205. self.assertIn('en', languages)
  206. self.assertIn('zh-CN', languages)
  207. self.assertIn('zh-TW', languages)
  208. self.assertEquals(type(languages['en']), dict)
  209. self.assertEquals(type(languages['zh-CN']), dict)
  210. self.assertEquals(type(languages['zh-TW']), dict)
  211. self.assertIn('name', languages['en'])
  212. self.assertIn('name', languages['zh-CN'])
  213. self.assertIn('name', languages['zh-TW'])
  214. self.assertEquals(languages['en']['name'], 'English')
  215. self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
  216. self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')