# test_google.py 6.3 KB
# -*- coding: utf-8 -*-
from collections import defaultdict

import lxml
import lxml.html
import mock

from searx.engines import google
from searx.testing import SearxTestCase


  7. class TestGoogleEngine(SearxTestCase):
  8. def test_request(self):
  9. query = 'test_query'
  10. dicto = defaultdict(dict)
  11. dicto['pageno'] = 1
  12. dicto['language'] = 'fr_FR'
  13. params = google.request(query, dicto)
  14. self.assertIn('url', params)
  15. self.assertIn(query, params['url'])
  16. self.assertIn('google.com', params['url'])
  17. self.assertIn('PREF', params['cookies'])
  18. self.assertIn('fr', params['headers']['Accept-Language'])
  19. dicto['language'] = 'all'
  20. params = google.request(query, dicto)
  21. self.assertIn('en', params['headers']['Accept-Language'])
  22. def test_response(self):
  23. self.assertRaises(AttributeError, google.response, None)
  24. self.assertRaises(AttributeError, google.response, [])
  25. self.assertRaises(AttributeError, google.response, '')
  26. self.assertRaises(AttributeError, google.response, '[]')
  27. response = mock.Mock(text='<html></html>')
  28. self.assertEqual(google.response(response), [])
  29. html = """
  30. <li class="g">
  31. <h3 class="r">
  32. <a href="http://this.should.be.the.link/">
  33. <b>This</b> is <b>the</b> title
  34. </a>
  35. </h3>
  36. <div class="s">
  37. <div class="kv" style="margin-bottom:2px">
  38. <cite>
  39. <b>test</b>.psychologies.com/
  40. </cite>
  41. <div class="_nBb">‎
  42. <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
  43. aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
  44. <span class="_O0">
  45. </span>
  46. </div>
  47. <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
  48. <ul>
  49. <li class="_Ykb">
  50. <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
  51. .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
  52. En cache
  53. </a>
  54. </li>
  55. <li class="_Ykb">
  56. <a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/">
  57. Pages similaires
  58. </a>
  59. </li>
  60. </ul>
  61. </div>
  62. </div>
  63. </div>
  64. <span class="st">
  65. This should be the content.
  66. </span>
  67. <br>
  68. <div class="osl">‎
  69. <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
  70. Test Personnalité
  71. </a> - ‎
  72. <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
  73. Tests - Moi
  74. </a> - ‎
  75. <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
  76. Test Couple
  77. </a>
  78. - ‎
  79. <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
  80. Test Amour
  81. </a>
  82. </div>
  83. </div>
  84. </li>
  85. <li class="g">
  86. <h3 class="r">
  87. <a href="http://www.google.com/images?q=toto">
  88. <b>This</b>
  89. </a>
  90. </h3>
  91. </li>
  92. <li class="g">
  93. <h3 class="r">
  94. <a href="http://www.google.com/search?q=toto">
  95. <b>This</b> is
  96. </a>
  97. </h3>
  98. </li>
  99. <li class="g">
  100. <h3 class="r">
  101. <a href="€">
  102. <b>This</b> is <b>the</b>
  103. </a>
  104. </h3>
  105. </li>
  106. <li class="g">
  107. <h3 class="r">
  108. <a href="/url?q=url">
  109. <b>This</b> is <b>the</b>
  110. </a>
  111. </h3>
  112. </li>
  113. <p class="_Bmc" style="margin:3px 8px">
  114. <a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved=">
  115. suggestion <b>title</b>
  116. </a>
  117. </p>
  118. """
  119. response = mock.Mock(text=html)
  120. results = google.response(response)
  121. self.assertEqual(type(results), list)
  122. self.assertEqual(len(results), 2)
  123. self.assertEqual(results[0]['title'], 'This is the title')
  124. self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
  125. self.assertEqual(results[0]['content'], 'This should be the content.')
  126. self.assertEqual(results[1]['suggestion'], 'suggestion title')
  127. html = """
  128. <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
  129. </li>
  130. """
  131. response = mock.Mock(text=html)
  132. results = google.response(response)
  133. self.assertEqual(type(results), list)
  134. self.assertEqual(len(results), 0)
  135. def test_parse_images(self):
  136. html = """
  137. <li>
  138. <div>
  139. <a href="http://www.google.com/url?q=http://this.is.the.url/">
  140. <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
  141. src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
  142. </a>
  143. </div>
  144. </li>
  145. """
  146. dom = lxml.html.fromstring(html)
  147. results = google.parse_images(dom)
  148. self.assertEqual(type(results), list)
  149. self.assertEqual(len(results), 1)
  150. self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
  151. self.assertEqual(results[0]['title'], '')
  152. self.assertEqual(results[0]['content'], '')
  153. self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')