test_yahoo.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. # -*- coding: utf-8 -*-
  2. from collections import defaultdict
  3. import mock
  4. from searx.engines import yahoo
  5. from searx.testing import SearxTestCase
  6. class TestYahooEngine(SearxTestCase):
  7. def test_parse_url(self):
  8. test_url = 'http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb' +\
  9. '2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=' +\
  10. 'dtcJsfP4mEeBOjnVfUQ-'
  11. url = yahoo.parse_url(test_url)
  12. self.assertEqual('https://this.is.the.url/', url)
  13. test_url = 'http://r.search.yahoo.com/_ylt=A0LElb9JUSKcAEGRXNyoA;_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb' +\
  14. '2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10/RU=https%3a%2f%2fthis.is.the.url%2f/RS=' +\
  15. 'dtcJsfP4mEeBOjnVfUQ-'
  16. url = yahoo.parse_url(test_url)
  17. self.assertEqual('https://this.is.the.url/', url)
  18. test_url = 'https://this.is.the.url/'
  19. url = yahoo.parse_url(test_url)
  20. self.assertEqual('https://this.is.the.url/', url)
  21. def test_request(self):
  22. query = 'test_query'
  23. dicto = defaultdict(dict)
  24. dicto['pageno'] = 1
  25. dicto['language'] = 'fr_FR'
  26. params = yahoo.request(query, dicto)
  27. self.assertIn('url', params)
  28. self.assertIn(query, params['url'])
  29. self.assertIn('search.yahoo.com', params['url'])
  30. self.assertIn('fr', params['url'])
  31. self.assertIn('cookies', params)
  32. self.assertIn('sB', params['cookies'])
  33. self.assertIn('fr', params['cookies']['sB'])
  34. dicto['language'] = 'all'
  35. params = yahoo.request(query, dicto)
  36. self.assertIn('cookies', params)
  37. self.assertIn('sB', params['cookies'])
  38. self.assertIn('en', params['cookies']['sB'])
  39. self.assertIn('en', params['url'])
  40. def test_response(self):
  41. self.assertRaises(AttributeError, yahoo.response, None)
  42. self.assertRaises(AttributeError, yahoo.response, [])
  43. self.assertRaises(AttributeError, yahoo.response, '')
  44. self.assertRaises(AttributeError, yahoo.response, '[]')
  45. response = mock.Mock(text='<html></html>')
  46. self.assertEqual(yahoo.response(response), [])
  47. html = """
  48. <div class="res">
  49. <div>
  50. <h3>
  51. <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
  52. _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
  53. /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
  54. <b>This</b> is the title
  55. </a>
  56. </h3>
  57. </div>
  58. <span class="url" dir="ltr">www.<b>test</b>.com</span>
  59. <div class="abstr">
  60. <b>This</b> is the content
  61. </div>
  62. </div>
  63. <div id="satat" data-bns="Yahoo" data-bk="124.1">
  64. <h2>Also Try</h2>
  65. <table>
  66. <tbody>
  67. <tr>
  68. <td>
  69. <a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" >
  70. <span>
  71. <b></b>This is <b>the suggestion</b>
  72. </span>
  73. </a>
  74. </td>
  75. </tr>
  76. </tbody>
  77. </table>
  78. </div>
  79. """
  80. response = mock.Mock(text=html)
  81. results = yahoo.response(response)
  82. self.assertEqual(type(results), list)
  83. self.assertEqual(len(results), 2)
  84. self.assertEqual(results[0]['title'], 'This is the title')
  85. self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
  86. self.assertEqual(results[0]['content'], 'This is the content')
  87. self.assertEqual(results[1]['suggestion'], 'This is the suggestion')
  88. html = """
  89. <div class="res">
  90. <div>
  91. <h3>
  92. <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
  93. _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
  94. /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
  95. <b>This</b> is the title
  96. </a>
  97. </h3>
  98. </div>
  99. <span class="url" dir="ltr">www.<b>test</b>.com</span>
  100. <div class="abstr">
  101. <b>This</b> is the content
  102. </div>
  103. </div>
  104. <div class="res">
  105. <div>
  106. <h3>
  107. <a id="link-1" class="yschttl spt">
  108. <b>This</b> is the title
  109. </a>
  110. </h3>
  111. </div>
  112. <span class="url" dir="ltr">www.<b>test</b>.com</span>
  113. <div class="abstr">
  114. <b>This</b> is the content
  115. </div>
  116. </div>
  117. <div class="res">
  118. <div>
  119. <h3>
  120. </h3>
  121. </div>
  122. <span class="url" dir="ltr">www.<b>test</b>.com</span>
  123. <div class="abstr">
  124. <b>This</b> is the content
  125. </div>
  126. </div>
  127. """
  128. response = mock.Mock(text=html)
  129. results = yahoo.response(response)
  130. self.assertEqual(type(results), list)
  131. self.assertEqual(len(results), 1)
  132. self.assertEqual(results[0]['title'], 'This is the title')
  133. self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
  134. self.assertEqual(results[0]['content'], 'This is the content')
  135. html = """
  136. <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
  137. </li>
  138. """
  139. response = mock.Mock(text=html)
  140. results = yahoo.response(response)
  141. self.assertEqual(type(results), list)
  142. self.assertEqual(len(results), 0)