# test_yahoo.py
  1. # -*- coding: utf-8 -*-
  2. from collections import defaultdict
  3. import mock
  4. from searx.engines import yahoo
  5. from searx.testing import SearxTestCase
  6. class TestYahooEngine(SearxTestCase):
  7. def test_parse_url(self):
  8. test_url = 'http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb' +\
  9. '2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=' +\
  10. 'dtcJsfP4mEeBOjnVfUQ-'
  11. url = yahoo.parse_url(test_url)
  12. self.assertEqual('https://this.is.the.url/', url)
  13. test_url = 'http://r.search.yahoo.com/_ylt=A0LElb9JUSKcAEGRXNyoA;_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb' +\
  14. '2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10/RU=https%3a%2f%2fthis.is.the.url%2f/RS=' +\
  15. 'dtcJsfP4mEeBOjnVfUQ-'
  16. url = yahoo.parse_url(test_url)
  17. self.assertEqual('https://this.is.the.url/', url)
  18. test_url = 'https://this.is.the.url/'
  19. url = yahoo.parse_url(test_url)
  20. self.assertEqual('https://this.is.the.url/', url)
  21. def test_request(self):
  22. yahoo.supported_languages = ['en', 'fr', 'zh-CHT', 'zh-CHS']
  23. query = 'test_query'
  24. dicto = defaultdict(dict)
  25. dicto['pageno'] = 1
  26. dicto['time_range'] = ''
  27. dicto['language'] = 'fr-FR'
  28. params = yahoo.request(query, dicto)
  29. self.assertIn('url', params)
  30. self.assertIn(query, params['url'])
  31. self.assertIn('search.yahoo.com', params['url'])
  32. self.assertIn('fr', params['url'])
  33. self.assertIn('cookies', params)
  34. self.assertIn('sB', params['cookies'])
  35. self.assertIn('fr', params['cookies']['sB'])
  36. dicto['language'] = 'zh'
  37. params = yahoo.request(query, dicto)
  38. self.assertIn('zh_chs', params['url'])
  39. self.assertIn('zh_chs', params['cookies']['sB'])
  40. dicto['language'] = 'zh-TW'
  41. params = yahoo.request(query, dicto)
  42. self.assertIn('zh_cht', params['url'])
  43. self.assertIn('zh_cht', params['cookies']['sB'])
  44. dicto['language'] = 'all'
  45. params = yahoo.request(query, dicto)
  46. self.assertIn('cookies', params)
  47. self.assertIn('sB', params['cookies'])
  48. self.assertIn('en', params['cookies']['sB'])
  49. self.assertIn('en', params['url'])
  50. def test_no_url_in_request_year_time_range(self):
  51. dicto = defaultdict(dict)
  52. query = 'test_query'
  53. dicto['time_range'] = 'year'
  54. params = yahoo.request(query, dicto)
  55. self.assertEqual({}, params['url'])
  56. def test_response(self):
  57. self.assertRaises(AttributeError, yahoo.response, None)
  58. self.assertRaises(AttributeError, yahoo.response, [])
  59. self.assertRaises(AttributeError, yahoo.response, '')
  60. self.assertRaises(AttributeError, yahoo.response, '[]')
  61. response = mock.Mock(text='<html></html>')
  62. self.assertEqual(yahoo.response(response), [])
  63. html = """
  64. <ol class="reg mb-15 searchCenterMiddle">
  65. <li class="first">
  66. <div class="dd algo fst Sr">
  67. <div class="compTitle">
  68. <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
  69. _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
  70. /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
  71. target="_blank" data-bid="54e712e13671c">
  72. <b><b>This is the title</b></b></a>
  73. </h3>
  74. </div>
  75. <div class="compText aAbs">
  76. <p class="lh-18"><b><b>This is the </b>content</b>
  77. </p>
  78. </div>
  79. </div>
  80. </li>
  81. <li>
  82. <div class="dd algo lst Sr">
  83. <div class="compTitle">
  84. </div>
  85. <div class="compText aAbs">
  86. <p class="lh-18">This is the second content</p>
  87. </div>
  88. </div>
  89. </li>
  90. </ol>
  91. <div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04">
  92. <div class="compTitle mb-4 h-17">
  93. <h3 class="title">Also Try</h3> </div>
  94. <table class="compTable m-0 ac-1st td-u fz-ms">
  95. <tbody>
  96. <tr>
  97. <td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a>
  98. </td>
  99. </tr>
  100. </table>
  101. </div>
  102. """
  103. response = mock.Mock(text=html)
  104. results = yahoo.response(response)
  105. self.assertEqual(type(results), list)
  106. self.assertEqual(len(results), 2)
  107. self.assertEqual(results[0]['title'], 'This is the title')
  108. self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
  109. self.assertEqual(results[0]['content'], 'This is the content')
  110. self.assertEqual(results[1]['suggestion'], 'This is the suggestion')
  111. html = """
  112. <ol class="reg mb-15 searchCenterMiddle">
  113. <li class="first">
  114. <div class="dd algo fst Sr">
  115. <div class="compTitle">
  116. <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
  117. _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
  118. /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
  119. target="_blank" data-bid="54e712e13671c">
  120. <b><b>This is the title</b></b></a>
  121. </h3>
  122. </div>
  123. <div class="compText aAbs">
  124. <p class="lh-18"><b><b>This is the </b>content</b>
  125. </p>
  126. </div>
  127. </div>
  128. </li>
  129. </ol>
  130. """
  131. response = mock.Mock(text=html)
  132. results = yahoo.response(response)
  133. self.assertEqual(type(results), list)
  134. self.assertEqual(len(results), 1)
  135. self.assertEqual(results[0]['title'], 'This is the title')
  136. self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
  137. self.assertEqual(results[0]['content'], 'This is the content')
  138. html = """
  139. <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
  140. </li>
  141. """
  142. response = mock.Mock(text=html)
  143. results = yahoo.response(response)
  144. self.assertEqual(type(results), list)
  145. self.assertEqual(len(results), 0)
  146. def test_fetch_supported_languages(self):
  147. html = """<html></html>"""
  148. response = mock.Mock(text=html)
  149. results = yahoo._fetch_supported_languages(response)
  150. self.assertEqual(type(results), list)
  151. self.assertEqual(len(results), 0)
  152. html = """
  153. <html>
  154. <div>
  155. <div id="yschlang">
  156. <span>
  157. <label><input value="lang_ar"></input></label>
  158. </span>
  159. <span>
  160. <label><input value="lang_zh_chs"></input></label>
  161. <label><input value="lang_zh_cht"></input></label>
  162. </span>
  163. </div>
  164. </div>
  165. </html>
  166. """
  167. response = mock.Mock(text=html)
  168. languages = yahoo._fetch_supported_languages(response)
  169. self.assertEqual(type(languages), list)
  170. self.assertEqual(len(languages), 3)
  171. self.assertIn('ar', languages)
  172. self.assertIn('zh-CHS', languages)
  173. self.assertIn('zh-CHT', languages)