test_yahoo_news.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. # -*- coding: utf-8 -*-
  2. from collections import defaultdict
  3. from datetime import datetime
  4. import mock
  5. from searx.engines import yahoo_news
  6. from searx.testing import SearxTestCase
  7. class TestYahooNewsEngine(SearxTestCase):
  def test_request(self):
      """Exercise ``yahoo_news.request`` for a specific language and for 'all'.

      With language 'fr-FR' the returned params must carry a URL containing
      the query, the Yahoo news host and 'fr', plus an ``sB`` cookie
      containing 'fr'.  With language 'all' both the ``sB`` cookie and the
      URL are expected to contain 'en'.
      """
      # The engine module's language list is overridden for this test.
      yahoo_news.supported_languages = ['en', 'fr']
      search_terms = 'test_query'
      request_params = defaultdict(dict)
      request_params.update(pageno=1, language='fr-FR')

      # --- explicit language -------------------------------------------
      built = yahoo_news.request(search_terms, request_params)
      self.assertIn('url', built)
      for fragment in (search_terms, 'news.search.yahoo.com', 'fr'):
          self.assertIn(fragment, built['url'])
      self.assertIn('cookies', built)
      self.assertIn('sB', built['cookies'])
      self.assertIn('fr', built['cookies']['sB'])

      # --- language 'all' (same params object is reused) ---------------
      request_params['language'] = 'all'
      built = yahoo_news.request(search_terms, request_params)
      self.assertIn('cookies', built)
      self.assertIn('sB', built['cookies'])
      self.assertIn('en', built['cookies']['sB'])
      self.assertIn('en', built['url'])
  def test_sanitize_url(self):
      """Check ``yahoo_news.sanitize_url`` on a plain URL and on one with ';_ylt='.

      A URL without the ';_ylt=' part must come back unchanged; for
      'www.yahoo.com/;_ylt=test' the expected result is 'www.yahoo.com/'.
      """
      # (input URL, expected sanitized URL)
      cases = (
          ("test.url", "test.url"),
          ("www.yahoo.com/;_ylt=test", "www.yahoo.com/"),
      )
      for raw_url, expected in cases:
          self.assertEqual(expected, yahoo_news.sanitize_url(raw_url))
  33. def test_response(self):
  34. self.assertRaises(AttributeError, yahoo_news.response, None)
  35. self.assertRaises(AttributeError, yahoo_news.response, [])
  36. self.assertRaises(AttributeError, yahoo_news.response, '')
  37. self.assertRaises(AttributeError, yahoo_news.response, '[]')
  38. response = mock.Mock(text='<html></html>')
  39. self.assertEqual(yahoo_news.response(response), [])
  40. html = """
  41. <ol class=" reg searchCenterMiddle">
  42. <li class="first">
  43. <div class="compTitle">
  44. <h3>
  45. <a class="yschttl spt" href="http://this.is.the.url" target="_blank">
  46. This is
  47. the <b>title</b>...
  48. </a>
  49. </h3>
  50. </div>
  51. <div>
  52. <span class="cite">Business via Yahoo!</span>
  53. <span class="tri fc-2nd ml-10">May 01 10:00 AM</span>
  54. </div>
  55. <div class="compText">
  56. This is the content
  57. </div>
  58. </li>
  59. <li class="first">
  60. <div class="compTitle">
  61. <h3>
  62. <a class="yschttl spt" target="_blank">
  63. </a>
  64. </h3>
  65. </div>
  66. <div class="compText">
  67. </div>
  68. </li>
  69. </ol>
  70. """
  71. response = mock.Mock(text=html)
  72. results = yahoo_news.response(response)
  73. self.assertEqual(type(results), list)
  74. self.assertEqual(len(results), 1)
  75. self.assertEqual(results[0]['title'], 'This is the title...')
  76. self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
  77. self.assertEqual(results[0]['content'], 'This is the content')
  78. html = """
  79. <ol class=" reg searchCenterMiddle">
  80. <li class="first">
  81. <div class="compTitle">
  82. <h3>
  83. <a class="yschttl spt" href="http://this.is.the.url" target="_blank">
  84. This is
  85. the <b>title</b>...
  86. </a>
  87. </h3>
  88. </div>
  89. <div>
  90. <span class="cite">Business via Yahoo!</span>
  91. <span class="tri fc-2nd ml-10">2 hours, 22 minutes ago</span>
  92. </div>
  93. <div class="compText">
  94. This is the content
  95. </div>
  96. </li>
  97. <li>
  98. <div class="compTitle">
  99. <h3>
  100. <a class="yschttl spt" href="http://this.is.the.url" target="_blank">
  101. This is
  102. the <b>title</b>...
  103. </a>
  104. </h3>
  105. </div>
  106. <div>
  107. <span class="cite">Business via Yahoo!</span>
  108. <span class="tri fc-2nd ml-10">22 minutes ago</span>
  109. </div>
  110. <div class="compText">
  111. This is the content
  112. </div>
  113. </li>
  114. <li>
  115. <div class="compTitle">
  116. <h3>
  117. <a class="yschttl spt" href="http://this.is.the.url" target="_blank">
  118. This is
  119. the <b>title</b>...
  120. </a>
  121. </h3>
  122. </div>
  123. <div>
  124. <span class="cite">Business via Yahoo!</span>
  125. <span class="tri fc-2nd ml-10">Feb 03 09:45AM 1900</span>
  126. </div>
  127. <div class="compText">
  128. This is the content
  129. </div>
  130. </li>
  131. </ol>
  132. """
  133. response = mock.Mock(text=html)
  134. results = yahoo_news.response(response)
  135. self.assertEqual(type(results), list)
  136. self.assertEqual(len(results), 3)
  137. self.assertEqual(results[0]['title'], 'This is the title...')
  138. self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
  139. self.assertEqual(results[0]['content'], 'This is the content')
  140. self.assertEqual(results[2]['publishedDate'].year, datetime.now().year)