# test_yahoo_news.py (5.3 KB)
  1. # -*- coding: utf-8 -*-
  2. from collections import defaultdict
  3. from datetime import datetime
  4. import mock
  5. from searx.engines import yahoo_news
  6. from searx.testing import SearxTestCase
  7. class TestYahooNewsEngine(SearxTestCase):
  8. def test_request(self):
  9. query = 'test_query'
  10. dicto = defaultdict(dict)
  11. dicto['pageno'] = 1
  12. dicto['language'] = 'fr_FR'
  13. params = yahoo_news.request(query, dicto)
  14. self.assertIn('url', params)
  15. self.assertIn(query, params['url'])
  16. self.assertIn('news.search.yahoo.com', params['url'])
  17. self.assertIn('fr', params['url'])
  18. self.assertIn('cookies', params)
  19. self.assertIn('sB', params['cookies'])
  20. self.assertIn('fr', params['cookies']['sB'])
  21. dicto['language'] = 'all'
  22. params = yahoo_news.request(query, dicto)
  23. self.assertIn('cookies', params)
  24. self.assertIn('sB', params['cookies'])
  25. self.assertIn('en', params['cookies']['sB'])
  26. self.assertIn('en', params['url'])
  27. def test_response(self):
  28. self.assertRaises(AttributeError, yahoo_news.response, None)
  29. self.assertRaises(AttributeError, yahoo_news.response, [])
  30. self.assertRaises(AttributeError, yahoo_news.response, '')
  31. self.assertRaises(AttributeError, yahoo_news.response, '[]')
  32. response = mock.Mock(text='<html></html>')
  33. self.assertEqual(yahoo_news.response(response), [])
  34. html = """
  35. <div class="res">
  36. <div>
  37. <h3>
  38. <a class="yschttl spt" href="http://this.is.the.url" target="_blank">
  39. This is
  40. the <b>title</b>...
  41. </a>
  42. </h3>
  43. </div>
  44. <span class="url">Business via Yahoo! Finance</span> &nbsp; <span class="timestamp">Feb 03 09:45am</span>
  45. <div class="abstr">
  46. This is the content
  47. </div>
  48. </div>
  49. """
  50. response = mock.Mock(text=html)
  51. results = yahoo_news.response(response)
  52. self.assertEqual(type(results), list)
  53. self.assertEqual(len(results), 1)
  54. self.assertEqual(results[0]['title'], 'This is the title...')
  55. self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
  56. self.assertEqual(results[0]['content'], 'This is the content')
  57. html = """
  58. <div class="res">
  59. <div>
  60. <h3>
  61. <a class="yschttl spt" href="http://this.is.the.url" target="_blank">
  62. This is
  63. the <b>title</b>...
  64. </a>
  65. </h3>
  66. </div>
  67. <span class="url">Business via Yahoo!</span> &nbsp; <span class="timestamp">2 hours, 22 minutes ago</span>
  68. <div class="abstr">
  69. This is the content
  70. </div>
  71. </div>
  72. <div class="res">
  73. <div>
  74. <h3>
  75. <a class="yschttl spt" href="http://this.is.the.url" target="_blank">
  76. This is
  77. the <b>title</b>...
  78. </a>
  79. </h3>
  80. </div>
  81. <span class="url">Business via Yahoo!</span> &nbsp; <span class="timestamp">22 minutes ago</span>
  82. <div class="abstr">
  83. This is the content
  84. </div>
  85. </div>
  86. <div class="res">
  87. <div>
  88. <h3>
  89. <a class="yschttl spt" href="http://this.is.the.url" target="_blank">
  90. This is
  91. the <b>title</b>...
  92. </a>
  93. </h3>
  94. </div>
  95. <span class="url">Business via Yahoo!</span> &nbsp; <span class="timestamp">Feb 03 09:45am 1900</span>
  96. <div class="abstr">
  97. This is the content
  98. </div>
  99. </div>
  100. """
  101. response = mock.Mock(text=html)
  102. results = yahoo_news.response(response)
  103. self.assertEqual(type(results), list)
  104. self.assertEqual(len(results), 3)
  105. self.assertEqual(results[0]['title'], 'This is the title...')
  106. self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
  107. self.assertEqual(results[0]['content'], 'This is the content')
  108. self.assertEqual(results[2]['publishedDate'].year, datetime.now().year)
  109. html = """
  110. <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
  111. <div Class="sa_mc">
  112. <div class="sb_tlst">
  113. <h2>
  114. <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
  115. <strong>This</strong> should be the title</a>
  116. </h2>
  117. </div>
  118. <div class="sb_meta">
  119. <cite>
  120. <strong>this</strong>.meta.com</cite>
  121. <span class="c_tlbxTrg">
  122. <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
  123. </span>
  124. </span>
  125. </div>
  126. <p>
  127. <strong>This</strong> should be the content.</p>
  128. </div>
  129. </li>
  130. """
  131. response = mock.Mock(text=html)
  132. results = yahoo_news.response(response)
  133. self.assertEqual(type(results), list)
  134. self.assertEqual(len(results), 0)