test_wikidata.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. # -*- coding: utf-8 -*-
  2. from lxml.html import fromstring
  3. from collections import defaultdict
  4. import mock
  5. from searx.engines import wikidata
  6. from searx.testing import SearxTestCase
  7. class TestWikidataEngine(SearxTestCase):
  def test_request(self):
      """Check that the built request URL targets wikidata.org and carries the query."""
      search_term = 'test_query'
      request_params = defaultdict(dict)

      # First pass: no specific language requested.
      request_params['language'] = 'all'
      built = wikidata.request(search_term, request_params)
      self.assertIn('url', built)
      request_url = built['url']
      self.assertIn(search_term, request_url)
      self.assertIn('wikidata.org', request_url)

      # Second pass: a region-qualified language code.
      request_params['language'] = 'es_ES'
      built = wikidata.request(search_term, request_params)
      self.assertIn(search_term, built['url'])
  # Successful cases are not tested here, to avoid sending additional requests.
  def test_response(self):
      """Check that malformed inputs raise and that an empty page yields no results."""
      # Each of these inputs is expected to make ``wikidata.response`` raise AttributeError.
      for malformed in (None, [], '', '[]'):
          self.assertRaises(AttributeError, wikidata.response, malformed)

      # Module-level settings are overwritten here before the call below.
      wikidata.supported_languages = ['en', 'es']
      wikidata.language_aliases = {}

      empty_page = mock.Mock(text='<html></html>', search_params={"language": "en"})
      self.assertEqual(wikidata.response(empty_page), [])
  def test_getDetail(self):
      """Check ``getDetail`` on an empty response, a minimal page and a fuller page."""
      # Case 1: an empty response produces an empty result list.
      self.assertEqual(wikidata.getDetail({}, "Q123", "en", "en-US"), [])

      # Case 2: only a title and an English Wikipedia sitelink are present.
      title_html = '<div><div class="wikibase-title-label">Test</div></div>'
      body_html = """
      <div>
        <div class="wikibase-entitytermsview-heading-description">
        </div>
        <div>
          <ul class="wikibase-sitelinklistview-listview">
            <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
          </ul>
        </div>
      </div>
      """
      parsed = {"parse": {"displaytitle": title_html, "text": body_html}}
      found = wikidata.getDetail(parsed, "Q123", "en", "en-US")
      self.assertEqual(len(found), 1)
      self.assertEqual(found[0]['url'], 'https://en.wikipedia.org/wiki/Test')

      # Case 3: title, description and official website carry language-fallback
      # markers, and the entity is requested with language "yua" / "yua_MX".
      title_html = """
      <div>
        <div class="wikibase-title-label">
          <span lang="en">Test</span>
          <sup class="wb-language-fallback-indicator">English</sup>
        </div>
      </div>
      """
      body_html = """
      <div>
        <div class="wikibase-entitytermsview-heading-description">
          <span lang="en">Description</span>
          <sup class="wb-language-fallback-indicator">English</sup>
        </div>
        <div id="P856">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P856">
              <span lang="en">official website</span>
              <sup class="wb-language-fallback-indicator">English</sup>
            </a>
          </div>
          <div class="wikibase-statementview-mainsnak">
            <a class="external free" href="https://officialsite.com">
              https://officialsite.com
            </a>
          </div>
        </div>
        <div>
          <ul class="wikibase-sitelinklistview-listview">
            <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
          </ul>
        </div>
      </div>
      """
      parsed = {"parse": {"displaytitle": title_html, "text": body_html}}
      found = wikidata.getDetail(parsed, "Q123", "yua", "yua_MX")
      self.assertEqual(len(found), 2)
      website_result, infobox = found

      # The first result is the plain link to the official website.
      self.assertEqual(website_result['title'], 'Official website')
      self.assertEqual(website_result['url'], 'https://officialsite.com')

      # The second result is the infobox.
      self.assertEqual(infobox['infobox'], 'Test')
      self.assertEqual(infobox['id'], None)
      self.assertEqual(infobox['content'], 'Description')
      self.assertEqual(infobox['attributes'], [])
      self.assertEqual(infobox['urls'][0]['title'], 'Official website')
      self.assertEqual(infobox['urls'][0]['url'], 'https://officialsite.com')
      self.assertEqual(infobox['urls'][1]['title'], 'Wikipedia (en)')
      self.assertEqual(infobox['urls'][1]['url'], 'https://en.wikipedia.org/wiki/Test')
  def test_add_image(self):
      """Check ``add_image`` with no image, a single image, and icon plus logo."""
      # A page without any image statement yields None.
      missing = wikidata.add_image(fromstring("<div></div>"))
      self.assertEqual(missing, None)

      # A single P18 (image) statement.
      single_image_html = u"""
      <div>
        <div id="P18">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P18">
              image
            </a>
          </div>
          <div class="wikibase-statementlistview">
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-rankselector">
                <span class="wikibase-rankselector-normal"></span>
              </div>
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <div class="commons-media-caption">
                      <a href="https://commons.wikimedia.org/wiki/File:image.png">image.png</a>
                      <br/>2,687 &#215; 3,356; 1.22 MB
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
      """
      picture_url = wikidata.add_image(fromstring(single_image_html))
      self.assertEqual(
          picture_url,
          "https://commons.wikimedia.org/wiki/Special:FilePath/image.png?width=500&height=400")

      # Both P2910 (icon) and P154 (logo) are present; the logo is the expected pick.
      icon_and_logo_html = u"""
      <div>
        <div id="P2910">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P2910">
              icon
            </a>
          </div>
          <div class="wikibase-statementlistview">
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-rankselector">
                <span class="wikibase-rankselector-normal"></span>
              </div>
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <div class="commons-media-caption">
                      <a href="https://commons.wikimedia.org/wiki/File:icon.png">icon.png</a>
                      <br/>671 &#215; 671; 18 KB</div>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
        <div id="P154">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P154">
              logo
            </a>
          </div>
          <div class="wikibase-statementlistview">
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-rankselector">
                <span class="wikibase-rankselector-normal"></span>
              </div>
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <div class="commons-media-caption">
                      <a href="https://commons.wikimedia.org/wiki/File:logo.png">logo.png</a>
                      <br/>170 &#215; 170; 1 KB
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
      """
      picture_url = wikidata.add_image(fromstring(icon_and_logo_html))
      self.assertEqual(
          picture_url,
          "https://commons.wikimedia.org/wiki/Special:FilePath/logo.png?width=500&height=400")
  def test_add_attribute(self):
      """Check ``add_attribute`` for a plain value, a date, and multiple ranked values."""
      citizenship_html = u"""
      <div>
        <div id="P27">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P27">
              country of citizenship
            </a>
          </div>
          <div class="wikibase-statementlistview">
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-rankselector">
                <span class="wikibase-rankselector-normal"></span>
              </div>
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <a href="/wiki/Q145">
                      United Kingdom
                    </a>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
      """
      collected = []
      tree = fromstring(citizenship_html)

      # A property id that is absent from the page adds nothing.
      wikidata.add_attribute(collected, tree, "Fail")
      self.assertEqual(collected, [])

      # The property that is present adds one capitalised label/value pair.
      wikidata.add_attribute(collected, tree, "P27")
      self.assertEqual(len(collected), 1)
      self.assertEqual(collected[0]["label"], "Country of citizenship")
      self.assertEqual(collected[0]["value"], "United Kingdom")

      birth_date_html = u"""
      <div>
        <div id="P569">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P569">
              date of birth
            </a>
          </div>
          <div class="wikibase-statementlistview">
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-rankselector">
                <span class="wikibase-rankselector-normal"></span>
              </div>
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    27 January 1832
                    <sup class="wb-calendar-name">
                      Gregorian
                    </sup>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
      """
      collected = []
      tree = fromstring(birth_date_html)

      # With date=True the expected value excludes the calendar name.
      wikidata.add_attribute(collected, tree, "P569", date=True)
      self.assertEqual(len(collected), 1)
      self.assertEqual(collected[0]["label"], "Date of birth")
      self.assertEqual(collected[0]["value"], "27 January 1832")

      # NOTE(review): the label link below points at Property:P27 although the
      # statement group id is P6 -- looks like a copy/paste leftover; confirm
      # whether the href matters to add_attribute before changing the fixture.
      government_html = u"""
      <div>
        <div id="P6">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P27">
              head of government
            </a>
          </div>
          <div class="wikibase-statementlistview">
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-rankselector">
                <span class="wikibase-rankselector-normal"></span>
              </div>
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <a href="/wiki/Q206">
                      Old Prime Minister
                    </a>
                  </div>
                </div>
              </div>
            </div>
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-rankselector">
                <span class="wikibase-rankselector-preferred"></span>
              </div>
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <a href="/wiki/Q3099714">
                      Actual Prime Minister
                    </a>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
      """
      # Without trimming, both values are expected, joined by a comma.
      collected = []
      tree = fromstring(government_html)
      wikidata.add_attribute(collected, tree, "P6")
      self.assertEqual(len(collected), 1)
      self.assertEqual(collected[0]["label"], "Head of government")
      self.assertEqual(collected[0]["value"], "Old Prime Minister, Actual Prime Minister")

      # With trim=True only the preferred-rank value is expected.
      collected = []
      tree = fromstring(government_html)
      wikidata.add_attribute(collected, tree, "P6", trim=True)
      self.assertEqual(len(collected), 1)
      self.assertEqual(collected[0]["value"], "Actual Prime Minister")
  def test_add_url(self):
      """Check ``add_url`` with a default label, a custom label, and two statements.

      Uses ``assertEqual`` throughout: the ``assertEquals`` alias is deprecated
      and no longer exists in Python 3.12+.
      """
      html = u"""
      <div>
        <div id="P856">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P856">
              official website
            </a>
          </div>
          <div class="wikibase-statementlistview">
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <a class="external free" href="https://searx.me">
                      https://searx.me/
                    </a>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
      """
      # Default label: the title is the capitalised property label from the page.
      urls = []
      html_etree = fromstring(html)
      wikidata.add_url(urls, html_etree, 'P856')
      self.assertEqual(len(urls), 1)
      self.assertIn({'title': 'Official website', 'url': 'https://searx.me/'}, urls)

      # Custom label plus a ``results`` list: the same entry lands in both lists.
      urls = []
      results = []
      wikidata.add_url(urls, html_etree, 'P856', 'custom label', results=results)
      self.assertEqual(len(urls), 1)
      self.assertEqual(len(results), 1)
      self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, urls)
      self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, results)

      html = u"""
      <div>
        <div id="P856">
          <div class="wikibase-statementgroupview-property-label">
            <a href="/wiki/Property:P856">
              official website
            </a>
          </div>
          <div class="wikibase-statementlistview">
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <a class="external free" href="http://www.worldofwarcraft.com">
                      http://www.worldofwarcraft.com
                    </a>
                  </div>
                </div>
              </div>
            </div>
            <div class="wikibase-statementview listview-item">
              <div class="wikibase-statementview-mainsnak">
                <div>
                  <div class="wikibase-snakview-value">
                    <a class="external free" href="http://eu.battle.net/wow/en/">
                      http://eu.battle.net/wow/en/
                    </a>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
      """
      # Two statements under one property: both URLs are collected.
      urls = []
      html_etree = fromstring(html)
      wikidata.add_url(urls, html_etree, 'P856')
      self.assertEqual(len(urls), 2)
      self.assertIn({'title': 'Official website', 'url': 'http://www.worldofwarcraft.com'}, urls)
      self.assertIn({'title': 'Official website', 'url': 'http://eu.battle.net/wow/en/'}, urls)
  def test_get_imdblink(self):
      """Check ``get_imdblink`` on a title id and on a name id."""
      imdb_prefix = 'https://www.imdb.com/'

      title_html = u"""
      <div>
        <div class="wikibase-statementview-mainsnak">
          <div>
            <div class="wikibase-snakview-value">
              <a class="wb-external-id" href="http://www.imdb.com/tt0433664">
                tt0433664
              </a>
            </div>
          </div>
        </div>
      </div>
      """
      # NOTE(review): the link built for the "tt" id is never asserted; this
      # call only verifies that get_imdblink does not raise. Consider adding
      # an assertion once the expected URL shape is confirmed.
      wikidata.get_imdblink(fromstring(title_html), imdb_prefix)

      # The doubled ``href="href=`` below is part of the fixture and is kept verbatim.
      person_html = u"""
      <div>
        <div class="wikibase-statementview-mainsnak">
          <div>
            <div class="wikibase-snakview-value">
              <a class="wb-external-id"
                 href="href="http://tools.wmflabs.org/...http://www.imdb.com/&id=nm4915994"">
                nm4915994
              </a>
            </div>
          </div>
        </div>
      </div>
      """
      person_link = wikidata.get_imdblink(fromstring(person_html), imdb_prefix)
      self.assertIn('https://www.imdb.com/name/nm4915994', person_link)
  def test_get_geolink(self):
      """Check ``get_geolink`` on whole-degree and degree/minute/second coordinates."""
      osm_base = 'https://www.openstreetmap.org/'

      # Whole degrees, northern and eastern hemispheres.
      whole_degrees_html = u"""
      <div>
        <div class="wikibase-statementview-mainsnak">
          <div>
            <div class="wikibase-snakview-value">
              60°N, 40°E
            </div>
          </div>
        </div>
      </div>
      """
      map_link = wikidata.get_geolink(fromstring(whole_degrees_html))
      self.assertIn(osm_base, map_link)
      self.assertIn('lat=60&lon=40', map_link)

      # Degrees/minutes/seconds, south and west: both values are expected negative.
      dms_html = u"""
      <div>
        <div class="wikibase-statementview-mainsnak">
          <div>
            <div class="wikibase-snakview-value">
              34°35'59"S, 58°22'55"W
            </div>
          </div>
        </div>
      </div>
      """
      map_link = wikidata.get_geolink(fromstring(dms_html))
      self.assertIn(osm_base, map_link)
      self.assertIn('lat=-34.59', map_link)
      self.assertIn('lon=-58.38', map_link)
  def test_get_wikilink(self):
      """Check ``get_wikilink`` for a missing site id and for three listed ones."""
      sitelinks_html = """
      <div>
        <div>
          <ul class="wikibase-sitelinklistview-listview">
            <li data-wb-siteid="arwiki"><a href="http://ar.wikipedia.org/wiki/Test">Test</a></li>
            <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
          </ul>
        </div>
        <div>
          <ul class="wikibase-sitelinklistview-listview">
            <li data-wb-siteid="enwikiquote"><a href="https://en.wikiquote.org/wiki/Test">Test</a></li>
          </ul>
        </div>
      </div>
      """
      tree = fromstring(sitelinks_html)

      # (site id, expected link); an unknown site id is expected to give None,
      # and http links are expected back as https.
      expectations = (
          ('nowiki', None),
          ('enwiki', 'https://en.wikipedia.org/wiki/Test'),
          ('arwiki', 'https://ar.wikipedia.org/wiki/Test'),
          ('enwikiquote', 'https://en.wikiquote.org/wiki/Test'),
      )
      for site_id, expected_link in expectations:
          self.assertEqual(wikidata.get_wikilink(tree, site_id), expected_link)