# -*- coding: utf-8 -*-
from json import loads
from lxml.html import fromstring
from collections import defaultdict
import mock
from searx.engines import wikidata
from searx.testing import SearxTestCase


class TestWikidataEngine(SearxTestCase):
    """Unit tests for the helpers exposed by ``searx.engines.wikidata``.

    Every test feeds hand-written HTML/JSON fixtures to the engine, so no
    network request is issued.
    """

    def test_request(self):
        """The request URL contains the query, the host and the language."""
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['language'] = 'all'
        params = wikidata.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('wikidata.org', params['url'])
        # With language 'all' the URL is expected to contain 'en'.
        self.assertIn('en', params['url'])

        dicto['language'] = 'es_ES'
        params = wikidata.request(query, dicto)
        self.assertIn(query, params['url'])
        self.assertIn('es', params['url'])

    # successful cases are not tested here to avoid sending additional requests
    def test_response(self):
        """Invalid responses raise AttributeError; an empty page gives []."""
        self.assertRaises(AttributeError, wikidata.response, None)
        self.assertRaises(AttributeError, wikidata.response, [])
        self.assertRaises(AttributeError, wikidata.response, '')
        self.assertRaises(AttributeError, wikidata.response, '[]')

        response = mock.Mock(content='<html></html>', search_params={"language": "all"})
        self.assertEqual(wikidata.response(response), [])

    def test_getDetail(self):
        """getDetail builds link results and an infobox from a parsed entity."""
        # An empty JSON response yields no results.
        response = {}
        results = wikidata.getDetail(response, "Q123", "en", "en-US")
        self.assertEqual(results, [])

        # Entity with only a Wikipedia sitelink: a single result is expected.
        title_html = '<div><div class="wikibase-title-label">Test</div></div>'
        html = """
        <div>
            <div class="wikibase-entitytermsview-heading-description">
            </div>
            <div>
                <ul class="wikibase-sitelinklistview-listview">
                    <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
                </ul>
            </div>
        </div>
        """
        response = {"parse": {"displaytitle": title_html, "text": html}}

        results = wikidata.getDetail(response, "Q123", "en", "en-US")
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['url'], 'https://en.wikipedia.org/wiki/Test')

        # Entity whose labels carry a language-fallback indicator; the
        # expected values below do not contain the "English" indicator text.
        title_html = """
        <div>
            <div class="wikibase-title-label">
                <span lang="en">Test</span>
                <sup class="wb-language-fallback-indicator">English</sup>
            </div>
        </div>
        """
        html = """
        <div>
            <div class="wikibase-entitytermsview-heading-description">
                <span lang="en">Description</span>
                <sup class="wb-language-fallback-indicator">English</sup>
            </div>
            <div id="P856">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P856">
                        <span lang="en">official website</span>
                        <sup class="wb-language-fallback-indicator">English</sup>
                    </a>
                </div>
                <div class="wikibase-statementview-mainsnak">
                    <a class="external free" href="https://officialsite.com">
                        https://officialsite.com
                    </a>
                </div>
            </div>
            <div>
                <ul class="wikibase-sitelinklistview-listview">
                    <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
                </ul>
            </div>
        </div>
        """
        response = {"parse": {"displaytitle": title_html, "text": html}}

        results = wikidata.getDetail(response, "Q123", "yua", "yua_MX")
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'Official website')
        self.assertEqual(results[0]['url'], 'https://officialsite.com')

        self.assertEqual(results[1]['infobox'], 'Test')
        self.assertIsNone(results[1]['id'])
        self.assertEqual(results[1]['content'], 'Description')
        self.assertEqual(results[1]['attributes'], [])
        self.assertEqual(results[1]['urls'][0]['title'], 'Official website')
        self.assertEqual(results[1]['urls'][0]['url'], 'https://officialsite.com')
        self.assertEqual(results[1]['urls'][1]['title'], 'Wikipedia (en)')
        self.assertEqual(results[1]['urls'][1]['url'], 'https://en.wikipedia.org/wiki/Test')

    def test_add_image(self):
        """add_image returns a Special:FilePath URL, or None without image."""
        image_src = wikidata.add_image(fromstring("<div></div>"))
        self.assertIsNone(image_src)

        html = u"""
        <div>
            <div id="P18">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P18">
                        image
                    </a>
                </div>
                <div class="wikibase-statementlistview">
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-rankselector">
                            <span class="wikibase-rankselector-normal"></span>
                        </div>
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a href="https://commons.wikimedia.org/wiki/File:image.png">
                                        image.png
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """
        html_etree = fromstring(html)

        image_src = wikidata.add_image(html_etree)
        self.assertEqual(image_src,
                         "https://commons.wikimedia.org/wiki/Special:FilePath/image.png?width=500&height=400")

        # Both an icon (P2910) and a logo (P154) are present: the logo is
        # the one expected in the result.
        html = u"""
        <div>
            <div id="P2910">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P2910">
                        icon
                    </a>
                </div>
                <div class="wikibase-statementlistview">
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-rankselector">
                            <span class="wikibase-rankselector-normal"></span>
                        </div>
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a href="https://commons.wikimedia.org/wiki/File:icon.png">
                                        icon.png
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
            <div id="P154">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P154">
                        logo
                    </a>
                </div>
                <div class="wikibase-statementlistview">
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-rankselector">
                            <span class="wikibase-rankselector-normal"></span>
                        </div>
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a href="https://commons.wikimedia.org/wiki/File:logo.png">
                                        logo.png
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """
        html_etree = fromstring(html)

        image_src = wikidata.add_image(html_etree)
        self.assertEqual(image_src,
                         "https://commons.wikimedia.org/wiki/Special:FilePath/logo.png?width=500&height=400")

    def test_add_attribute(self):
        """add_attribute appends a label/value pair for a known property."""
        html = u"""
        <div>
            <div id="P27">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P27">
                        country of citizenship
                    </a>
                </div>
                <div class="wikibase-statementlistview">
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-rankselector">
                            <span class="wikibase-rankselector-normal"></span>
                        </div>
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a href="/wiki/Q145">
                                        United Kingdom
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """
        attributes = []
        html_etree = fromstring(html)

        # A property id that is absent from the page leaves the list untouched.
        wikidata.add_attribute(attributes, html_etree, "Fail")
        self.assertEqual(attributes, [])

        wikidata.add_attribute(attributes, html_etree, "P27")
        self.assertEqual(len(attributes), 1)
        self.assertEqual(attributes[0]["label"], "Country of citizenship")
        self.assertEqual(attributes[0]["value"], "United Kingdom")

        # Date value: the expected value omits the calendar name.
        html = u"""
        <div>
            <div id="P569">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P569">
                        date of birth
                    </a>
                </div>
                <div class="wikibase-statementlistview">
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-rankselector">
                            <span class="wikibase-rankselector-normal"></span>
                        </div>
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    27 January 1832
                                    <sup class="wb-calendar-name">
                                        Gregorian
                                    </sup>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """
        attributes = []
        html_etree = fromstring(html)
        wikidata.add_attribute(attributes, html_etree, "P569", date=True)
        self.assertEqual(len(attributes), 1)
        self.assertEqual(attributes[0]["label"], "Date of birth")
        self.assertEqual(attributes[0]["value"], "27 January 1832")

        # Two statements, one ranked "normal" and one ranked "preferred".
        html = u"""
        <div>
            <div id="P6">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P27">
                        head of government
                    </a>
                </div>
                <div class="wikibase-statementlistview">
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-rankselector">
                            <span class="wikibase-rankselector-normal"></span>
                        </div>
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a href="/wiki/Q206">
                                        Old Prime Minister
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-rankselector">
                            <span class="wikibase-rankselector-preferred"></span>
                        </div>
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a href="/wiki/Q3099714">
                                        Actual Prime Minister
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """
        # Without trim, both values are expected, joined by a comma.
        attributes = []
        html_etree = fromstring(html)
        wikidata.add_attribute(attributes, html_etree, "P6")
        self.assertEqual(len(attributes), 1)
        self.assertEqual(attributes[0]["label"], "Head of government")
        self.assertEqual(attributes[0]["value"], "Old Prime Minister, Actual Prime Minister")

        # With trim=True only the preferred-rank value is expected.
        attributes = []
        html_etree = fromstring(html)
        wikidata.add_attribute(attributes, html_etree, "P6", trim=True)
        self.assertEqual(len(attributes), 1)
        self.assertEqual(attributes[0]["value"], "Actual Prime Minister")

    def test_add_url(self):
        """add_url appends one title/url entry per external link found."""
        html = u"""
        <div>
            <div id="P856">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P856">
                        official website
                    </a>
                </div>
                <div class="wikibase-statementlistview">
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a class="external free" href="https://searx.me">
                                        https://searx.me/
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """
        urls = []
        html_etree = fromstring(html)
        wikidata.add_url(urls, html_etree, 'P856')
        self.assertEqual(len(urls), 1)
        self.assertIn({'title': 'Official website', 'url': 'https://searx.me/'}, urls)

        # A custom label overrides the property label; when a results list
        # is passed, the same entry is expected there as well.
        urls = []
        results = []
        wikidata.add_url(urls, html_etree, 'P856', 'custom label', results=results)
        self.assertEqual(len(urls), 1)
        self.assertEqual(len(results), 1)
        self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, urls)
        self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, results)

        # Two statements for the same property produce two entries.
        html = u"""
        <div>
            <div id="P856">
                <div class="wikibase-statementgroupview-property-label">
                    <a href="/wiki/Property:P856">
                        official website
                    </a>
                </div>
                <div class="wikibase-statementlistview">
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a class="external free" href="http://www.worldofwarcraft.com">
                                        http://www.worldofwarcraft.com
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                    <div class="wikibase-statementview listview-item">
                        <div class="wikibase-statementview-mainsnak">
                            <div>
                                <div class="wikibase-snakview-value">
                                    <a class="external free" href="http://eu.battle.net/wow/en/">
                                        http://eu.battle.net/wow/en/
                                    </a>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """
        urls = []
        html_etree = fromstring(html)
        wikidata.add_url(urls, html_etree, 'P856')
        self.assertEqual(len(urls), 2)
        self.assertIn({'title': 'Official website', 'url': 'http://www.worldofwarcraft.com'}, urls)
        self.assertIn({'title': 'Official website', 'url': 'http://eu.battle.net/wow/en/'}, urls)

    def test_get_imdblink(self):
        """get_imdblink builds an IMDb URL from the external-id text."""
        html = u"""
        <div>
            <div class="wikibase-statementview-mainsnak">
                <div>
                    <div class="wikibase-snakview-value">
                        <a class="wb-external-id" href="http://www.imdb.com/tt0433664">
                            tt0433664
                        </a>
                    </div>
                </div>
            </div>
        </div>
        """
        html_etree = fromstring(html)
        # NOTE(review): the result for this "tt" id is never asserted; the
        # call only verifies that no exception is raised.
        imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/')

        html = u"""
        <div>
            <div class="wikibase-statementview-mainsnak">
                <div>
                    <div class="wikibase-snakview-value">
                        <a class="wb-external-id"
                           href="href="http://tools.wmflabs.org/...http://www.imdb.com/&id=nm4915994"">
                            nm4915994
                        </a>
                    </div>
                </div>
            </div>
        </div>
        """
        html_etree = fromstring(html)
        imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/')
        self.assertIn('https://www.imdb.com/name/nm4915994', imdblink)

    def test_get_geolink(self):
        """get_geolink turns coordinates into an OpenStreetMap link."""
        html = u"""
        <div>
            <div class="wikibase-statementview-mainsnak">
                <div>
                    <div class="wikibase-snakview-value">
                        60°N, 40°E
                    </div>
                </div>
            </div>
        </div>
        """
        html_etree = fromstring(html)
        geolink = wikidata.get_geolink(html_etree)
        self.assertIn('https://www.openstreetmap.org/', geolink)
        self.assertIn('lat=60&lon=40', geolink)

        # South / West coordinates are expected as negative decimal degrees.
        html = u"""
        <div>
            <div class="wikibase-statementview-mainsnak">
                <div>
                    <div class="wikibase-snakview-value">
                        34°35'59"S, 58°22'55"W
                    </div>
                </div>
            </div>
        </div>
        """
        html_etree = fromstring(html)
        geolink = wikidata.get_geolink(html_etree)
        self.assertIn('https://www.openstreetmap.org/', geolink)
        self.assertIn('lat=-34.59', geolink)
        self.assertIn('lon=-58.38', geolink)

    def test_get_wikilink(self):
        """get_wikilink returns the https sitelink for a site id, else None."""
        html = """
        <div>
            <div>
                <ul class="wikibase-sitelinklistview-listview">
                    <li data-wb-siteid="arwiki"><a href="http://ar.wikipedia.org/wiki/Test">Test</a></li>
                    <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
                </ul>
            </div>
            <div>
                <ul class="wikibase-sitelinklistview-listview">
                    <li data-wb-siteid="enwikiquote"><a href="https://en.wikiquote.org/wiki/Test">Test</a></li>
                </ul>
            </div>
        </div>
        """
        html_etree = fromstring(html)
        wikilink = wikidata.get_wikilink(html_etree, 'nowiki')
        self.assertIsNone(wikilink)
        wikilink = wikidata.get_wikilink(html_etree, 'enwiki')
        self.assertEqual(wikilink, 'https://en.wikipedia.org/wiki/Test')
        wikilink = wikidata.get_wikilink(html_etree, 'arwiki')
        self.assertEqual(wikilink, 'https://ar.wikipedia.org/wiki/Test')
        wikilink = wikidata.get_wikilink(html_etree, 'enwikiquote')
        self.assertEqual(wikilink, 'https://en.wikiquote.org/wiki/Test')