# -*- coding: utf-8 -*-
"""
Wikidata

@website     https://wikidata.org
@provide-api yes (https://query.wikidata.org/)

@using-api   yes
@results     JSON
@stable      yes
@parse       url, infobox
"""

from urllib.parse import urlencode
from json import loads

from dateutil.parser import isoparse
from babel.dates import format_datetime, format_date, format_time, get_datetime_format

from searx import logger
from searx.data import WIKIDATA_UNITS
from searx.poolrequests import post, get
from searx.utils import match_language, searx_useragent, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import

logger = logger.getChild('wikidata')
# SPARQL
SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'
WIKIDATA_PROPERTIES = {
    'P434': 'MusicBrainz',
    'P435': 'MusicBrainz',
    'P436': 'MusicBrainz',
    'P966': 'MusicBrainz',
    'P345': 'IMDb',
    'P2397': 'YouTube',
    'P1651': 'YouTube',
    'P2002': 'Twitter',
    'P2013': 'Facebook',
    'P2003': 'Instagram',
}
# SERVICE wikibase:mwapi : https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI
# SERVICE wikibase:label : https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE
# https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates
# https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model
# optimization:
# * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization
# * https://github.com/blazegraph/database/wiki/QueryHints
QUERY_TEMPLATE = """
SELECT ?item ?itemLabel ?itemDescription ?lat ?long %SELECT%
WHERE
{
  SERVICE wikibase:mwapi {
      bd:serviceParam wikibase:endpoint "www.wikidata.org";
      wikibase:api "EntitySearch";
      wikibase:limit 1;
      mwapi:search "%QUERY%";
      mwapi:language "%LANGUAGE%".
      ?item wikibase:apiOutputItem mwapi:item.
  }

  %WHERE%

  SERVICE wikibase:label {
      bd:serviceParam wikibase:language "%LANGUAGE%,en".
      ?item rdfs:label ?itemLabel .
      ?item schema:description ?itemDescription .
      %WIKIBASE_LABELS%
  }
}
GROUP BY ?item ?itemLabel ?itemDescription ?lat ?long %GROUP_BY%
"""
# Get the calendar names and the property names
QUERY_PROPERTY_NAMES = """
SELECT ?item ?name
WHERE {
    {
        SELECT ?item
        WHERE { ?item wdt:P279* wd:Q12132 }
    } UNION {
        VALUES ?item { %ATTRIBUTES% }
    }
    OPTIONAL { ?item rdfs:label ?name. }
}
"""

# https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1
# https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html
sparql_string_escape = get_string_replaces_function({'\t': '\\\t',
                                                     '\n': '\\\n',
                                                     '\r': '\\\r',
                                                     '\b': '\\\b',
                                                     '\f': '\\\f',
                                                     '\"': '\\\"',
                                                     '\'': '\\\'',
                                                     '\\': '\\\\'})

replace_http_by_https = get_string_replaces_function({'http:': 'https:'})


def get_headers():
    # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits
    return {
        'Accept': 'application/sparql-results+json',
        'User-Agent': searx_useragent()
    }


def get_label_for_entity(entity_id, language):
    name = WIKIDATA_PROPERTIES.get(entity_id)
    if name is None:
        name = WIKIDATA_PROPERTIES.get((entity_id, language))
    if name is None:
        name = WIKIDATA_PROPERTIES.get((entity_id, language.split('-')[0]))
    if name is None:
        name = WIKIDATA_PROPERTIES.get((entity_id, 'en'))
    if name is None:
        name = entity_id
    return name
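
# The lookup order above, illustrated (assuming init() has populated the
# labels; the language values here are hypothetical):
#   WIKIDATA_PROPERTIES['P434']             -> 'MusicBrainz' (hard-coded above)
#   WIKIDATA_PROPERTIES[('P571', 'pt-BR')]  -> label for the exact language
#   WIKIDATA_PROPERTIES[('P571', 'pt')]     -> label for the base language
#   WIKIDATA_PROPERTIES[('P571', 'en')]     -> English fallback
#   'P571'                                  -> the raw property id, last resort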


def send_wikidata_query(query, method='GET'):
    if method == 'GET':
        # query will be cached by wikidata
        http_response = get(SPARQL_ENDPOINT_URL + '?' + urlencode({'query': query}), headers=get_headers())
    else:
        # query won't be cached by wikidata
        http_response = post(SPARQL_ENDPOINT_URL, data={'query': query}, headers=get_headers())
    if http_response.status_code != 200:
        logger.debug('SPARQL endpoint error %s', http_response.content.decode())
    logger.debug('request time %s', str(http_response.elapsed))
    http_response.raise_for_status()
    return loads(http_response.content.decode())
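
# Minimal usage sketch (hypothetical query; the endpoint answers in the
# SPARQL 1.1 JSON results format, so bindings live under
# data['results']['bindings']):
#
#   data = send_wikidata_query('SELECT ?klass WHERE { wd:Q42 wdt:P31 ?klass }')
#   for binding in data['results']['bindings']:
#       print(binding['klass']['value'])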


def request(query, params):
    language = params['language'].split('-')[0]
    if language == 'all':
        language = 'en'
    else:
        language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]

    query, attributes = get_query(query, language)

    params['method'] = 'POST'
    params['url'] = SPARQL_ENDPOINT_URL
    params['data'] = {'query': query}
    params['headers'] = get_headers()

    params['language'] = language
    params['attributes'] = attributes
    return params


def response(resp):
    results = []
    jsonresponse = loads(resp.content.decode())

    language = resp.search_params['language'].lower()
    attributes = resp.search_params['attributes']

    seen_entities = set()

    for result in jsonresponse.get('results', {}).get('bindings', []):
        attribute_result = {key: value['value'] for key, value in result.items()}
        entity_url = attribute_result['item']
        if entity_url not in seen_entities:
            seen_entities.add(entity_url)
            results += get_results(attribute_result, attributes, language)
        else:
            logger.debug('The SPARQL request returns duplicate entities: %s', str(attribute_result))

    return results


def get_results(attribute_result, attributes, language):
    results = []
    infobox_title = attribute_result.get('itemLabel')
    infobox_id = attribute_result['item']
    infobox_id_lang = None
    infobox_urls = []
    infobox_attributes = []
    infobox_content = attribute_result.get('itemDescription', [])
    img_src = None
    img_src_priority = 100

    for attribute in attributes:
        value = attribute.get_str(attribute_result, language)
        if value is not None and value != '':
            attribute_type = type(attribute)

            if attribute_type in (WDURLAttribute, WDArticle):
                # get_select() uses group_concat(distinct ...;separator=", "),
                # so split the concatenated value back into individual URLs here
                for url in value.split(', '):
                    infobox_urls.append({'title': attribute.get_label(language), 'url': url, **attribute.kwargs})
                    # "normal" results (not infobox) include official website and Wikipedia links.
                    if attribute.kwargs.get('official') or attribute_type == WDArticle:
                        results.append({'title': infobox_title, 'url': url})
                    # update the infobox_id with the wikipedia URL
                    # first the local wikipedia URL, and as fallback the english wikipedia URL
                    if attribute_type == WDArticle\
                       and ((attribute.language == 'en' and infobox_id_lang is None)
                            or attribute.language != 'en'):
                        infobox_id_lang = attribute.language
                        infobox_id = url
            elif attribute_type == WDImageAttribute:
                # this attribute is an image.
                # replace the current image only if this one has a lower priority value
                # (the infobox contains only one image).
                if attribute.priority < img_src_priority:
                    img_src = value
                    img_src_priority = attribute.priority
            elif attribute_type == WDGeoAttribute:
                # geocoordinate link
                # use the area to get the OSM zoom
                # Note: the unit is ignored (the value must be in km², otherwise the zoom is wrong)
                # Should use the normalized value p:P2046/psn:P2046/wikibase:quantityAmount instead
                area = attribute_result.get('P2046')
                osm_zoom = area_to_osm_zoom(area) if area else 19
                url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom)
                if url:
                    infobox_urls.append({'title': attribute.get_label(language),
                                         'url': url,
                                         'entity': attribute.name})
            else:
                infobox_attributes.append({'label': attribute.get_label(language),
                                           'value': value,
                                           'entity': attribute.name})

    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)

    # add the wikidata URL at the end
    infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']})

    if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and\
       len(infobox_content) == 0:
        results.append({
            'url': infobox_urls[0]['url'],
            'title': infobox_title,
            'content': infobox_content
        })
    else:
        results.append({
            'infobox': infobox_title,
            'id': infobox_id,
            'content': infobox_content,
            'img_src': img_src,
            'urls': infobox_urls,
            'attributes': infobox_attributes
        })
    return results


def get_query(query, language):
    attributes = get_attributes(language)
    select = [a.get_select() for a in attributes]
    where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes]))
    wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes]))
    group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes]))
    query = QUERY_TEMPLATE\
        .replace('%QUERY%', sparql_string_escape(query))\
        .replace('%SELECT%', ' '.join(select))\
        .replace('%WHERE%', '\n  '.join(where))\
        .replace('%WIKIBASE_LABELS%', '\n      '.join(wikibase_label))\
        .replace('%GROUP_BY%', ' '.join(group_by))\
        .replace('%LANGUAGE%', language)
    return query, attributes
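
# Usage sketch (hypothetical search term): request() above calls
#
#   query, attributes = get_query('Paris', 'fr')
#
# and stores `attributes` in the request params so that response() and
# get_results() can decode each column of the result rows with the matching
# WDAttribute object.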


def get_attributes(language):
    attributes = []

    def add_value(name):
        attributes.append(WDAttribute(name))

    def add_amount(name):
        attributes.append(WDAmountAttribute(name))

    def add_label(name):
        attributes.append(WDLabelAttribute(name))

    def add_url(name, url_id=None, **kwargs):
        attributes.append(WDURLAttribute(name, url_id, kwargs))

    def add_image(name, url_id=None, priority=1):
        attributes.append(WDImageAttribute(name, url_id, priority))

    def add_date(name):
        attributes.append(WDDateAttribute(name))

    # Dates
    for p in ['P571',   # inception date
              'P576',   # dissolution date
              'P580',   # start date
              'P582',   # end date
              'P569',   # date of birth
              'P570',   # date of death
              'P619',   # date of spacecraft launch
              'P620']:  # date of spacecraft landing
        add_date(p)

    for p in ['P27',    # country of citizenship
              'P495',   # country of origin
              'P17',    # country
              'P159']:  # headquarters location
        add_label(p)

    # Places
    for p in ['P36',    # capital
              'P35',    # head of state
              'P6',     # head of government
              'P122',   # basic form of government
              'P37']:   # official language
        add_label(p)

    add_value('P1082')   # population
    add_amount('P2046')  # area
    add_amount('P281')   # postal code
    add_label('P38')     # currency
    add_amount('P2048')  # height (building)

    # Media
    for p in ['P400',   # platform (videogames, computing)
              'P50',    # author
              'P170',   # creator
              'P57',    # director
              'P175',   # performer
              'P178',   # developer
              'P162',   # producer
              'P176',   # manufacturer
              'P58',    # screenwriter
              'P272',   # production company
              'P264',   # record label
              'P123',   # publisher
              'P449',   # original network
              'P750',   # distributed by
              'P86']:   # composer
        add_label(p)

    add_date('P577')    # publication date
    add_label('P136')   # genre (music, film, artistic...)
    add_label('P364')   # original language
    add_value('P212')   # ISBN-13
    add_value('P957')   # ISBN-10
    add_label('P275')   # copyright license
    add_label('P277')   # programming language
    add_value('P348')   # version
    add_label('P840')   # narrative location

    # Languages
    add_value('P1098')  # number of speakers
    add_label('P282')   # writing system
    add_label('P1018')  # language regulatory body
    add_value('P218')   # language code (ISO 639-1)

    # Other
    add_label('P169')   # ceo
    add_label('P112')   # founded by
    add_label('P1454')  # legal form (company, organization)
    add_label('P137')   # operator (service, facility, ...)
    add_label('P1029')  # crew members
    add_label('P225')   # taxon name
    add_value('P274')   # chemical formula
    add_label('P1346')  # winner (sports, contests, ...)
    add_value('P1120')  # number of deaths
    add_value('P498')   # currency code (ISO 4217)

    # URL
    add_url('P856', official=True)          # official website
    attributes.append(WDArticle(language))  # wikipedia (user language)
    if not language.startswith('en'):
        attributes.append(WDArticle('en'))  # wikipedia (english)
    add_url('P1324')    # source code repository
    add_url('P1581')    # blog
    add_url('P434', url_id='musicbrainz_artist')
    add_url('P435', url_id='musicbrainz_work')
    add_url('P436', url_id='musicbrainz_release_group')
    add_url('P966', url_id='musicbrainz_label')
    add_url('P345', url_id='imdb_id')
    add_url('P2397', url_id='youtube_channel')
    add_url('P1651', url_id='youtube_video')
    add_url('P2002', url_id='twitter_profile')
    add_url('P2013', url_id='facebook_profile')
    add_url('P2003', url_id='instagram_profile')

    # Map
    attributes.append(WDGeoAttribute('P625'))

    # Image
    add_image('P15', priority=1, url_id='wikimedia_image')    # route map
    add_image('P242', priority=2, url_id='wikimedia_image')   # locator map
    add_image('P154', priority=3, url_id='wikimedia_image')   # logo
    add_image('P18', priority=4, url_id='wikimedia_image')    # image
    add_image('P41', priority=5, url_id='wikimedia_image')    # flag
    add_image('P2716', priority=6, url_id='wikimedia_image')  # collage
    add_image('P2910', priority=7, url_id='wikimedia_image')  # icon

    return attributes


class WDAttribute:
    __slots__ = 'name',

    def __init__(self, name):
        self.name = name

    def get_select(self):
        return '(group_concat(distinct ?{name};separator=", ") as ?{name}s)'.replace('{name}', self.name)

    def get_label(self, language):
        return get_label_for_entity(self.name, language)

    def get_where(self):
        return "OPTIONAL { ?item wdt:{name} ?{name} . }".replace('{name}', self.name)

    def get_wikibase_label(self):
        return ""

    def get_group_by(self):
        return ""

    def get_str(self, result, language):
        return result.get(self.name + 's')

    def __repr__(self):
        return '<' + str(type(self).__name__) + ':' + self.name + '>'
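
# Illustrative output for WDAttribute('P1082') (population), to show how the
# methods above combine; the strings are sketched by hand from the code:
#   get_select() -> '(group_concat(distinct ?P1082;separator=", ") as ?P1082s)'
#   get_where()  -> 'OPTIONAL { ?item wdt:P1082 ?P1082 . }'
#   get_str()    -> the value bound to ?P1082s in a result row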


class WDAmountAttribute(WDAttribute):
    def get_select(self):
        return '?{name} ?{name}Unit'.replace('{name}', self.name)

    def get_where(self):
        return """  OPTIONAL { ?item p:{name} ?{name}Node .
  ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} .
  OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace('{name}', self.name)

    def get_group_by(self):
        return self.get_select()

    def get_str(self, result, language):
        value = result.get(self.name)
        unit = result.get(self.name + "Unit")
        if unit is not None:
            unit = unit.replace('http://www.wikidata.org/entity/', '')
            return value + " " + get_label_for_entity(unit, language)
        return value


class WDArticle(WDAttribute):
    __slots__ = 'language', 'kwargs'

    def __init__(self, language, kwargs=None):
        super().__init__('wikipedia')
        self.language = language
        self.kwargs = kwargs or {}

    def get_label(self, language):
        # language parameter is ignored
        return "Wikipedia ({language})".replace('{language}', self.language)

    def get_select(self):
        return "?article{language} ?articleName{language}".replace('{language}', self.language)

    def get_where(self):
        return """OPTIONAL { ?article{language} schema:about ?item ;
            schema:inLanguage "{language}" ;
            schema:isPartOf <https://{language}.wikipedia.org/> ;
            schema:name ?articleName{language} . }""".replace('{language}', self.language)

    def get_group_by(self):
        return self.get_select()

    def get_str(self, result, language):
        key = 'article{language}'.replace('{language}', self.language)
        return result.get(key)


class WDLabelAttribute(WDAttribute):
    def get_select(self):
        return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name)

    def get_where(self):
        return "OPTIONAL { ?item wdt:{name} ?{name} . }".replace('{name}', self.name)

    def get_wikibase_label(self):
        return "?{name} rdfs:label ?{name}Label .".replace('{name}', self.name)

    def get_str(self, result, language):
        return result.get(self.name + 'Labels')


class WDURLAttribute(WDAttribute):
    HTTP_WIKIMEDIA_IMAGE = 'http://commons.wikimedia.org/wiki/Special:FilePath/'

    __slots__ = 'url_id', 'kwargs'

    def __init__(self, name, url_id=None, kwargs=None):
        super().__init__(name)
        self.url_id = url_id
        self.kwargs = kwargs

    def get_str(self, result, language):
        value = result.get(self.name + 's')
        if self.url_id and value is not None and value != '':
            value = value.split(',')[0]
            url_id = self.url_id
            if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):
                value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):]
                url_id = 'wikimedia_image'
            return get_external_url(url_id, value)
        return value
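
# Illustrative behaviour (hypothetical values): for WDURLAttribute('P345',
# url_id='imdb_id') and a raw result value 'tt0076759', get_str() delegates to
# get_external_url('imdb_id', 'tt0076759') to build the full IMDb URL. Values
# starting with HTTP_WIKIMEDIA_IMAGE are instead routed through the
# 'wikimedia_image' resolver.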


class WDGeoAttribute(WDAttribute):
    def get_label(self, language):
        return "OpenStreetMap"

    def get_select(self):
        return "?{name}Lat ?{name}Long".replace('{name}', self.name)

    def get_where(self):
        return """OPTIONAL { ?item p:{name}/psv:{name} [
    wikibase:geoLatitude ?{name}Lat ;
    wikibase:geoLongitude ?{name}Long ] }""".replace('{name}', self.name)

    def get_group_by(self):
        return self.get_select()

    def get_str(self, result, language):
        latitude = result.get(self.name + 'Lat')
        longitude = result.get(self.name + 'Long')
        if latitude and longitude:
            return latitude + ' ' + longitude
        return None

    def get_geo_url(self, result, osm_zoom=19):
        latitude = result.get(self.name + 'Lat')
        longitude = result.get(self.name + 'Long')
        if latitude and longitude:
            return get_earth_coordinates_url(latitude, longitude, osm_zoom)
        return None


class WDImageAttribute(WDURLAttribute):
    __slots__ = 'priority',

    def __init__(self, name, url_id=None, priority=100):
        super().__init__(name, url_id)
        self.priority = priority


class WDDateAttribute(WDAttribute):
    def get_select(self):
        return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)

    def get_where(self):
        # To remove duplicates, add
        # FILTER NOT EXISTS { ?item p:{name}/psv:{name}/wikibase:timeValue ?{name}bis FILTER (?{name}bis < ?{name}) }
        # but that filter is too slow, so the response function ignores duplicate results instead
        # (see the seen_entities variable)
        return """OPTIONAL { ?item p:{name}/psv:{name} [
    wikibase:timeValue ?{name} ;
    wikibase:timePrecision ?{name}timePrecision ;
    wikibase:timeTimezone ?{name}timeZone ;
    wikibase:timeCalendarModel ?{name}timeCalendar ] . }
    hint:Prior hint:rangeSafe true;""".replace('{name}', self.name)

    def get_group_by(self):
        return self.get_select()

    def format_8(self, value, locale):
        # precision: less than a year
        return value

    def format_9(self, value, locale):
        year = int(value)
        # precision: year
        if year < 1584:
            if year < 0:
                return str(year - 1)
            return str(year)
        timestamp = isoparse(value)
        return format_date(timestamp, format='yyyy', locale=locale)

    def format_10(self, value, locale):
        # precision: month
        timestamp = isoparse(value)
        return format_date(timestamp, format='MMMM y', locale=locale)

    def format_11(self, value, locale):
        # precision: day
        timestamp = isoparse(value)
        return format_date(timestamp, format='full', locale=locale)
    def format_13(self, value, locale):
        timestamp = isoparse(value)
        # precision: minute
        # combine a full time and a short date with the locale's datetime pattern
        return get_datetime_format('medium', locale=locale) \
            .replace("'", "") \
            .replace('{0}', format_time(timestamp, 'full', tzinfo=None,
                                        locale=locale)) \
            .replace('{1}', format_date(timestamp, 'short', locale=locale))
    def format_14(self, value, locale):
        # precision: second.
        return format_datetime(isoparse(value), format='full', locale=locale)
    DATE_FORMAT = {
        '0': ('format_8', 1000000000),
        '1': ('format_8', 100000000),
        '2': ('format_8', 10000000),
        '3': ('format_8', 1000000),
        '4': ('format_8', 100000),
        '5': ('format_8', 10000),
        '6': ('format_8', 1000),
        '7': ('format_8', 100),
        '8': ('format_8', 10),
        '9': ('format_9', 1),    # year
        '10': ('format_10', 1),  # month
        '11': ('format_11', 0),  # day
        '12': ('format_13', 0),  # hour (not supported by babel, display minute)
        '13': ('format_13', 0),  # minute
        '14': ('format_14', 0)   # second
    }
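
    # The keys are wikibase:timePrecision codes (9 = year ... 14 = second,
    # lower codes = coarser than a year); the values are (formatter name,
    # truncate flag). When the flag is >= 1, get_str() below keeps only the
    # year part of the ISO timestamp before formatting, e.g. (illustrative)
    # precision '9' turns '1969-07-20T00:00:00Z' into format_9('1969', ...).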

    def get_str(self, result, language):
        value = result.get(self.name)
        if value == '' or value is None:
            return None
        precision = result.get(self.name + 'timePrecision')
        date_format = WDDateAttribute.DATE_FORMAT.get(precision)
        if date_format is not None:
            format_method = getattr(self, date_format[0])
            precision = date_format[1]
            try:
                if precision >= 1:
                    t = value.split('-')
                    if value.startswith('-'):
                        value = '-' + t[1]
                    else:
                        value = t[0]
                return format_method(value, language)
            except Exception:
                return value
        return value


def debug_explain_wikidata_query(query, method='GET'):
    if method == 'GET':
        http_response = get(SPARQL_EXPLAIN_URL + '&' + urlencode({'query': query}), headers=get_headers())
    else:
        http_response = post(SPARQL_EXPLAIN_URL, data={'query': query}, headers=get_headers())
    http_response.raise_for_status()
    return http_response.content


def init(engine_settings=None):
    # WIKIDATA_PROPERTIES : add unit symbols
    WIKIDATA_PROPERTIES.update(WIKIDATA_UNITS)

    # WIKIDATA_PROPERTIES : add property labels
    wikidata_property_names = []
    for attribute in get_attributes('en'):
        if type(attribute) in (WDAttribute, WDAmountAttribute, WDURLAttribute, WDDateAttribute, WDLabelAttribute):
            if attribute.name not in WIKIDATA_PROPERTIES:
                wikidata_property_names.append("wd:" + attribute.name)
    query = QUERY_PROPERTY_NAMES.replace('%ATTRIBUTES%', " ".join(wikidata_property_names))
    jsonresponse = send_wikidata_query(query)
    for result in jsonresponse.get('results', {}).get('bindings', []):
        name = result['name']['value']
        lang = result['name']['xml:lang']
        entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
        WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
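
# After init() (illustrative entries; the actual labels come from the live
# property-name query above):
#   WIKIDATA_PROPERTIES[('P571', 'en')]  -> 'Inception'
#   WIKIDATA_PROPERTIES[('P1082', 'en')] -> 'Population'
# get_label_for_entity() then resolves these per-language labels at display time.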