
[mod] wikidata engine: use one SPARQL request instead of 2 HTTP requests.
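
As an illustration of what "one SPARQL request" means here: a single query against the public endpoint at https://query.wikidata.org/sparql can both resolve the search term to an entity (via SERVICE wikibase:mwapi) and fetch its label and description (via SERVICE wikibase:label), replacing the old search-page scrape followed by a detail request. The sketch below is not part of the commit; it mirrors the QUERY_TEMPLATE pattern from the diff, but the plain `requests` call, the hard-coded search term "Paris", the fixed language "en" and the User-Agent string are illustrative assumptions; the engine itself goes through searx.poolrequests and searx_useragent().

```python
# Illustrative sketch only: one SPARQL request instead of a search request
# followed by a detail request. Endpoint URL and Accept header match the
# ones used in the engine; the query is a trimmed-down QUERY_TEMPLATE.
from urllib.parse import urlencode
import requests  # the engine uses searx.poolrequests instead

SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'

query = """
SELECT ?item ?itemLabel ?itemDescription
WHERE {
  SERVICE wikibase:mwapi {
    bd:serviceParam wikibase:endpoint "www.wikidata.org";
                    wikibase:api "EntitySearch";
                    wikibase:limit 1;
                    mwapi:search "Paris";
                    mwapi:language "en".
    ?item wikibase:apiOutputItem mwapi:item.
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

response = requests.get(
    SPARQL_ENDPOINT_URL + '?' + urlencode({'query': query}),
    headers={'Accept': 'application/sparql-results+json',
             'User-Agent': 'example-user-agent/0.1 (illustration only)'})
response.raise_for_status()
for binding in response.json()['results']['bindings']:
    print(binding['item']['value'],
          binding.get('itemLabel', {}).get('value'),
          binding.get('itemDescription', {}).get('value'))
```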

Alexandre Flament committed 4 years ago (commit 95bd6033fa)
1 changed file with 637 additions and 452 deletions:
searx/engines/wikidata.py

@@ -3,501 +3,686 @@
  Wikidata
 
  @website     https://wikidata.org
- @provide-api yes (https://wikidata.org/w/api.php)
+ @provide-api yes (https://query.wikidata.org/)
 
- @using-api   partially (most things require scraping)
- @results     JSON, HTML
- @stable      no (html can change)
+ @using-api   yes
+ @results     JSON
+ @stable      yes
  @parse       url, infobox
 """
 
-from searx import logger
-from searx.poolrequests import get
-from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
-from searx.utils import extract_text, match_language, eval_xpath
 
 from urllib.parse import urlencode
 from json import loads
-from lxml.html import fromstring
-from lxml import etree
+
+from dateutil.parser import isoparse
+from babel.dates import format_datetime, format_date, format_time, get_datetime_format
+
+from searx import logger
+from searx.data import WIKIDATA_UNITS
+from searx.poolrequests import post, get
+from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
+from searx.utils import match_language, searx_useragent, get_string_replaces_function
+from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
 
 logger = logger.getChild('wikidata')
-result_count = 1
-
-# urls
-wikidata_host = 'https://www.wikidata.org'
-url_search = wikidata_host \
-    + '/w/index.php?{query}&ns0=1'
-
-wikidata_api = wikidata_host + '/w/api.php'
-url_detail = wikidata_api\
-    + '?action=parse&format=json&{query}'\
-    + '&redirects=1&prop=text%7Cdisplaytitle%7Cparsewarnings'\
-    + '&disableeditsection=1&preview=1&sectionpreview=1&disabletoc=1&utf8=1&formatversion=2'
-
-url_map = 'https://www.openstreetmap.org/'\
-    + '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
-url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
-
-# xpaths
-div_ids_xpath = '//div[@id]'
-wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
-title_xpath = '//*[contains(@class,"wikibase-title-label")]'
-description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
-label_xpath = './/div[contains(@class,"wikibase-statementgroupview-property-label")]/a'
-url_xpath = './/a[contains(@class,"external free") or contains(@class, "wb-external-id")]'
-wikilink_xpath = './/ul[contains(@class,"wikibase-sitelinklistview-listview")]'\
-    + '/li[contains(@data-wb-siteid,"{wikiid}")]//a/@href'
-property_row_xpath = './/div[contains(@class,"wikibase-statementview")]'
-preferred_rank_xpath = './/span[contains(@class,"wikibase-rankselector-preferred")]'
-value_xpath = './/div[contains(@class,"wikibase-statementview-mainsnak")]'\
-    + '/*/div[contains(@class,"wikibase-snakview-value")]'
-language_fallback_xpath = '//sup[contains(@class,"wb-language-fallback-indicator")]'
-calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
-media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a'
-
-
-def get_id_cache(result):
-    id_cache = {}
-    for e in eval_xpath(result, div_ids_xpath):
-        id = e.get('id')
-        if id.startswith('P'):
-            id_cache[id] = e
-    return id_cache
 
+# SPARQL
+SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
+SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'
+WIKIDATA_PROPERTIES = {
+    'P434': 'MusicBrainz',
+    'P435': 'MusicBrainz',
+    'P436': 'MusicBrainz',
+    'P966': 'MusicBrainz',
+    'P345': 'IMDb',
+    'P2397': 'YouTube',
+    'P1651': 'YouTube',
+    'P2002': 'Twitter',
+    'P2013': 'Facebook',
+    'P2003': 'Instagram',
+}
+
+# SERVICE wikibase:mwapi : https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI
+# SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE
+# https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates
+# https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model
+# optimization:
+# * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization
+# * https://github.com/blazegraph/database/wiki/QueryHints
+QUERY_TEMPLATE = """
+SELECT ?item ?itemLabel ?itemDescription ?lat ?long %SELECT%
+WHERE
+{
+  SERVICE wikibase:mwapi {
+        bd:serviceParam wikibase:endpoint "www.wikidata.org";
+        wikibase:api "EntitySearch";
+        wikibase:limit 1;
+        mwapi:search "%QUERY%";
+        mwapi:language "%LANGUAGE%".
+        ?item wikibase:apiOutputItem mwapi:item.
+  }
+
+  %WHERE%
+
+  SERVICE wikibase:label {
+      bd:serviceParam wikibase:language "%LANGUAGE%,en".
+      ?item rdfs:label ?itemLabel .
+      ?item schema:description ?itemDescription .
+      %WIKIBASE_LABELS%
+  }
+
+}
+GROUP BY ?item ?itemLabel ?itemDescription ?lat ?long %GROUP_BY%
+"""
 
-def request(query, params):
-    params['url'] = url_search.format(
-        query=urlencode({'search': query}))
-    return params
+# Get the calendar names and the property names
+QUERY_PROPERTY_NAMES = """
+SELECT ?item ?name
+WHERE {
+    {
+      SELECT ?item
+      WHERE { ?item wdt:P279* wd:Q12132 }
+    } UNION {
+      VALUES ?item { %ATTRIBUTES% }
+    }
+    OPTIONAL { ?item rdfs:label ?name. }
+}
+"""
 
 
-def response(resp):
-    results = []
-    htmlparser = etree.HTMLParser()
-    html = fromstring(resp.content.decode(), parser=htmlparser)
-    search_results = eval_xpath(html, wikidata_ids_xpath)
+# https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1
+# https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html
+sparql_string_escape = get_string_replaces_function({'\t': '\\\t',
+                                                     '\n': '\\\n',
+                                                     '\r': '\\\r',
+                                                     '\b': '\\\b',
+                                                     '\f': '\\\f',
+                                                     '\"': '\\\"',
+                                                     '\'': '\\\'',
+                                                     '\\': '\\\\'})
+
+replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
+
+
+def get_headers():
+    # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits
+    return {
+        'Accept': 'application/sparql-results+json',
+        'User-Agent': searx_useragent()
+    }
+
+
+def get_label_for_entity(entity_id, language):
+    name = WIKIDATA_PROPERTIES.get(entity_id)
+    if name is None:
+        name = WIKIDATA_PROPERTIES.get((entity_id, language))
+    if name is None:
+        name = WIKIDATA_PROPERTIES.get((entity_id, language.split('-')[0]))
+    if name is None:
+        name = WIKIDATA_PROPERTIES.get((entity_id, 'en'))
+    if name is None:
+        name = entity_id
+    return name
+
+
+def send_wikidata_query(query, method='GET'):
+    if method == 'GET':
+        # query will be cached by wikidata
+        http_response = get(SPARQL_ENDPOINT_URL + '?' + urlencode({'query': query}), headers=get_headers())
+    else:
+        # query won't be cached by wikidata
+        http_response = post(SPARQL_ENDPOINT_URL, data={'query': query}, headers=get_headers())
+    if http_response.status_code != 200:
+        logger.debug('SPARQL endpoint error %s', http_response.content.decode())
+    logger.debug('request time %s', str(http_response.elapsed))
+    http_response.raise_for_status()
+    return loads(http_response.content.decode())
+
 
-    if resp.search_params['language'].split('-')[0] == 'all':
+def request(query, params):
+    language = params['language'].split('-')[0]
+    if language == 'all':
         language = 'en'
     else:
-        language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0]
+        language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
+
+    query, attributes = get_query(query, language)
 
-    # TODO: make requests asynchronous to avoid timeout when result_count > 1
-    for search_result in search_results[:result_count]:
-        wikidata_id = search_result.split('/')[-1]
-        url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
-        htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content.decode())
-        results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser)
+    params['method'] = 'POST'
+    params['url'] = SPARQL_ENDPOINT_URL
+    params['data'] = {'query': query}
+    params['headers'] = get_headers()
+
+    params['language'] = language
+    params['attributes'] = attributes
+    return params
+
+
+def response(resp):
+    results = []
+    if resp.status_code != 200:
+        logger.debug('SPARQL endpoint error %s', resp.content.decode())
+    resp.raise_for_status()
+    jsonresponse = loads(resp.content.decode())
+
+    language = resp.search_params['language'].lower()
+    attributes = resp.search_params['attributes']
+
+    seen_entities = set()
+
+    for result in jsonresponse.get('results', {}).get('bindings', []):
+        attribute_result = {key: value['value'] for key, value in result.items()}
+        entity_url = attribute_result['item']
+        if entity_url not in seen_entities:
+            seen_entities.add(entity_url)
+            results += get_results(attribute_result, attributes, language)
+        else:
+            logger.debug('The SPARQL request returns duplicate entities: %s', str(attribute_result))
 
     return results
 
 
-def getDetail(jsonresponse, wikidata_id, language, locale, htmlparser):
+def get_results(attribute_result, attributes, language):
     results = []
-    urls = []
-    attributes = []
+    infobox_title = attribute_result.get('itemLabel')
+    infobox_id = attribute_result['item']
+    infobox_id_lang = None
+    infobox_urls = []
+    infobox_attributes = []
+    infobox_content = attribute_result.get('itemDescription')
+    img_src = None
+    img_src_priority = 100
+
+    for attribute in attributes:
+        value = attribute.get_str(attribute_result, language)
+        if value is not None and value != '':
+            attribute_type = type(attribute)
+
+            if attribute_type in (WDURLAttribute, WDArticle):
+                # the get_select() method uses group_concat(distinct ...;separator=", "),
+                # so split the value here
+                for url in value.split(', '):
+                    infobox_urls.append({'title': attribute.get_label(language), 'url': url, **attribute.kwargs})
+                    # "normal" results (not infobox) include official website and Wikipedia links.
+                    if attribute.kwargs.get('official') or attribute_type == WDArticle:
+                        results.append({'title': infobox_title, 'url': url})
+                    # update the infobox_id with the wikipedia URL
+                    # first the local wikipedia URL, and as fallback the english wikipedia URL
+                    if attribute_type == WDArticle\
+                       and ((attribute.language == 'en' and infobox_id_lang is None)
+                            or attribute.language != 'en'):
+                        infobox_id_lang = attribute.language
+                        infobox_id = url
+            elif attribute_type == WDImageAttribute:
+                # this attribute is an image.
+                # replace the current image only if the priority is lower
+                # (the infobox contains only one image).
+                if attribute.priority < img_src_priority:
+                    img_src = value
+                    img_src_priority = attribute.priority
+            elif attribute_type == WDGeoAttribute:
+                # geocoordinate link
+                # use the area to get the OSM zoom
+                # Note: the unit is ignored (it must be km², otherwise the calculation is wrong)
+                # Should use normalized value p:P2046/psn:P2046/wikibase:quantityAmount
+                area = attribute_result.get('P2046')
+                osm_zoom = area_to_osm_zoom(area) if area else 19
+                url = attribute.get_str(attribute_result, language, osm_zoom=osm_zoom)
+                if url:
+                    infobox_urls.append({'title': attribute.get_label(language),
+                                         'url': url,
+                                         'entity': attribute.name})
+            else:
+                infobox_attributes.append({'label': attribute.get_label(language),
+                                           'value': value,
+                                           'entity': attribute.name})
+
+    if infobox_id:
+        infobox_id = replace_http_by_https(infobox_id)
 
-    title = jsonresponse.get('parse', {}).get('displaytitle', {})
-    result = jsonresponse.get('parse', {}).get('text', {})
-
-    if not title or not result:
-        return results
-
-    title = fromstring(title, parser=htmlparser)
-    for elem in eval_xpath(title, language_fallback_xpath):
-        elem.getparent().remove(elem)
-    title = extract_text(eval_xpath(title, title_xpath))
-
-    result = fromstring(result, parser=htmlparser)
-    for elem in eval_xpath(result, language_fallback_xpath):
-        elem.getparent().remove(elem)
-
-    description = extract_text(eval_xpath(result, description_xpath))
-
-    id_cache = get_id_cache(result)
-
-    # URLS
-
-    # official website
-    add_url(urls, result, id_cache, 'P856', results=results)
-
-    # wikipedia
-    wikipedia_link_count = 0
-    wikipedia_link = get_wikilink(result, language + 'wiki')
-    if wikipedia_link:
-        wikipedia_link_count += 1
-        urls.append({'title': 'Wikipedia (' + language + ')',
-                     'url': wikipedia_link})
-
-    if language != 'en':
-        wikipedia_en_link = get_wikilink(result, 'enwiki')
-        if wikipedia_en_link:
-            wikipedia_link_count += 1
-            urls.append({'title': 'Wikipedia (en)',
-                         'url': wikipedia_en_link})
-
-    # TODO: get_wiki_firstlanguage
-    # if wikipedia_link_count == 0:
-
-    # more wikis
-    add_url(urls, result, id_cache, default_label='Wikivoyage (' + language + ')', link_type=language + 'wikivoyage')
-    add_url(urls, result, id_cache, default_label='Wikiquote (' + language + ')', link_type=language + 'wikiquote')
-    add_url(urls, result, id_cache, default_label='Wikimedia Commons', link_type='commonswiki')
-
-    add_url(urls, result, id_cache, 'P625', 'OpenStreetMap', link_type='geo')
-
-    # musicbrainz
-    add_url(urls, result, id_cache, 'P434', 'MusicBrainz', 'http://musicbrainz.org/artist/')
-    add_url(urls, result, id_cache, 'P435', 'MusicBrainz', 'http://musicbrainz.org/work/')
-    add_url(urls, result, id_cache, 'P436', 'MusicBrainz', 'http://musicbrainz.org/release-group/')
-    add_url(urls, result, id_cache, 'P966', 'MusicBrainz', 'http://musicbrainz.org/label/')
-
-    # IMDb
-    add_url(urls, result, id_cache, 'P345', 'IMDb', 'https://www.imdb.com/', link_type='imdb')
-    # source code repository
-    add_url(urls, result, id_cache, 'P1324')
-    # blog
-    add_url(urls, result, id_cache, 'P1581')
-    # social media links
-    add_url(urls, result, id_cache, 'P2397', 'YouTube', 'https://www.youtube.com/channel/')
-    add_url(urls, result, id_cache, 'P1651', 'YouTube', 'https://www.youtube.com/watch?v=')
-    add_url(urls, result, id_cache, 'P2002', 'Twitter', 'https://twitter.com/')
-    add_url(urls, result, id_cache, 'P2013', 'Facebook', 'https://facebook.com/')
-    add_url(urls, result, id_cache, 'P2003', 'Instagram', 'https://instagram.com/')
-
-    urls.append({'title': 'Wikidata',
-                 'url': 'https://www.wikidata.org/wiki/'
-                 + wikidata_id + '?uselang=' + language})
-
-    # INFOBOX ATTRIBUTES (ROWS)
-
-    # DATES
-    # inception date
-    add_attribute(attributes, id_cache, 'P571', date=True)
-    # dissolution date
-    add_attribute(attributes, id_cache, 'P576', date=True)
-    # start date
-    add_attribute(attributes, id_cache, 'P580', date=True)
-    # end date
-    add_attribute(attributes, id_cache, 'P582', date=True)
-    # date of birth
-    add_attribute(attributes, id_cache, 'P569', date=True)
-    # date of death
-    add_attribute(attributes, id_cache, 'P570', date=True)
-    # date of spacecraft launch
-    add_attribute(attributes, id_cache, 'P619', date=True)
-    # date of spacecraft landing
-    add_attribute(attributes, id_cache, 'P620', date=True)
-
-    # nationality
-    add_attribute(attributes, id_cache, 'P27')
-    # country of origin
-    add_attribute(attributes, id_cache, 'P495')
-    # country
-    add_attribute(attributes, id_cache, 'P17')
-    # headquarters
-    add_attribute(attributes, id_cache, 'Q180')
-
-    # PLACES
-    # capital
-    add_attribute(attributes, id_cache, 'P36', trim=True)
-    # head of state
-    add_attribute(attributes, id_cache, 'P35', trim=True)
-    # head of government
-    add_attribute(attributes, id_cache, 'P6', trim=True)
-    # type of government
-    add_attribute(attributes, id_cache, 'P122')
-    # official language
-    add_attribute(attributes, id_cache, 'P37')
-    # population
-    add_attribute(attributes, id_cache, 'P1082', trim=True)
-    # area
-    add_attribute(attributes, id_cache, 'P2046')
-    # currency
-    add_attribute(attributes, id_cache, 'P38', trim=True)
-    # heigth (building)
-    add_attribute(attributes, id_cache, 'P2048')
-
-    # MEDIA
-    # platform (videogames)
-    add_attribute(attributes, id_cache, 'P400')
-    # author
-    add_attribute(attributes, id_cache, 'P50')
-    # creator
-    add_attribute(attributes, id_cache, 'P170')
-    # director
-    add_attribute(attributes, id_cache, 'P57')
-    # performer
-    add_attribute(attributes, id_cache, 'P175')
-    # developer
-    add_attribute(attributes, id_cache, 'P178')
-    # producer
-    add_attribute(attributes, id_cache, 'P162')
-    # manufacturer
-    add_attribute(attributes, id_cache, 'P176')
-    # screenwriter
-    add_attribute(attributes, id_cache, 'P58')
-    # production company
-    add_attribute(attributes, id_cache, 'P272')
-    # record label
-    add_attribute(attributes, id_cache, 'P264')
-    # publisher
-    add_attribute(attributes, id_cache, 'P123')
-    # original network
-    add_attribute(attributes, id_cache, 'P449')
-    # distributor
-    add_attribute(attributes, id_cache, 'P750')
-    # composer
-    add_attribute(attributes, id_cache, 'P86')
-    # publication date
-    add_attribute(attributes, id_cache, 'P577', date=True)
-    # genre
-    add_attribute(attributes, id_cache, 'P136')
-    # original language
-    add_attribute(attributes, id_cache, 'P364')
-    # isbn
-    add_attribute(attributes, id_cache, 'Q33057')
-    # software license
-    add_attribute(attributes, id_cache, 'P275')
-    # programming language
-    add_attribute(attributes, id_cache, 'P277')
-    # version
-    add_attribute(attributes, id_cache, 'P348', trim=True)
-    # narrative location
-    add_attribute(attributes, id_cache, 'P840')
-
-    # LANGUAGES
-    # number of speakers
-    add_attribute(attributes, id_cache, 'P1098')
-    # writing system
-    add_attribute(attributes, id_cache, 'P282')
-    # regulatory body
-    add_attribute(attributes, id_cache, 'P1018')
-    # language code
-    add_attribute(attributes, id_cache, 'P218')
-
-    # OTHER
-    # ceo
-    add_attribute(attributes, id_cache, 'P169', trim=True)
-    # founder
-    add_attribute(attributes, id_cache, 'P112')
-    # legal form (company/organization)
-    add_attribute(attributes, id_cache, 'P1454')
-    # operator
-    add_attribute(attributes, id_cache, 'P137')
-    # crew members (tripulation)
-    add_attribute(attributes, id_cache, 'P1029')
-    # taxon
-    add_attribute(attributes, id_cache, 'P225')
-    # chemical formula
-    add_attribute(attributes, id_cache, 'P274')
-    # winner (sports/contests)
-    add_attribute(attributes, id_cache, 'P1346')
-    # number of deaths
-    add_attribute(attributes, id_cache, 'P1120')
-    # currency code
-    add_attribute(attributes, id_cache, 'P498')
-
-    image = add_image(id_cache)
-
-    if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
+    # add the wikidata URL at the end
+    infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']})
+
+    if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and\
+       len(infobox_content) == 0:
         results.append({
-                       'url': urls[0]['url'],
-                       'title': title,
-                       'content': description
-                       })
+            'url': infobox_urls[0]['url'],
+            'title': infobox_title,
+            'content': infobox_content
+        })
     else:
         results.append({
-                       'infobox': title,
-                       'id': wikipedia_link,
-                       'content': description,
-                       'img_src': image,
-                       'attributes': attributes,
-                       'urls': urls
-                       })
-
+            'infobox': infobox_title,
+            'id': infobox_id,
+            'content': infobox_content,
+            'img_src': img_src,
+            'urls': infobox_urls,
+            'attributes': infobox_attributes
+        })
     return results
 
 
-# only returns first match
-def add_image(id_cache):
-    # P15: route map, P242: locator map, P154: logo, P18: image, P242: map, P41: flag, P2716: collage, P2910: icon
-    property_ids = ['P15', 'P242', 'P154', 'P18', 'P242', 'P41', 'P2716', 'P2910']
+def get_query(query, language):
+    attributes = get_attributes(language)
+    select = [a.get_select() for a in attributes]
+    where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes]))
+    wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes]))
+    group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes]))
+    query = QUERY_TEMPLATE\
+        .replace('%QUERY%', sparql_string_escape(query))\
+        .replace('%SELECT%', ' '.join(select))\
+        .replace('%WHERE%', '\n  '.join(where))\
+        .replace('%WIKIBASE_LABELS%', '\n      '.join(wikibase_label))\
+        .replace('%GROUP_BY%', ' '.join(group_by))\
+        .replace('%LANGUAGE%', language)
+    return query, attributes
 
-    for property_id in property_ids:
-        image = id_cache.get(property_id, None)
-        if image is not None:
-            image_name = eval_xpath(image, media_xpath)
-            image_src = url_image.replace('{filename}', extract_text(image_name[0]))
-            return image_src
 
+def get_attributes(language):
+    attributes = []
 
-# setting trim will only returned high ranked rows OR the first row
-def add_attribute(attributes, id_cache, property_id, default_label=None, date=False, trim=False):
-    attribute = id_cache.get(property_id, None)
-    if attribute is not None:
+    def add_value(name):
+        attributes.append(WDAttribute(name))
+
+    def add_amount(name):
+        attributes.append(WDAmountAttribute(name))
+
+    def add_label(name):
+        attributes.append(WDLabelAttribute(name))
+
+    def add_url(name, url_id=None, **kwargs):
+        attributes.append(WDURLAttribute(name, url_id, kwargs))
+
+    def add_image(name, url_id=None, priority=1):
+        attributes.append(WDImageAttribute(name, url_id, priority))
+
+    def add_date(name):
+        attributes.append(WDDateAttribute(name))
+
+    # Dates
+    for p in ['P571',    # inception date
+              'P576',    # dissolution date
+              'P580',    # start date
+              'P582',    # end date
+              'P569',    # date of birth
+              'P570',    # date of death
+              'P619',    # date of spacecraft launch
+              'P620']:   # date of spacecraft landing
+        add_date(p)
+
+    for p in ['P27',     # country of citizenship
+              'P495',    # country of origin
+              'P17',     # country
+              'P159']:   # headquarters location
+        add_label(p)
+
+    # Places
+    for p in ['P36',     # capital
+              'P35',     # head of state
+              'P6',      # head of government
+              'P122',    # basic form of government
+              'P37']:    # official language
+        add_label(p)
+
+    add_value('P1082')   # population
+    add_amount('P2046')  # area
+    add_amount('P281')   # postal code
+    add_label('P38')     # currency
+    add_amount('P2048')  # height (building)
+
+    # Media
+    for p in ['P400',    # platform (videogames, computing)
+              'P50',     # author
+              'P170',    # creator
+              'P57',     # director
+              'P175',    # performer
+              'P178',    # developer
+              'P162',    # producer
+              'P176',    # manufacturer
+              'P58',     # screenwriter
+              'P272',    # production company
+              'P264',    # record label
+              'P123',    # publisher
+              'P449',    # original network
+              'P750',    # distributed by
+              'P86']:    # composer
+        add_label(p)
+
+    add_date('P577')     # publication date
+    add_label('P136')    # genre (music, film, artistic...)
+    add_label('P364')    # original language
+    add_value('P212')    # ISBN-13
+    add_value('P957')    # ISBN-10
+    add_label('P275')    # copyright license
+    add_label('P277')    # programming language
+    add_value('P348')    # version
+    add_label('P840')    # narrative location
+
+    # Languages
+    add_value('P1098')   # number of speakers
+    add_label('P282')    # writing system
+    add_label('P1018')   # language regulatory body
+    add_value('P218')    # language code (ISO 639-1)
+
+    # Other
+    add_label('P169')    # ceo
+    add_label('P112')    # founded by
+    add_label('P1454')   # legal form (company, organization)
+    add_label('P137')    # operator (service, facility, ...)
+    add_label('P1029')   # crew members
+    add_label('P225')    # taxon name
+    add_value('P274')    # chemical formula
+    add_label('P1346')   # winner (sports, contests, ...)
+    add_value('P1120')   # number of deaths
+    add_value('P498')    # currency code (ISO 4217)
+
+    # URL
+    add_url('P856', official=True)          # official website
+    attributes.append(WDArticle(language))  # wikipedia (user language)
+    if not language.startswith('en'):
+        attributes.append(WDArticle('en'))  # wikipedia (english)
+
+    add_url('P1324')     # source code repository
+    add_url('P1581')     # blog
+    add_url('P434', url_id='musicbrainz_artist')
+    add_url('P435', url_id='musicbrainz_work')
+    add_url('P436', url_id='musicbrainz_release_group')
+    add_url('P966', url_id='musicbrainz_label')
+    add_url('P345', url_id='imdb_id')
+    add_url('P2397', url_id='youtube_channel')
+    add_url('P1651', url_id='youtube_video')
+    add_url('P2002', url_id='twitter_profile')
+    add_url('P2013', url_id='facebook_profile')
+    add_url('P2003', url_id='instagram_profile')
+
+    # Map
+    attributes.append(WDGeoAttribute('P625'))
+
+    # Image
+    add_image('P15', priority=1, url_id='wikimedia_image')    # route map
+    add_image('P242', priority=2, url_id='wikimedia_image')   # locator map
+    add_image('P154', priority=3, url_id='wikimedia_image')   # logo
+    add_image('P18', priority=4, url_id='wikimedia_image')    # image
+    add_image('P41', priority=5, url_id='wikimedia_image')    # flag
+    add_image('P2716', priority=6, url_id='wikimedia_image')  # collage
+    add_image('P2910', priority=7, url_id='wikimedia_image')  # icon
+
+    return attributes
+
+
+class WDAttribute:
+
+    __slots__ = 'name',
+
+    def __init__(self, name):
+        self.name = name
+
+    def get_select(self):
+        return '(group_concat(distinct ?{name};separator=", ") as ?{name}s)'.replace('{name}', self.name)
+
+    def get_label(self, language):
+        return get_label_for_entity(self.name, language)
+
+    def get_where(self):
+        return "OPTIONAL { ?item wdt:{name} ?{name} . }".replace('{name}', self.name)
+
+    def get_wikibase_label(self):
+        return ""
+
+    def get_group_by(self):
+        return ""
+
+    def get_str(self, result, language):
+        return result.get(self.name + 's')
 
-        if default_label:
-            label = default_label
-        else:
-            label = extract_text(eval_xpath(attribute, label_xpath))
-            label = label[0].upper() + label[1:]
-
-        if date:
-            trim = True
-            # remove calendar name
-            calendar_name = eval_xpath(attribute, calendar_name_xpath)
-            for calendar in calendar_name:
-                calendar.getparent().remove(calendar)
-
-        concat_values = ""
-        values = []
-        first_value = None
-        for row in eval_xpath(attribute, property_row_xpath):
-            if not first_value or not trim or eval_xpath(row, preferred_rank_xpath):
-                value = eval_xpath(row, value_xpath)
-                if not value:
-                    continue
-                value = extract_text(value)
-
-                # save first value in case no ranked row is found
-                if trim and not first_value:
-                    first_value = value
-                else:
-                    # to avoid duplicate values
-                    if value not in values:
-                        concat_values += value + ", "
-                        values.append(value)
-
-        if trim and not values:
-            attributes.append({'label': label,
-                               'value': first_value})
-        else:
-            attributes.append({'label': label,
-                               'value': concat_values[:-2]})
+    def __repr__(self):
+        return '<' + str(type(self).__name__) + ':' + self.name + '>'
 
 
-# requires property_id unless it's a wiki link (defined in link_type)
-def add_url(urls, result, id_cache, property_id=None, default_label=None, url_prefix=None, results=None,
-            link_type=None, only_first=True):
-    links = []
+class WDAmountAttribute(WDAttribute):
 
-    # wiki links don't have property in wikidata page
-    if link_type and 'wiki' in link_type:
-            links.append(get_wikilink(result, link_type))
-    else:
-        dom_element = id_cache.get(property_id, None)
-        if dom_element is not None:
-            if not default_label:
-                label = extract_text(eval_xpath(dom_element, label_xpath))
-                label = label[0].upper() + label[1:]
+    def get_select(self):
+        return '?{name} ?{name}Unit'.replace('{name}', self.name)
 
-            if link_type == 'geo':
-                links.append(get_geolink(dom_element))
+    def get_where(self):
+        return """  OPTIONAL { ?item p:{name} ?{name}Node .
+    ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} .
+    OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace('{name}', self.name)
 
-            elif link_type == 'imdb':
-                links.append(get_imdblink(dom_element, url_prefix))
+    def get_group_by(self):
+        return self.get_select()
 
-            else:
-                url_results = eval_xpath(dom_element, url_xpath)
-                for link in url_results:
-                    if link is not None:
-                        if url_prefix:
-                            link = url_prefix + extract_text(link)
-                        else:
-                            link = extract_text(link)
-                        links.append(link)
-
-    # append urls
-    for url in links:
-        if url is not None:
-            u = {'title': default_label or label, 'url': url}
-            if property_id == 'P856':
-                u['official'] = True
-                u['domain'] = url.split('/')[2]
-            urls.append(u)
-            if results is not None:
-                results.append(u)
-            if only_first:
-                break
-
-
-def get_imdblink(result, url_prefix):
-    imdb_id = eval_xpath(result, value_xpath)
-    if imdb_id:
-        imdb_id = extract_text(imdb_id)
-        id_prefix = imdb_id[:2]
-        if id_prefix == 'tt':
-            url = url_prefix + 'title/' + imdb_id
-        elif id_prefix == 'nm':
-            url = url_prefix + 'name/' + imdb_id
-        elif id_prefix == 'ch':
-            url = url_prefix + 'character/' + imdb_id
-        elif id_prefix == 'co':
-            url = url_prefix + 'company/' + imdb_id
-        elif id_prefix == 'ev':
-            url = url_prefix + 'event/' + imdb_id
-        else:
-            url = None
-        return url
+    def get_str(self, result, language):
+        value = result.get(self.name)
+        unit = result.get(self.name + "Unit")
+        if unit is not None:
+            unit = unit.replace('http://www.wikidata.org/entity/', '')
+            return value + " " + get_label_for_entity(unit, language)
+        return value
 
 
-def get_geolink(result):
-    coordinates = eval_xpath(result, value_xpath)
-    if not coordinates:
-        return None
-    coordinates = extract_text(coordinates[0])
-    latitude, longitude = coordinates.split(',')
-
-    # convert to decimal
-    lat = int(latitude[:latitude.find('°')])
-    if latitude.find('\'') >= 0:
-        lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
-    if latitude.find('"') >= 0:
-        lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0
-    if latitude.find('S') >= 0:
-        lat *= -1
-    lon = int(longitude[:longitude.find('°')])
-    if longitude.find('\'') >= 0:
-        lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0
-    if longitude.find('"') >= 0:
-        lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0
-    if longitude.find('W') >= 0:
-        lon *= -1
-
-    # TODO: get precision
-    precision = 0.0002
-    # there is no zoom information, deduce from precision (error prone)
-    # samples :
-    # 13 --> 5
-    # 1 --> 6
-    # 0.016666666666667 --> 9
-    # 0.00027777777777778 --> 19
-    # wolframalpha :
-    # quadratic fit { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
-    # 14.1186-8.8322 x+0.625447 x^2
-    if precision < 0.0003:
-        zoom = 19
-    else:
-        zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447)
+class WDArticle(WDAttribute):
+
+    __slots__ = 'language', 'kwargs'
+
+    def __init__(self, language, kwargs=None):
+        super().__init__('wikipedia')
+        self.language = language
+        self.kwargs = kwargs or {}
+
+    def get_label(self, language):
+        # language parameter is ignored
+        return "Wikipedia ({language})".replace('{language}', self.language)
+
+    def get_select(self):
+        return "?article{language} ?articleName{language}".replace('{language}', self.language)
+
+    def get_where(self):
+        return """OPTIONAL { ?article{language} schema:about ?item ;
+             schema:inLanguage "{language}" ;
+             schema:isPartOf <https://{language}.wikipedia.org/> ;
+             schema:name ?articleName{language} . }""".replace('{language}', self.language)
+
+    def get_group_by(self):
+        return self.get_select()
+
+    def get_str(self, result, language):
+        key = 'article{language}'.replace('{language}', self.language)
+        return result.get(key)
+
+
+class WDLabelAttribute(WDAttribute):
+
+    def get_select(self):
+        return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name)
 
-    url = url_map\
-        .replace('{latitude}', str(lat))\
-        .replace('{longitude}', str(lon))\
-        .replace('{zoom}', str(zoom))
+    def get_where(self):
+        return "OPTIONAL { ?item wdt:{name} ?{name} . }".replace('{name}', self.name)
 
-    return url
+    def get_wikibase_label(self):
+        return "?{name} rdfs:label ?{name}Label .".replace('{name}', self.name)
 
+    def get_str(self, result, language):
+        return result.get(self.name + 'Labels')
 
-def get_wikilink(result, wikiid):
-    url = eval_xpath(result, wikilink_xpath.replace('{wikiid}', wikiid))
-    if not url:
+
+class WDURLAttribute(WDAttribute):
+
+    HTTP_WIKIMEDIA_IMAGE = 'http://commons.wikimedia.org/wiki/Special:FilePath/'
+
+    __slots__ = 'url_id', 'kwargs'
+
+    def __init__(self, name, url_id=None, kwargs=None):
+        super().__init__(name)
+        self.url_id = url_id
+        self.kwargs = kwargs
+
+    def get_str(self, result, language):
+        value = result.get(self.name + 's')
+        if self.url_id and value is not None and value != '':
+            value = value.split(',')[0]
+            url_id = self.url_id
+            if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):
+                value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):]
+                url_id = 'wikimedia_image'
+            return get_external_url(url_id, value)
+        return value
+
+
+class WDGeoAttribute(WDAttribute):
+
+    def get_label(self, language):
+        return "OpenStreetMap"
+
+    def get_select(self):
+        return "?{name}Lat ?{name}Long".replace('{name}', self.name)
+
+    def get_where(self):
+        return """OPTIONAL { ?item p:{name}/psv:{name} [
+    wikibase:geoLatitude ?{name}Lat ;
+    wikibase:geoLongitude ?{name}Long ] }""".replace('{name}', self.name)
+
+    def get_group_by(self):
+        return self.get_select()
+
+    def get_str(self, result, language, osm_zoom=19):
+        latitude = result.get(self.name + 'Lat')
+        longitude = result.get(self.name + 'Long')
+        if latitude and longitude:
+            return get_earth_coordinates_url(latitude, longitude, osm_zoom)
         return None
-    url = url[0]
-    if url.startswith('http://'):
-        url = url.replace('http://', 'https://')
-    elif url.startswith('//'):
-        url = 'https:' + url
-    return url
+
+
+class WDImageAttribute(WDURLAttribute):
+
+    __slots__ = 'priority',
+
+    def __init__(self, name, url_id=None, priority=100):
+        super().__init__(name, url_id)
+        self.priority = priority
+
+
+class WDDateAttribute(WDAttribute):
+
+    def get_select(self):
+        return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)
+
+    def get_where(self):
+        # To remove duplicates, add
+        # FILTER NOT EXISTS { ?item p:{name}/psv:{name}/wikibase:timeValue ?{name}bis FILTER (?{name}bis < ?{name}) }
+        # this filter is too slow, so the response function ignores duplicate results
+        # (see the seen_entities variable)
+        return """OPTIONAL { ?item p:{name}/psv:{name} [
+    wikibase:timeValue ?{name} ;
+    wikibase:timePrecision ?{name}timePrecision ;
+    wikibase:timeTimezone ?{name}timeZone ;
+    wikibase:timeCalendarModel ?{name}timeCalendar ] . }
+    hint:Prior hint:rangeSafe true;""".replace('{name}', self.name)
+
+    def get_group_by(self):
+        return self.get_select()
+
+    def format_8(self, value, locale):
+        # precision: less than a year
+        return value
+
+    def format_9(self, value, locale):
+        year = int(value)
+        # precision: year
+        if year < 1584:
+            if year < 0:
+                return str(year - 1)
+            return str(year)
+        timestamp = isoparse(value)
+        return format_date(timestamp, format='yyyy', locale=locale)
+
+    def format_10(self, value, locale):
+        # precision: month
+        timestamp = isoparse(value)
+        return format_date(timestamp, format='MMMM y', locale=locale)
+
+    def format_11(self, value, locale):
+        # precision: day
+        timestamp = isoparse(value)
+        return format_date(timestamp, format='full', locale=locale)
+
+    def format_13(self, value, locale):
+        timestamp = isoparse(value)
+        # precision: minute
+        return get_datetime_format(format, locale=locale) \
+            .replace("'", "") \
+            .replace('{0}', format_time(timestamp, 'full', tzinfo=None,
+                                        locale=locale)) \
+            .replace('{1}', format_date(timestamp, 'short', locale=locale))
+
+    def format_14(self, value, locale):
+        # precision: second.
+        return format_datetime(isoparse(value), format='full', locale=locale)
+
+    DATE_FORMAT = {
+        '0': ('format_8', 1000000000),
+        '1': ('format_8', 100000000),
+        '2': ('format_8', 10000000),
+        '3': ('format_8', 1000000),
+        '4': ('format_8', 100000),
+        '5': ('format_8', 10000),
+        '6': ('format_8', 1000),
+        '7': ('format_8', 100),
+        '8': ('format_8', 10),
+        '9': ('format_9', 1),  # year
+        '10': ('format_10', 1),  # month
+        '11': ('format_11', 0),  # day
+        '12': ('format_13', 0),  # hour (not supported by babel, display minute)
+        '13': ('format_13', 0),  # minute
+        '14': ('format_14', 0)  # second
+    }
+
+    def get_str(self, result, language):
+        value = result.get(self.name)
+        if value == '' or value is None:
+            return None
+        precision = result.get(self.name + 'timePrecision')
+        date_format = WDDateAttribute.DATE_FORMAT.get(precision)
+        if date_format is not None:
+            format_method = getattr(self, date_format[0])
+            precision = date_format[1]
+            try:
+                if precision >= 1:
+                    t = value.split('-')
+                    if value.startswith('-'):
+                        value = '-' + t[1]
+                    else:
+                        value = t[0]
+                return format_method(value, language)
+            except Exception:
+                return value
+        return value
+
+
+def debug_explain_wikidata_query(query, method='GET'):
+    if method == 'GET':
+        http_response = get(SPARQL_EXPLAIN_URL + '&' + urlencode({'query': query}), headers=get_headers())
+    else:
+        http_response = post(SPARQL_EXPLAIN_URL, data={'query': query}, headers=get_headers())
+    http_response.raise_for_status()
+    return http_response.content
+
+
+def init(engine_settings=None):
+    # WIKIDATA_PROPERTIES : add unit symbols
+    WIKIDATA_PROPERTIES.update(WIKIDATA_UNITS)
+
+    # WIKIDATA_PROPERTIES : add property labels
+    wikidata_property_names = []
+    for attribute in get_attributes('en'):
+        if type(attribute) in (WDAttribute, WDAmountAttribute, WDURLAttribute, WDDateAttribute, WDLabelAttribute):
+            if attribute.name not in WIKIDATA_PROPERTIES:
+                wikidata_property_names.append("wd:" + attribute.name)
+    query = QUERY_PROPERTY_NAMES.replace('%ATTRIBUTES%', " ".join(wikidata_property_names))
+    jsonresponse = send_wikidata_query(query)
+    for result in jsonresponse.get('results', {}).get('bindings', {}):
+        name = result['name']['value']
+        lang = result['name']['xml:lang']
+        entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
+        WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
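
For orientation only, a rough sketch of how the new pieces fit together. This driver code is hypothetical and not part of the commit (in searx, request() and response() are invoked by the engine framework and init() runs at engine setup), and it assumes a configured searx environment so that searx.poolrequests is usable:

```python
# Hypothetical driver code, assuming a configured searx environment.
init()                                                 # fetch unit symbols and property labels once
query, attributes = get_query('Douglas Adams', 'en')   # build the single SPARQL request
jsonresponse = send_wikidata_query(query)              # one HTTP round trip to query.wikidata.org

for binding in jsonresponse.get('results', {}).get('bindings', []):
    attribute_result = {key: value['value'] for key, value in binding.items()}
    # get_results() turns one SPARQL binding into infobox and link results
    for result in get_results(attribute_result, attributes, 'en'):
        print(result.get('infobox') or result.get('title'), result.get('url', ''))
```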