Browse Source

[pylint] searx_extra/update/update_osm_keys_tags.py

BTW: move some comments into the script's doc-string

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
8871e39122
1 changed files with 54 additions and 49 deletions
  1. 54 49
      searx_extra/update/update_osm_keys_tags.py

+ 54 - 49
searx_extra/update/update_osm_keys_tags.py

@@ -1,12 +1,43 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
-"""
-Fetch OSM keys and tags
-
-to get the i18n names, the scripts uses query.wikidata.org
-instead of for example https://taginfo.openstreetmap.org/taginfo/apidoc
+# lint: pylint
+# pylint: disable=missing-function-docstring
+"""Fetch OSM keys and tags.
+
+To get the i18n names, the script uses `Wikidata Query Service`_ instead of,
+for example, the `OSM tags API`_ (side note: the actual change log from
+map.atownsend.org.uk_ might be useful to normalize OSM tags).
+
+.. _Wikidata Query Service: https://query.wikidata.org/
+.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc
+.. _map.atownsend.org.uk: https://map.atownsend.org.uk/maps/map/changelog.html
+
+:py:obj:`SPARQL_TAGS_REQUEST` :
+    Wikidata SPARQL query that returns *type-categories* and *types*.  The
+    returned tag is ``Tag:{category}={type}`` (see :py:func:`get_tags`).
+    Example:
+
+    - https://taginfo.openstreetmap.org/tags/building=house#overview
+    - https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse
+      at the bottom of the infobox (right side), there is a link to wikidata:
+      https://www.wikidata.org/wiki/Q3947
+      see property "OpenStreetMap tag or key" (P1282)
+    - https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow
+      https://www.wikidata.org/wiki/Q850107
+
+:py:obj:`SPARQL_KEYS_REQUEST` :
+    Wikidata SPARQL query that returns *keys*.  Example with "payment":
+
+    - https://wiki.openstreetmap.org/wiki/Key%3Apayment
+      at the bottom of the infobox (right side), there is a link to wikidata:
+      https://www.wikidata.org/wiki/Q1148747
+      link made using the "OpenStreetMap tag or key" property (P1282)
+      to be confirmed: is there one wiki page per key?
+    - https://taginfo.openstreetmap.org/keys/payment#values
+    - https://taginfo.openstreetmap.org/keys/payment:cash#values
+
+    ``rdfs:label`` gets all the labels without language selection
+    (as opposed to SERVICE ``wikibase:label``).
 
 
-https://map.atownsend.org.uk/maps/map/changelog.html (the actual change log)
-might be useful to normalize OSM tags
 """
 """
 
 
 import json
 import json
@@ -19,50 +50,28 @@ from searx.engines.wikidata import send_wikidata_query
 from searx.languages import language_codes
 from searx.languages import language_codes
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
 
 
-
-# nominatim return type category and type
-# the tag is "Tag:{category}={type}"
-# Example:
-# * https://taginfo.openstreetmap.org/tags/building=house#overview
-# * https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse
-#   at the bottom of the infobox (right side), there is a link to wikidata:
-#   https://www.wikidata.org/wiki/Q3947
-#   see property "OpenStreetMap tag or key" (P1282)
-# * https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow
-#   https://www.wikidata.org/wiki/Q850107
-SARQL_TAGS_REQUEST = """
+SPARQL_TAGS_REQUEST = """
 SELECT ?tag ?item ?itemLabel WHERE {
 SELECT ?tag ?item ?itemLabel WHERE {
   ?item wdt:P1282 ?tag .
   ?item wdt:P1282 ?tag .
   ?item rdfs:label ?itemLabel .
   ?item rdfs:label ?itemLabel .
   FILTER(STRSTARTS(?tag, 'Tag'))
   FILTER(STRSTARTS(?tag, 'Tag'))
 }
 }
-GROUP BY ?tag ?item ?itemLabel 
+GROUP BY ?tag ?item ?itemLabel
 ORDER BY ?tag ?item ?itemLabel
 ORDER BY ?tag ?item ?itemLabel
 """
 """
 
 
-# keys
-# Example with "payment"":
-# * https://wiki.openstreetmap.org/wiki/Key%3Apayment
-#   at the bottom of infobox (right side), there is a link to wikidata:
-#   https://www.wikidata.org/wiki/Q1148747
-#   link made using the "OpenStreetMap tag or key" property (P1282)
-#   to be confirm: there is a one wiki page per key ?
-# * https://taginfo.openstreetmap.org/keys/payment#values
-# * https://taginfo.openstreetmap.org/keys/payment:cash#values
-#
-# rdfs:label get all the labels without language selection
-# (as opposed to SERVICE wikibase:label)
-SARQL_KEYS_REQUEST = """
+SPARQL_KEYS_REQUEST = """
 SELECT ?key ?item ?itemLabel WHERE {
 SELECT ?key ?item ?itemLabel WHERE {
   ?item wdt:P1282 ?key .
   ?item wdt:P1282 ?key .
   ?item rdfs:label ?itemLabel .
   ?item rdfs:label ?itemLabel .
   FILTER(STRSTARTS(?key, 'Key'))
   FILTER(STRSTARTS(?key, 'Key'))
 }
 }
-GROUP BY ?key ?item ?itemLabel 
+GROUP BY ?key ?item ?itemLabel
 ORDER BY ?key ?item ?itemLabel
 ORDER BY ?key ?item ?itemLabel
 """
 """
 
 
 LANGUAGES = [l[0].lower() for l in language_codes]
 LANGUAGES = [l[0].lower() for l in language_codes]
+
 PRESET_KEYS = {
 PRESET_KEYS = {
     ('wikidata',): {'en': 'Wikidata'},
     ('wikidata',): {'en': 'Wikidata'},
     ('wikipedia',): {'en': 'Wikipedia'},
     ('wikipedia',): {'en': 'Wikipedia'},
@@ -71,11 +80,11 @@ PRESET_KEYS = {
     ('fax',): {'en': 'Fax'},
     ('fax',): {'en': 'Fax'},
     ('internet_access', 'ssid'): {'en': 'Wi-Fi'},
     ('internet_access', 'ssid'): {'en': 'Wi-Fi'},
 }
 }
+
 INCLUDED_KEYS = {
 INCLUDED_KEYS = {
     ('addr', )
     ('addr', )
 }
 }
 
 
-
 def get_preset_keys():
 def get_preset_keys():
     results = collections.OrderedDict()
     results = collections.OrderedDict()
     for keys, value in PRESET_KEYS.items():
     for keys, value in PRESET_KEYS.items():
@@ -85,10 +94,9 @@ def get_preset_keys():
         r.setdefault('*', value)
         r.setdefault('*', value)
     return results
     return results
 
 
-
 def get_keys():
 def get_keys():
     results = get_preset_keys()
     results = get_preset_keys()
-    response = send_wikidata_query(SARQL_KEYS_REQUEST)
+    response = send_wikidata_query(SPARQL_KEYS_REQUEST)
 
 
     for key in response['results']['bindings']:
     for key in response['results']['bindings']:
         keys = key['key']['value'].split(':')[1:]
         keys = key['key']['value'].split(':')[1:]
@@ -136,7 +144,7 @@ def get_keys():
 
 
 def get_tags():
 def get_tags():
     results = collections.OrderedDict()
     results = collections.OrderedDict()
-    response = send_wikidata_query(SARQL_TAGS_REQUEST)
+    response = send_wikidata_query(SPARQL_TAGS_REQUEST)
     for tag in response['results']['bindings']:
     for tag in response['results']['bindings']:
         tag_names = tag['tag']['value'].split(':')[1].split('=')
         tag_names = tag['tag']['value'].split(':')[1].split('=')
         if len(tag_names) == 2:
         if len(tag_names) == 2:
@@ -149,7 +157,6 @@ def get_tags():
             results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label)
             results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label)
     return results
     return results
 
 
-
 def optimize_data_lang(translations):
 def optimize_data_lang(translations):
     language_to_delete = []
     language_to_delete = []
     # remove "zh-hk" entry if the value is the same as "zh"
     # remove "zh-hk" entry if the value is the same as "zh"
@@ -174,14 +181,12 @@ def optimize_data_lang(translations):
     for language in language_to_delete:
     for language in language_to_delete:
         del translations[language]
         del translations[language]
 
 
-
 def optimize_tags(data):
 def optimize_tags(data):
     for v in data.values():
     for v in data.values():
         for translations in v.values():
         for translations in v.values():
             optimize_data_lang(translations)
             optimize_data_lang(translations)
     return data
     return data
 
 
-
 def optimize_keys(data):
 def optimize_keys(data):
     for k, v in data.items():
     for k, v in data.items():
         if k == '*':
         if k == '*':
@@ -190,15 +195,15 @@ def optimize_keys(data):
             optimize_keys(v)
             optimize_keys(v)
     return data
     return data
 
 
-
 def get_osm_tags_filename():
 def get_osm_tags_filename():
     return Path(searx_dir) / "data" / "osm_keys_tags.json"
     return Path(searx_dir) / "data" / "osm_keys_tags.json"
 
 
+if __name__ == '__main__':
 
 
-set_timeout_for_thread(60)
-result = {
-    'keys': optimize_keys(get_keys()),
-    'tags': optimize_tags(get_tags()),
-}
-with open(get_osm_tags_filename(), 'w') as f:
-    json.dump(result, f, indent=4, ensure_ascii=False)
+    set_timeout_for_thread(60)
+    result = {
+        'keys': optimize_keys(get_keys()),
+        'tags': optimize_tags(get_tags()),
+    }
+    with open(get_osm_tags_filename(), 'w') as f:
+        json.dump(result, f, indent=4, ensure_ascii=False)