wikidata_units.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
  3. Coordinates`_
  4. .. _SPARQL/WIKIDATA Precision, Units and Coordinates:
  5. https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
  6. """
  7. __all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]
  8. import collections
  9. from searx import data
  10. from searx.engines import wikidata
  11. ADDITIONAL_UNITS = [
  12. {
  13. "si_name": "Q11579",
  14. "symbol": "°C",
  15. "to_si": lambda val: val + 273.15,
  16. "from_si": lambda val: val - 273.15,
  17. },
  18. {
  19. "si_name": "Q11579",
  20. "symbol": "°F",
  21. "to_si": lambda val: (val + 459.67) * 5 / 9,
  22. "from_si": lambda val: (val * 9 / 5) - 459.67,
  23. },
  24. ]
  25. """Additional items to convert from a measure unit to a SI unit (vice versa).
  26. .. code:: python
  27. {
  28. "si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
  29. "symbol": "°C", # symbol of the measure unit
  30. "to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
  31. "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
  32. },
  33. {
  34. "si_name": "Q11573",
  35. "symbol": "mi",
  36. "to_si": 1609.344, # convert measure value (val) to SI unit
  37. "from_si": 1 / 1609.344 # convert SI value (val) measure unit
  38. },
  39. The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
  40. or a callable_ (val in / converted value returned).
  41. .. _callable: https://docs.python.org/3/glossary.html#term-callable
  42. """
  43. ALIAS_SYMBOLS = {
  44. '°C': ('C',),
  45. '°F': ('F',),
  46. 'mi': ('L',),
  47. }
  48. """Alias symbols for known unit of measure symbols / by example::
  49. '°C': ('C', ...), # list of alias symbols for °C (Q69362731)
  50. '°F': ('F', ...), # list of alias symbols for °F (Q99490479)
  51. 'mi': ('L',), # list of alias symbols for mi (Q253276)
  52. """
# Module-level cache of the unit tuples; it stays empty until the first call of
# ``symbol_to_si()`` populates it.
SYMBOL_TO_SI = []
# Module-level cache of the lookup table built by ``units_by_si_name()``;
# ``None`` means "not built yet".
UNITS_BY_SI_NAME: dict | None = None
  55. def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
  56. from_si = units_by_si_name(si_name)[symbol][symbol]["from_si"]
  57. if isinstance(from_si, (float, int)):
  58. value = float(value) * from_si
  59. else:
  60. value = from_si(float(value))
  61. return value
  62. def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
  63. to_si = units_by_si_name(si_name)[symbol][symbol]["to_si"]
  64. if isinstance(to_si, (float, int)):
  65. value = float(value) * to_si
  66. else:
  67. value = to_si(float(value))
  68. return value
  69. def units_by_si_name(si_name):
  70. global UNITS_BY_SI_NAME
  71. if UNITS_BY_SI_NAME is not None:
  72. return UNITS_BY_SI_NAME[si_name]
  73. UNITS_BY_SI_NAME = {}
  74. for item in symbol_to_si():
  75. by_symbol = UNITS_BY_SI_NAME.get(si_name)
  76. if by_symbol is None:
  77. by_symbol = {}
  78. UNITS_BY_SI_NAME[si_name] = by_symbol
  79. by_symbol[item["symbol"]] = item
  80. return UNITS_BY_SI_NAME[si_name]
  81. def symbol_to_si():
  82. """Generates a list of tuples, each tuple is a measure unit and the fields
  83. in the tuple are:
  84. 0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
  85. 1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
  86. 2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
  87. multiplied by 1609.344)
  88. 3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
  89. 100mi divided by 1609.344)
  90. The returned list is sorted, the first items are created from
  91. ``WIKIDATA_UNITS``, the second group of items is build from
  92. :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
  93. If you search this list for a symbol, then a match with a symbol from
  94. Wikidata has the highest weighting (first hit in the list), followed by the
  95. symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
  96. given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
  97. """
  98. global SYMBOL_TO_SI # pylint: disable=global-statement
  99. if SYMBOL_TO_SI:
  100. return SYMBOL_TO_SI
  101. # filter out units which can't be normalized to a SI unit and filter out
  102. # units without a symbol / arcsecond does not have a symbol
  103. # https://www.wikidata.org/wiki/Q829073
  104. for item in data.WIKIDATA_UNITS.values():
  105. if item['to_si_factor'] and item['symbol']:
  106. SYMBOL_TO_SI.append(
  107. (
  108. item['symbol'],
  109. item['si_name'],
  110. 1 / item['to_si_factor'], # from_si
  111. item['to_si_factor'], # to_si
  112. item['symbol'],
  113. )
  114. )
  115. for item in ADDITIONAL_UNITS:
  116. SYMBOL_TO_SI.append(
  117. (
  118. item['symbol'],
  119. item['si_name'],
  120. item['from_si'],
  121. item['to_si'],
  122. item['symbol'],
  123. )
  124. )
  125. alias_items = []
  126. for item in SYMBOL_TO_SI:
  127. for alias in ALIAS_SYMBOLS.get(item[0], ()):
  128. alias_items.append(
  129. (
  130. alias,
  131. item[1],
  132. item[2], # from_si
  133. item[3], # to_si
  134. item[0], # origin unit
  135. )
  136. )
  137. SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
  138. return SYMBOL_TO_SI
# The response can contain the same ?item more than once, each time with a
# different ?symbol.  "ORDER BY ?item DESC(?rank) ?symbol" provides a
# deterministic result even if a ?item has different ?symbol of the same rank;
# fetch_units() keeps only the first row of each ?item.
#
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
#
# For an example see the result for https://www.wikidata.org/wiki/Q11582:
# there are multiple symbols of the same rank.
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
WHERE
{
?item wdt:P31/wdt:P279 wd:Q47574 .
?item p:P5061 ?symbolP .
?symbolP ps:P5061 ?symbol ;
wikibase:rank ?rank .
OPTIONAL {
?item p:P2370 ?tosistmt .
?tosistmt psv:P2370 ?tosinode .
?tosinode wikibase:quantityAmount ?tosi .
?tosinode wikibase:quantityUnit ?tosiUnit .
}
FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""
  168. def fetch_units():
  169. """Fetch units from Wikidata. Function is used to update persistence of
  170. :py:obj:`searx.data.WIKIDATA_UNITS`."""
  171. results = collections.OrderedDict()
  172. response = wikidata.send_wikidata_query(SARQL_REQUEST)
  173. for unit in response['results']['bindings']:
  174. symbol = unit['symbol']['value']
  175. name = unit['item']['value'].rsplit('/', 1)[1]
  176. si_name = unit.get('tosiUnit', {}).get('value', '')
  177. if si_name:
  178. si_name = si_name.rsplit('/', 1)[1]
  179. to_si_factor = unit.get('tosi', {}).get('value', '')
  180. if name not in results:
  181. # ignore duplicate: always use the first one
  182. results[name] = {
  183. 'symbol': symbol,
  184. 'si_name': si_name if si_name else None,
  185. 'to_si_factor': float(to_si_factor) if to_si_factor else None,
  186. }
  187. return results