wikidata_units.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
"""Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
Coordinates`_

.. _SPARQL/WIKIDATA Precision, Units and Coordinates:
     https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
"""
  7. from __future__ import annotations
  8. __all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]
  9. import collections
  10. from searx import data
  11. from searx.engines import wikidata
  12. class Beaufort:
  13. """The mapping of the Beaufort_ contains values from 0 to 16 (55.6 m/s),
  14. wind speeds greater than 200km/h (55.6 m/s) are given as 17 Bft. Thats why
  15. a value of 17 Bft cannot be converted to SI.
  16. .. hint::
  17. Negative values or values greater 16 Bft (55.6 m/s) will throw a
  18. :py:obj:`ValueError`.
  19. _Beaufort: https://en.wikipedia.org/wiki/Beaufort_scale
  20. """
  21. # fmt: off
  22. scale: list[float] = [
  23. 0.2, 1.5, 3.3, 5.4, 7.9,
  24. 10.7, 13.8, 17.1, 20.7, 24.4,
  25. 28.4, 32.6, 32.7, 41.1, 45.8,
  26. 50.8, 55.6
  27. ]
  28. # fmt: on
  29. @classmethod
  30. def from_si(cls, value) -> float:
  31. if value < 0 or value > 55.6:
  32. raise ValueError(f"invalid value {value} / the Beaufort scales from 0 to 16 (55.6 m/s)")
  33. bft = 0
  34. for bft, mps in enumerate(cls.scale):
  35. if mps >= value:
  36. break
  37. return bft
  38. @classmethod
  39. def to_si(cls, value) -> float:
  40. idx = round(value)
  41. if idx < 0 or idx > 16:
  42. raise ValueError(f"invalid value {value} / the Beaufort scales from 0 to 16 (55.6 m/s)")
  43. return cls.scale[idx]
  44. ADDITIONAL_UNITS = [
  45. {
  46. "si_name": "Q11579",
  47. "symbol": "°C",
  48. "to_si": lambda val: val + 273.15,
  49. "from_si": lambda val: val - 273.15,
  50. },
  51. {
  52. "si_name": "Q11579",
  53. "symbol": "°F",
  54. "to_si": lambda val: (val + 459.67) * 5 / 9,
  55. "from_si": lambda val: (val * 9 / 5) - 459.67,
  56. },
  57. {
  58. "si_name": "Q182429",
  59. "symbol": "Bft",
  60. "to_si": Beaufort.to_si,
  61. "from_si": Beaufort.from_si,
  62. },
  63. ]
  64. """Additional items to convert from a measure unit to a SI unit (vice versa).
  65. .. code:: python
  66. {
  67. "si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
  68. "symbol": "°C", # symbol of the measure unit
  69. "to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
  70. "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
  71. },
  72. {
  73. "si_name": "Q11573",
  74. "symbol": "mi",
  75. "to_si": 1609.344, # convert measure value (val) to SI unit
  76. "from_si": 1 / 1609.344 # convert SI value (val) measure unit
  77. },
  78. The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
  79. or a callable_ (val in / converted value returned).
  80. .. _callable: https://docs.python.org/3/glossary.html#term-callable
  81. """
# Maps the symbol of a known unit of measure to a tuple of alias symbols;
# symbol_to_si() generates one additional list entry per alias.
# NOTE(review): 'L' as an alias of 'mi' looks unusual — confirm it is intended.
ALIAS_SYMBOLS = {
    '°C': ('C',),
    '°F': ('F',),
    'mi': ('L',),
    'Bft': ('bft',),
}
"""Alias symbols for known unit of measure symbols / by example::

    '°C': ('C', ...),  # list of alias symbols for °C (Q69362731)
    '°F': ('F', ...),  # list of alias symbols for °F (Q99490479)
    'mi': ('L',),      # list of alias symbols for mi (Q253276)
"""

# Cache of the list built by symbol_to_si(); empty until its first call.
SYMBOL_TO_SI = []

# Cache of the catalog built by units_by_si_name(); empty until its first call.
UNITS_BY_SI_NAME: dict = {}
  95. def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
  96. from_si = units_by_si_name(si_name)[symbol][pos_from_si]
  97. if isinstance(from_si, (float, int)):
  98. value = float(value) * from_si
  99. else:
  100. value = from_si(float(value))
  101. return value
  102. def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
  103. to_si = units_by_si_name(si_name)[symbol][pos_to_si]
  104. if isinstance(to_si, (float, int)):
  105. value = float(value) * to_si
  106. else:
  107. value = to_si(float(value))
  108. return value
  109. def units_by_si_name(si_name):
  110. global UNITS_BY_SI_NAME # pylint: disable=global-statement,global-variable-not-assigned
  111. if UNITS_BY_SI_NAME:
  112. return UNITS_BY_SI_NAME[si_name]
  113. # build the catalog ..
  114. for item in symbol_to_si():
  115. item_si_name = item[pos_si_name]
  116. item_symbol = item[pos_symbol]
  117. by_symbol = UNITS_BY_SI_NAME.get(item_si_name)
  118. if by_symbol is None:
  119. by_symbol = {}
  120. UNITS_BY_SI_NAME[item_si_name] = by_symbol
  121. by_symbol[item_symbol] = item
  122. return UNITS_BY_SI_NAME[si_name]
  123. pos_symbol = 0 # (alias) symbol
  124. pos_si_name = 1 # si_name
  125. pos_from_si = 2 # from_si
  126. pos_to_si = 3 # to_si
  127. pos_symbol = 4 # standardized symbol
  128. def symbol_to_si():
  129. """Generates a list of tuples, each tuple is a measure unit and the fields
  130. in the tuple are:
  131. 0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
  132. 1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
  133. 2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
  134. multiplied by 1609.344)
  135. 3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
  136. 100mi divided by 1609.344)
  137. The returned list is sorted, the first items are created from
  138. ``WIKIDATA_UNITS``, the second group of items is build from
  139. :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
  140. If you search this list for a symbol, then a match with a symbol from
  141. Wikidata has the highest weighting (first hit in the list), followed by the
  142. symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
  143. given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
  144. """
  145. global SYMBOL_TO_SI # pylint: disable=global-statement
  146. if SYMBOL_TO_SI:
  147. return SYMBOL_TO_SI
  148. # filter out units which can't be normalized to a SI unit and filter out
  149. # units without a symbol / arcsecond does not have a symbol
  150. # https://www.wikidata.org/wiki/Q829073
  151. for item in data.WIKIDATA_UNITS.values():
  152. if item['to_si_factor'] and item['symbol']:
  153. SYMBOL_TO_SI.append(
  154. (
  155. item['symbol'],
  156. item['si_name'],
  157. 1 / item['to_si_factor'], # from_si
  158. item['to_si_factor'], # to_si
  159. item['symbol'],
  160. )
  161. )
  162. for item in ADDITIONAL_UNITS:
  163. SYMBOL_TO_SI.append(
  164. (
  165. item['symbol'],
  166. item['si_name'],
  167. item['from_si'],
  168. item['to_si'],
  169. item['symbol'],
  170. )
  171. )
  172. alias_items = []
  173. for item in SYMBOL_TO_SI:
  174. for alias in ALIAS_SYMBOLS.get(item[0], ()):
  175. alias_items.append(
  176. (
  177. alias,
  178. item[1],
  179. item[2], # from_si
  180. item[3], # to_si
  181. item[0], # origin unit
  182. )
  183. )
  184. SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
  185. return SYMBOL_TO_SI
# SPARQL query sent by fetch_units().
#
# The response contains duplicate ?item with different ?symbol.
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result even if
# a ?item has different ?symbol of the same rank; see the result for
# https://www.wikidata.org/wiki/Q11582, there are multiple symbols of the same
# rank.
#
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
#
# NOTE(review): the property path ``wdt:P31/wdt:P279`` below has no ``*``
# quantifier, i.e. it matches exactly one P279 step after P31 — confirm that
# ``wdt:P279*`` is not what was intended.
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
WHERE
{
?item wdt:P31/wdt:P279 wd:Q47574 .
?item p:P5061 ?symbolP .
?symbolP ps:P5061 ?symbol ;
wikibase:rank ?rank .
OPTIONAL {
?item p:P2370 ?tosistmt .
?tosistmt psv:P2370 ?tosinode .
?tosinode wikibase:quantityAmount ?tosi .
?tosinode wikibase:quantityUnit ?tosiUnit .
}
FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""
  215. def fetch_units():
  216. """Fetch units from Wikidata. Function is used to update persistence of
  217. :py:obj:`searx.data.WIKIDATA_UNITS`."""
  218. results = collections.OrderedDict()
  219. response = wikidata.send_wikidata_query(SARQL_REQUEST)
  220. for unit in response['results']['bindings']:
  221. symbol = unit['symbol']['value']
  222. name = unit['item']['value'].rsplit('/', 1)[1]
  223. si_name = unit.get('tosiUnit', {}).get('value', '')
  224. if si_name:
  225. si_name = si_name.rsplit('/', 1)[1]
  226. to_si_factor = unit.get('tosi', {}).get('value', '')
  227. if name not in results:
  228. # ignore duplicate: always use the first one
  229. results[name] = {
  230. 'symbol': symbol,
  231. 'si_name': si_name if si_name else None,
  232. 'to_si_factor': float(to_si_factor) if to_si_factor else None,
  233. }
  234. return results