|
@@ -0,0 +1,231 @@
|
|
|
+# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
+"""Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
|
|
|
+Coordinates`_
|
|
|
+
|
|
|
+.. _SPARQL/WIKIDATA Precision, Units and Coordinates:
|
|
|
+ https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
|
|
|
+"""
|
|
|
+
|
|
|
+__all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]
|
|
|
+
|
|
|
+import collections
|
|
|
+
|
|
|
+from searx import data
|
|
|
+from searx.engines import wikidata
|
|
|
+
|
|
|
+ADDITIONAL_UNITS = [
|
|
|
+ {
|
|
|
+ "si_name": "Q11579",
|
|
|
+ "symbol": "°C",
|
|
|
+ "to_si": lambda val: val + 273.15,
|
|
|
+ "from_si": lambda val: val - 273.15,
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "si_name": "Q11579",
|
|
|
+ "symbol": "°F",
|
|
|
+ "to_si": lambda val: (val + 459.67) * 5 / 9,
|
|
|
+ "from_si": lambda val: (val * 9 / 5) - 459.67,
|
|
|
+ },
|
|
|
+]
|
|
|
+"""Additional items to convert from a measure unit to a SI unit (vice versa).
|
|
|
+
|
|
|
+.. code:: python
|
|
|
+
|
|
|
+ {
|
|
|
+ "si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
|
|
|
+ "symbol": "°C", # symbol of the measure unit
|
|
|
+ "to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
|
|
|
+ "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "si_name": "Q11573",
|
|
|
+ "symbol": "mi",
|
|
|
+ "to_si": 1609.344, # convert measure value (val) to SI unit
|
|
|
+ "from_si": 1 / 1609.344 # convert SI value (val) measure unit
|
|
|
+ },
|
|
|
+
|
|
|
+The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
|
|
|
+or a callable_ (val in / converted value returned).
|
|
|
+
|
|
|
+.. _callable: https://docs.python.org/3/glossary.html#term-callable
|
|
|
+"""
|
|
|
+
|
|
|
+
|
|
|
+ALIAS_SYMBOLS = {
|
|
|
+ '°C': ('C',),
|
|
|
+ '°F': ('F',),
|
|
|
+ 'mi': ('L',),
|
|
|
+}
|
|
|
+"""Alias symbols for known unit of measure symbols / by example::
|
|
|
+
|
|
|
+ '°C': ('C', ...), # list of alias symbols for °C (Q69362731)
|
|
|
+ '°F': ('F', ...), # list of alias symbols for °F (Q99490479)
|
|
|
+ 'mi': ('L',), # list of alias symbols for mi (Q253276)
|
|
|
+"""
|
|
|
+
|
|
|
+
|
|
|
+SYMBOL_TO_SI = []
|
|
|
+UNITS_BY_SI_NAME: dict | None = None
|
|
|
+
|
|
|
+
|
|
|
+def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
|
|
|
+ from_si = units_by_si_name(si_name)[symbol][symbol]["from_si"]
|
|
|
+ if isinstance(from_si, (float, int)):
|
|
|
+ value = float(value) * from_si
|
|
|
+ else:
|
|
|
+ value = from_si(float(value))
|
|
|
+ return value
|
|
|
+
|
|
|
+
|
|
|
+def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
|
|
|
+ to_si = units_by_si_name(si_name)[symbol][symbol]["to_si"]
|
|
|
+ if isinstance(to_si, (float, int)):
|
|
|
+ value = float(value) * to_si
|
|
|
+ else:
|
|
|
+ value = to_si(float(value))
|
|
|
+ return value
|
|
|
+
|
|
|
+
|
|
|
+def units_by_si_name(si_name):
|
|
|
+
|
|
|
+ global UNITS_BY_SI_NAME
|
|
|
+ if UNITS_BY_SI_NAME is not None:
|
|
|
+ return UNITS_BY_SI_NAME[si_name]
|
|
|
+
|
|
|
+ UNITS_BY_SI_NAME = {}
|
|
|
+ for item in symbol_to_si():
|
|
|
+ by_symbol = UNITS_BY_SI_NAME.get(si_name)
|
|
|
+ if by_symbol is None:
|
|
|
+ by_symbol = {}
|
|
|
+ UNITS_BY_SI_NAME[si_name] = by_symbol
|
|
|
+ by_symbol[item["symbol"]] = item
|
|
|
+ return UNITS_BY_SI_NAME[si_name]
|
|
|
+
|
|
|
+
|
|
|
+def symbol_to_si():
|
|
|
+ """Generates a list of tuples, each tuple is a measure unit and the fields
|
|
|
+ in the tuple are:
|
|
|
+
|
|
|
+ 0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
|
|
|
+
|
|
|
+ 1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
|
|
|
+
|
|
|
+ 2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
|
|
|
+ multiplied by 1609.344)
|
|
|
+
|
|
|
+ 3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
|
|
|
+ 100mi divided by 1609.344)
|
|
|
+
|
|
|
+ The returned list is sorted, the first items are created from
|
|
|
+ ``WIKIDATA_UNITS``, the second group of items is build from
|
|
|
+ :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
|
|
|
+
|
|
|
+ If you search this list for a symbol, then a match with a symbol from
|
|
|
+ Wikidata has the highest weighting (first hit in the list), followed by the
|
|
|
+ symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
|
|
|
+ given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
|
|
|
+
|
|
|
+ """
|
|
|
+
|
|
|
+ global SYMBOL_TO_SI # pylint: disable=global-statement
|
|
|
+ if SYMBOL_TO_SI:
|
|
|
+ return SYMBOL_TO_SI
|
|
|
+
|
|
|
+ # filter out units which can't be normalized to a SI unit and filter out
|
|
|
+ # units without a symbol / arcsecond does not have a symbol
|
|
|
+ # https://www.wikidata.org/wiki/Q829073
|
|
|
+
|
|
|
+ for item in data.WIKIDATA_UNITS.values():
|
|
|
+ if item['to_si_factor'] and item['symbol']:
|
|
|
+ SYMBOL_TO_SI.append(
|
|
|
+ (
|
|
|
+ item['symbol'],
|
|
|
+ item['si_name'],
|
|
|
+ 1 / item['to_si_factor'], # from_si
|
|
|
+ item['to_si_factor'], # to_si
|
|
|
+ item['symbol'],
|
|
|
+ )
|
|
|
+ )
|
|
|
+
|
|
|
+ for item in ADDITIONAL_UNITS:
|
|
|
+ SYMBOL_TO_SI.append(
|
|
|
+ (
|
|
|
+ item['symbol'],
|
|
|
+ item['si_name'],
|
|
|
+ item['from_si'],
|
|
|
+ item['to_si'],
|
|
|
+ item['symbol'],
|
|
|
+ )
|
|
|
+ )
|
|
|
+
|
|
|
+ alias_items = []
|
|
|
+ for item in SYMBOL_TO_SI:
|
|
|
+ for alias in ALIAS_SYMBOLS.get(item[0], ()):
|
|
|
+ alias_items.append(
|
|
|
+ (
|
|
|
+ alias,
|
|
|
+ item[1],
|
|
|
+ item[2], # from_si
|
|
|
+ item[3], # to_si
|
|
|
+ item[0], # origin unit
|
|
|
+ )
|
|
|
+ )
|
|
|
+ SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
|
|
|
+ return SYMBOL_TO_SI
|
|
|
+
|
|
|
+
|
|
|
+# the response contains duplicate ?item with the different ?symbol
|
|
|
+# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
|
|
|
+# even if a ?item has different ?symbol of the same rank.
|
|
|
+# A deterministic result
|
|
|
+# see:
|
|
|
+# * https://www.wikidata.org/wiki/Help:Ranking
|
|
|
+# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
|
|
|
+# * https://w.wiki/32BT
|
|
|
+# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
|
|
|
+# see the result for https://www.wikidata.org/wiki/Q11582
|
|
|
+# there are multiple symbols the same rank
|
|
|
+
|
|
|
+SARQL_REQUEST = """
|
|
|
+SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
|
|
|
+WHERE
|
|
|
+{
|
|
|
+ ?item wdt:P31/wdt:P279 wd:Q47574 .
|
|
|
+ ?item p:P5061 ?symbolP .
|
|
|
+ ?symbolP ps:P5061 ?symbol ;
|
|
|
+ wikibase:rank ?rank .
|
|
|
+ OPTIONAL {
|
|
|
+ ?item p:P2370 ?tosistmt .
|
|
|
+ ?tosistmt psv:P2370 ?tosinode .
|
|
|
+ ?tosinode wikibase:quantityAmount ?tosi .
|
|
|
+ ?tosinode wikibase:quantityUnit ?tosiUnit .
|
|
|
+ }
|
|
|
+ FILTER(LANG(?symbol) = "en").
|
|
|
+}
|
|
|
+ORDER BY ?item DESC(?rank) ?symbol
|
|
|
+"""
|
|
|
+
|
|
|
+
|
|
|
+def fetch_units():
|
|
|
+ """Fetch units from Wikidata. Function is used to update persistence of
|
|
|
+ :py:obj:`searx.data.WIKIDATA_UNITS`."""
|
|
|
+
|
|
|
+ results = collections.OrderedDict()
|
|
|
+ response = wikidata.send_wikidata_query(SARQL_REQUEST)
|
|
|
+ for unit in response['results']['bindings']:
|
|
|
+
|
|
|
+ symbol = unit['symbol']['value']
|
|
|
+ name = unit['item']['value'].rsplit('/', 1)[1]
|
|
|
+ si_name = unit.get('tosiUnit', {}).get('value', '')
|
|
|
+ if si_name:
|
|
|
+ si_name = si_name.rsplit('/', 1)[1]
|
|
|
+
|
|
|
+ to_si_factor = unit.get('tosi', {}).get('value', '')
|
|
|
+ if name not in results:
|
|
|
+ # ignore duplicate: always use the first one
|
|
|
+ results[name] = {
|
|
|
+ 'symbol': symbol,
|
|
|
+ 'si_name': si_name if si_name else None,
|
|
|
+ 'to_si_factor': float(to_si_factor) if to_si_factor else None,
|
|
|
+ }
|
|
|
+ return results
|