123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231 |
- # SPDX-License-Identifier: AGPL-3.0-or-later
- """Unit conversion on the basis of `SPARQL/WIKIDATA Precision, Units and
- Coordinates`_
- .. _SPARQL/WIKIDATA Precision, Units and Coordinates:
- https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
- """
# Public API of this module: the names exported by a star import.
__all__ = ["convert_from_si", "convert_to_si", "symbol_to_si"]
- import collections
- from searx import data
- from searx.engines import wikidata
def _celsius_to_kelvin(val):
    """Convert a temperature from °C to Kelvin."""
    return val + 273.15


def _kelvin_to_celsius(val):
    """Convert a temperature from Kelvin to °C."""
    return val - 273.15


def _fahrenheit_to_kelvin(val):
    """Convert a temperature from °F to Kelvin."""
    return (val + 459.67) * 5 / 9


def _kelvin_to_fahrenheit(val):
    """Convert a temperature from Kelvin to °F."""
    return (val * 9 / 5) - 459.67


ADDITIONAL_UNITS = [
    {
        "si_name": "Q11579",
        "symbol": "°C",
        "to_si": _celsius_to_kelvin,
        "from_si": _kelvin_to_celsius,
    },
    {
        "si_name": "Q11579",
        "symbol": "°F",
        "to_si": _fahrenheit_to_kelvin,
        "from_si": _kelvin_to_fahrenheit,
    },
]
"""Additional measure units and how to convert them to their SI unit and back.

.. code:: python

    {
        "si_name": "Q11579",                 # Wikidata item ID of the SI unit (Kelvin)
        "symbol": "°C",                      # symbol of the measure unit
        "to_si": lambda val: val + 273.15,   # convert measure value (val) to SI unit
        "from_si": lambda val: val - 273.15, # convert SI value (val) to measure unit
    },
    {
        "si_name": "Q11573",
        "symbol": "mi",
        "to_si": 1609.344,                   # convert measure value (val) to SI unit
        "from_si": 1 / 1609.344              # convert SI value (val) to measure unit
    },

The values of ``to_si`` and ``from_si`` can be either a :py:obj:`float` (used as
a multiplier) or a callable_ (gets the value and returns the converted value).

.. _callable: https://docs.python.org/3/glossary.html#term-callable
"""
# NOTE(review): 'L' as an alias of 'mi' looks unusual -- confirm it is intended.
ALIAS_SYMBOLS = {
    "°C": ("C",),
    "°F": ("F",),
    "mi": ("L",),
}
"""Maps the symbol of a known measure unit to a tuple of alias symbols, e.g.::

    '°C': ('C', ...),  # alias symbols for °C (Q69362731)
    '°F': ('F', ...),  # alias symbols for °F (Q99490479)
    'mi': ('L',),      # alias symbols for mi (Q253276)
"""
- SYMBOL_TO_SI = []
- UNITS_BY_SI_NAME: dict | None = None
# Positions of the fields in the tuples generated by symbol_to_si().
_POS_SYMBOL = 0
_POS_SI_NAME = 1
_POS_FROM_SI = 2
_POS_TO_SI = 3


def _apply_conversion(conversion, value: float | int) -> float:
    """Convert ``value`` with ``conversion``.

    ``conversion`` is either a number, which is used as a multiplier, or a
    callable that gets the value as :py:obj:`float` and returns the converted
    value.
    """
    if isinstance(conversion, (float, int)):
        return float(value) * conversion
    return conversion(float(value))


def convert_from_si(si_name: str, symbol: str, value: float | int) -> float:
    """Convert ``value``, given in the SI unit ``si_name``, into the measure
    unit ``symbol``.

    :param si_name: Wikidata item ID of the SI unit (e.g. ``Q11573``)
    :param symbol: symbol (or alias symbol) of the target measure unit
    :param value: value in the SI unit
    :raises KeyError: if ``si_name`` or ``symbol`` is unknown
    """
    unit = units_by_si_name(si_name)[symbol]
    return _apply_conversion(unit[_POS_FROM_SI], value)


def convert_to_si(si_name: str, symbol: str, value: float | int) -> float:
    """Convert ``value``, given in the measure unit ``symbol``, into the SI
    unit ``si_name``.

    :param si_name: Wikidata item ID of the SI unit (e.g. ``Q11573``)
    :param symbol: symbol (or alias symbol) of the source measure unit
    :param value: value in the measure unit
    :raises KeyError: if ``si_name`` or ``symbol`` is unknown
    """
    unit = units_by_si_name(si_name)[symbol]
    return _apply_conversion(unit[_POS_TO_SI], value)


def units_by_si_name(si_name):
    """Return the measure units that are normalized to the SI unit ``si_name``.

    The result is a dictionary that maps a symbol to its tuple from
    :py:func:`symbol_to_si`.  The index over all SI names is built on the first
    call and cached in ``UNITS_BY_SI_NAME``.

    :raises KeyError: if no unit is known for ``si_name``
    """
    global UNITS_BY_SI_NAME  # pylint: disable=global-statement

    if UNITS_BY_SI_NAME is None:
        # Build the complete index first and publish it afterwards, so that a
        # failure in symbol_to_si() does not leave an empty cache behind.
        index: dict = {}
        for item in symbol_to_si():
            # Group by the SI name of the *item* (not by the requested one).
            # A symbol seen again for the same SI unit replaces the earlier
            # entry.
            index.setdefault(item[_POS_SI_NAME], {})[item[_POS_SYMBOL]] = item
        UNITS_BY_SI_NAME = index

    return UNITS_BY_SI_NAME[si_name]
def symbol_to_si():
    """Generates a list of tuples, each tuple is a measure unit and the fields
    in the tuple are:

    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)

    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')

    2. ``from_si``: factor (or callable) to get the measure value from the SI
       value (e.g. SI 100m is equal to 100mi divided by 1609.344)

    3. ``to_si``: factor (or callable) to get the SI value from the measure
       value (e.g. 1mi is equal to SI 1m multiplied by 1609.344)

    4. Symbol of the origin unit; differs from field 0 only for items created
       from :py:obj:`ALIAS_SYMBOLS`

    The returned list is ordered: the first items are created from
    ``WIKIDATA_UNITS``, the second group of items is built from
    :py:obj:`ADDITIONAL_UNITS`, followed by the items created from
    :py:obj:`ALIAS_SYMBOLS`.

    If you search this list for a symbol, then a match with a symbol from
    Wikidata has the highest weighting (first hit in the list), followed by the
    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.

    The list is built on the first call and cached in ``SYMBOL_TO_SI``.
    """
    if SYMBOL_TO_SI:
        return SYMBOL_TO_SI

    # filter out units which can't be normalized to a SI unit and filter out
    # units without a symbol / arcsecond does not have a symbol
    # https://www.wikidata.org/wiki/Q829073
    units = [
        (
            item['symbol'],
            item['si_name'],
            1 / item['to_si_factor'],  # from_si
            item['to_si_factor'],  # to_si
            item['symbol'],  # origin unit
        )
        for item in data.WIKIDATA_UNITS.values()
        if item['to_si_factor'] and item['symbol']
    ]

    units.extend(
        (
            item['symbol'],
            item['si_name'],
            item['from_si'],
            item['to_si'],
            item['symbol'],  # origin unit
        )
        for item in ADDITIONAL_UNITS
    )

    # an alias shares the conversion of its origin unit
    alias_items = []
    for symbol, si_name, from_si, to_si, _origin in units:
        for alias in ALIAS_SYMBOLS.get(symbol, ()):
            alias_items.append((alias, si_name, from_si, to_si, symbol))

    # Fill the module level list in place and in a single step: the list
    # object stays the same for everyone who already holds a reference to it,
    # and an error above can't leave a partially filled cache behind.
    SYMBOL_TO_SI.extend(units + alias_items)
    return SYMBOL_TO_SI
# The response contains duplicates of ?item with different ?symbol values.
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result even
# if an ?item has several ?symbol values of the same rank.
#
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
# * https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates#Quantities
#
# See the result for https://www.wikidata.org/wiki/Q11582: there are multiple
# symbols with the same rank.
#
# NOTE(review): the name is presumably a typo of SPARQL_REQUEST; it is kept
# because fetch_units() refers to it.
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol ?tosi ?tosiUnit
WHERE
{
?item wdt:P31/wdt:P279 wd:Q47574 .
?item p:P5061 ?symbolP .
?symbolP ps:P5061 ?symbol ;
wikibase:rank ?rank .
OPTIONAL {
?item p:P2370 ?tosistmt .
?tosistmt psv:P2370 ?tosinode .
?tosinode wikibase:quantityAmount ?tosi .
?tosinode wikibase:quantityUnit ?tosiUnit .
}
FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""
def fetch_units():
    """Query Wikidata for the units of measure and return them keyed by the
    Wikidata item ID of the unit.

    Each value is a dictionary with the keys ``symbol``, ``si_name`` (item ID of
    the SI unit or ``None``) and ``to_si_factor`` (a :py:obj:`float` or
    ``None``).  The function is used to update the persistence of
    :py:obj:`searx.data.WIKIDATA_UNITS`.
    """
    units = collections.OrderedDict()
    reply = wikidata.send_wikidata_query(SARQL_REQUEST)

    for row in reply['results']['bindings']:
        symbol = row['symbol']['value']
        # the values are URIs, the item ID is the part after the last slash
        name = row['item']['value'].rsplit('/', 1)[1]
        si_uri = row.get('tosiUnit', {}).get('value', '')
        si_name = si_uri.rsplit('/', 1)[1] if si_uri else si_uri
        factor = row.get('tosi', {}).get('value', '')

        if name in units:
            # ignore duplicate: always use the first one
            continue

        units[name] = {
            'symbol': symbol,
            'si_name': si_name or None,
            'to_si_factor': float(factor) if factor else None,
        }

    return units
|