Browse Source

[mod] improve unit converter plugin

- l10n support: parse and format decimal numbers by babel
- ability to add additional units
- improved unit detection (symbols are not unique)
- support for alias units (0,010C to F --> 32,018 °F)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
742303d030
2 changed files with 225 additions and 32 deletions
  1. 9 0
      docs/src/searx.plugins.unit_converter.rst
  2. 216 32
      searx/plugins/unit_converter.py

+ 9 - 0
docs/src/searx.plugins.unit_converter.rst

@@ -0,0 +1,9 @@
+.. _unit converter plugin:
+
+=====================
+Unit converter plugin
+=====================
+
+.. automodule:: searx.plugins.unit_converter
+   :members:
+

+ 216 - 32
searx/plugins/unit_converter.py

@@ -1,10 +1,29 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Calculate mathematical expressions using ack#eval
+"""A plugin for converting measured values from one unit to another unit (a
+unit converter).
+
+The plugin looks up the symbols (given in the query term) in a list of
+converters, each converter is one item in the list (compare
+:py:obj:`ADDITIONAL_UNITS`).  If the symbols are ambiguous, the matching units
+of measurement are evaluated.  The weighting in the evaluation results from the
+sorting of the :py:obj:`list of unit converters<symbol_to_si>`.
+
+Enable in ``settings.yml``:
+
+.. code:: yaml
+
+  enabled_plugins:
+    ..
+    - 'Unit converter plugin'
+
 """
 """
 
 
-from flask_babel import gettext
+import re
+import babel.numbers
+from flask_babel import gettext, get_locale
+
+from searx import data
 
 
-from searx.data import WIKIDATA_UNITS
 
 
 name = "Unit converter plugin"
 name = "Unit converter plugin"
 description = gettext("Convert between units")
 description = gettext("Convert between units")
@@ -12,47 +31,212 @@ default_on = True
 
 
 CONVERT_KEYWORDS = ["in", "to", "as"]
 CONVERT_KEYWORDS = ["in", "to", "as"]
 
 
+# inspired by https://stackoverflow.com/a/42475086
+RE_MEASURE = r'''
+(?P<sign>[-+]?)         # +/- or nothing for positive
+(\s*)                   # separator: white space or nothing
+(?P<number>[\d\.,]*)    # number: 1,000.00 (en) or 1.000,00 (de)
+(?P<E>[eE][-+]?\d+)?    # scientific notation: e(+/-)2 (*10^2)
+(\s*)                   # separator: white space or nothing
+(?P<unit>\S+)           # unit of measure
+'''
+
+
+ADDITIONAL_UNITS = [
+    {
+        "si_name": "Q11579",
+        "symbol": "°C",
+        "to_si": lambda val: val + 273.15,
+        "from_si": lambda val: val - 273.15,
+    },
+    {
+        "si_name": "Q11579",
+        "symbol": "°F",
+        "to_si": lambda val: (val + 459.67) * 5 / 9,
+        "from_si": lambda val: (val * 9 / 5) - 459.67,
+    },
+]
+"""Additional items to convert from a measure unit to an SI unit (and vice versa).
+
+.. code:: python
+
+    {
+        "si_name": "Q11579",                 # Wikidata item ID of the SI unit (Kelvin)
+        "symbol": "°C",                      # symbol of the measure unit
+        "to_si": lambda val: val + 273.15,   # convert measure value (val) to SI unit
+        "from_si": lambda val: val - 273.15, # convert SI value (val) to measure unit
+    },
+    {
+        "si_name": "Q11573",
+        "symbol": "mi",
+        "to_si": 1609.344,                   # convert measure value (val) to SI unit
+        "from_si": 1 / 1609.344              # convert SI value (val) to measure unit
+    },
+
+The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
+or a callable_ (val in / converted value returned).
+
+.. _callable: https://docs.python.org/3/glossary.html#term-callable
+"""
+
+
+ALIAS_SYMBOLS = {
+    '°C': ('C',),
+    '°F': ('F',),
+    'mi': ('L',),
+}
+"""Alias symbols for known unit of measure symbols / by example::
+
+    '°C': ('C', ...),  # list of alias symbols for °C (Q69362731)
+    '°F': ('F', ...),  # list of alias symbols for °F (Q99490479)
+    'mi': ('L',),      # list of alias symbols for mi (Q253276)
+"""
+
+
+SYMBOL_TO_SI = []
+
+
+def symbol_to_si():
+    """Generates a list of tuples, each tuple is a measure unit and the fields
+    in the tuple are:
+
+    0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
+
+    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
 
 
-def _convert(from_value, source_si_factor, target_si_factor):
-    return from_value * source_si_factor / target_si_factor
+    2. Factor (or callable) to get measure value from SI value (e.g. SI 100m is
+       equal to 100 divided by 1609.344 mi)


+    3. Factor (or callable) to get SI value from measure unit (e.g. 1mi is equal
+       to SI 1m multiplied by 1609.344)
 
 
-def _parse_text_and_convert(search, splitted_query):
-    if len(splitted_query) != 2 or splitted_query[0].strip() == "" or splitted_query[1].strip() == "":
+    The returned list is sorted, the first items are created from
+    ``WIKIDATA_UNITS``, the second group of items is built from
+    :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
+
+    If you search this list for a symbol, then a match with a symbol from
+    Wikidata has the highest weighting (first hit in the list), followed by the
+    symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
+    given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
+
+    """
+
+    global SYMBOL_TO_SI  # pylint: disable=global-statement
+    if SYMBOL_TO_SI:
+        return SYMBOL_TO_SI
+
+    # filter out units which can't be normalized to a SI unit and filter out
+    # units without a symbol / arcsecond does not have a symbol
+    # https://www.wikidata.org/wiki/Q829073
+
+    for item in data.WIKIDATA_UNITS.values():
+        if item['to_si_factor'] and item['symbol']:
+            SYMBOL_TO_SI.append(
+                (
+                    item['symbol'],
+                    item['si_name'],
+                    1 / item['to_si_factor'],  # from_si
+                    item['to_si_factor'],  # to_si
+                    item['symbol'],
+                )
+            )
+
+    for item in ADDITIONAL_UNITS:
+        SYMBOL_TO_SI.append(
+            (
+                item['symbol'],
+                item['si_name'],
+                item['from_si'],
+                item['to_si'],
+                item['symbol'],
+            )
+        )
+
+    alias_items = []
+    for item in SYMBOL_TO_SI:
+        for alias in ALIAS_SYMBOLS.get(item[0], ()):
+            alias_items.append(
+                (
+                    alias,
+                    item[1],
+                    item[2],  # from_si
+                    item[3],  # to_si
+                    item[0],  # origin unit
+                )
+            )
+    SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
+    return SYMBOL_TO_SI
+
+
+def _parse_text_and_convert(search, from_query, to_query):
+
+    # pylint: disable=too-many-branches, too-many-locals
+
+    if not (from_query and to_query):
         return
         return
 
 
-    from_value = ""
-    from_unit_key = ""
+    measured = re.match(RE_MEASURE, from_query, re.VERBOSE)
+    if not (measured and measured.group('number') and measured.group('unit')):
+        return
 
 
-    # only parse digits as value that belong together
-    read_alpha = False
-    for c in splitted_query[0]:
-        if not read_alpha and (c in ("-", ".") or str.isdigit(c)):
-            from_value += c
-            read_alpha = True
-        elif c != " ":
-            from_unit_key += c
+    # Symbols are not unique, if there are several hits for the from-unit, then
+    # the correct one must be determined by comparing it with the to-unit
+    # https://github.com/searxng/searxng/pull/3378#issuecomment-2080974863
 
 
-    to_unit_key = splitted_query[1].strip()
+    # first: collecting possible units
 
 
-    from_unit = None
-    to_unit = None
+    source_list, target_list = [], []
 
 
-    for unit in WIKIDATA_UNITS.values():
-        if unit['symbol'] == from_unit_key:
-            from_unit = unit
+    for symbol, si_name, from_si, to_si, orig_symbol in symbol_to_si():
 
 
-        if unit['symbol'] == to_unit_key:
-            to_unit = unit
+        if symbol == measured.group('unit'):
+            source_list.append((si_name, to_si))
+        if symbol == to_query:
+            target_list.append((si_name, from_si, orig_symbol))
 
 
-        if from_unit and to_unit:
-            break
+    if not (source_list and target_list):
+        return
+
+    source_to_si = target_from_si = target_symbol = None
+
+    # second: find the right unit by comparing list of from-units with list of to-units
 
 
-    if from_unit is None or to_unit is None or to_unit.get('si_name') != from_unit.get('si_name'):
+    for source in source_list:
+        for target in target_list:
+            if source[0] == target[0]:  # compare si_name
+                source_to_si = source[1]
+                target_from_si = target[1]
+                target_symbol = target[2]
+
+    if not (source_to_si and target_from_si):
         return
         return
 
 
-    result = _convert(float(from_value), from_unit['to_si_factor'], to_unit['to_si_factor'])
-    search.result_container.answers['conversion'] = {'answer': f"{result:g} {to_unit['symbol']}"}
+    _locale = get_locale() or 'en_US'
+
+    value = measured.group('sign') + measured.group('number') + (measured.group('E') or '')
+    value = babel.numbers.parse_decimal(value, locale=_locale)
+
+    # convert value to SI unit
+
+    if isinstance(source_to_si, (float, int)):
+        value = float(value) * source_to_si
+    else:
+        value = source_to_si(float(value))
+
+    # convert value from SI unit to target unit
+
+    if isinstance(target_from_si, (float, int)):
+        value = float(value) * target_from_si
+    else:
+        value = target_from_si(float(value))
+
+    if measured.group('E'):
+        # when incoming notation is scientific, outgoing notation is scientific
+        result = babel.numbers.format_scientific(value, locale=_locale)
+    else:
+        result = babel.numbers.format_decimal(value, locale=_locale, format='#,##0.##########;-#')
+
+    search.result_container.answers['conversion'] = {'answer': f'{result} {target_symbol}'}
 
 
 
 
 def post_search(_request, search):
 def post_search(_request, search):
@@ -69,8 +253,8 @@ def post_search(_request, search):
     for query_part in query_parts:
     for query_part in query_parts:
         for keyword in CONVERT_KEYWORDS:
         for keyword in CONVERT_KEYWORDS:
             if query_part == keyword:
             if query_part == keyword:
-                keyword_split = query.split(keyword, 1)
-                _parse_text_and_convert(search, keyword_split)
+                from_query, to_query = query.split(keyword, 1)
+                _parse_text_and_convert(search, from_query.strip(), to_query.strip())
                 return True
                 return True
 
 
     return True
     return True