Browse Source

[fix] update wikidata units - remove URL prefix from Q-name

Sometimes the URL prefix switches from http to https. This patch hardens the
code that removes the URL prefix from the Wikidata Q-name; the issue was
reported in [1].

[1] https://github.com/searxng/searxng/pull/3437#issuecomment-2082121730

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
11fe88bb40
2 changed files with 138 additions and 6 deletions
  1. 132 2
      searx/data/wikidata_units.json
  2. 6 4
      searxng_extra/update/update_wikidata_units.py

+ 132 - 2
searx/data/wikidata_units.json

@@ -1404,6 +1404,136 @@
         "symbol": "cm H₂O",
         "symbol": "cm H₂O",
         "to_si_factor": 98.0665
         "to_si_factor": 98.0665
     },
     },
+    "Q125387265": {
+        "si_name": "Q11574",
+        "symbol": "qs",
+        "to_si_factor": 1e-30
+    },
+    "Q125387281": {
+        "si_name": "Q11574",
+        "symbol": "rs",
+        "to_si_factor": 1e-27
+    },
+    "Q125389370": {
+        "si_name": "Q11579",
+        "symbol": "rK",
+        "to_si_factor": 1e-27
+    },
+    "Q125389387": {
+        "si_name": "Q11579",
+        "symbol": "qK",
+        "to_si_factor": 1e-30
+    },
+    "Q125389519": {
+        "si_name": "Q11579",
+        "symbol": "RK",
+        "to_si_factor": 1e+27
+    },
+    "Q125389534": {
+        "si_name": "Q11579",
+        "symbol": "QK",
+        "to_si_factor": 1e+30
+    },
+    "Q125390959": {
+        "si_name": "Q41509",
+        "symbol": "rmol",
+        "to_si_factor": 1e-27
+    },
+    "Q125390987": {
+        "si_name": "Q41509",
+        "symbol": "qmol",
+        "to_si_factor": 1e-30
+    },
+    "Q125392001": {
+        "si_name": "Q41509",
+        "symbol": "Rmol",
+        "to_si_factor": 1e+27
+    },
+    "Q125392014": {
+        "si_name": "Q41509",
+        "symbol": "Qmol",
+        "to_si_factor": 1e+30
+    },
+    "Q125470272": {
+        "si_name": "Q102573",
+        "symbol": "rBq",
+        "to_si_factor": 1e-27
+    },
+    "Q125470277": {
+        "si_name": "Q102573",
+        "symbol": "qBq",
+        "to_si_factor": 1e-30
+    },
+    "Q125470426": {
+        "si_name": "Q102573",
+        "symbol": "RBq",
+        "to_si_factor": 1e+27
+    },
+    "Q125470445": {
+        "si_name": "Q102573",
+        "symbol": "QBq",
+        "to_si_factor": 1e+30
+    },
+    "Q125470704": {
+        "si_name": "Q25406",
+        "symbol": "rC",
+        "to_si_factor": 1e-27
+    },
+    "Q125470716": {
+        "si_name": "Q25406",
+        "symbol": "qC",
+        "to_si_factor": 1e-30
+    },
+    "Q125471094": {
+        "si_name": "Q25406",
+        "symbol": "RC",
+        "to_si_factor": 1e+27
+    },
+    "Q125471109": {
+        "si_name": "Q25406",
+        "symbol": "QC",
+        "to_si_factor": 1e+30
+    },
+    "Q125471199": {
+        "si_name": null,
+        "symbol": "r°C",
+        "to_si_factor": null
+    },
+    "Q125471200": {
+        "si_name": null,
+        "symbol": "q°C",
+        "to_si_factor": null
+    },
+    "Q125471246": {
+        "si_name": null,
+        "symbol": "R°C",
+        "to_si_factor": null
+    },
+    "Q125471247": {
+        "si_name": null,
+        "symbol": "Q°C",
+        "to_si_factor": null
+    },
+    "Q125471334": {
+        "si_name": "Q131255",
+        "symbol": "rF",
+        "to_si_factor": 1e-27
+    },
+    "Q125471344": {
+        "si_name": "Q131255",
+        "symbol": "qF",
+        "to_si_factor": 1e-30
+    },
+    "Q125471409": {
+        "si_name": "Q131255",
+        "symbol": "RF",
+        "to_si_factor": 1e+27
+    },
+    "Q125471423": {
+        "si_name": "Q131255",
+        "symbol": "QF",
+        "to_si_factor": 1e+30
+    },
     "Q12714022": {
     "Q12714022": {
         "si_name": "Q11570",
         "si_name": "Q11570",
         "symbol": "cwt",
         "symbol": "cwt",
@@ -4506,7 +4636,7 @@
     },
     },
     "Q829073": {
     "Q829073": {
         "si_name": "Q33680",
         "si_name": "Q33680",
-        "symbol": null,
+        "symbol": "\"",
         "to_si_factor": 4.84813681109536e-06
         "to_si_factor": 4.84813681109536e-06
     },
     },
     "Q83216": {
     "Q83216": {
@@ -6274,4 +6404,4 @@
         "symbol": "m Hg",
         "symbol": "m Hg",
         "to_si_factor": 133322.0
         "to_si_factor": 133322.0
     }
     }
-}
+}

+ 6 - 4
searxng_extra/update/update_wikidata_units.py

@@ -51,16 +51,18 @@ WHERE
 ORDER BY ?item DESC(?rank) ?symbol
 ORDER BY ?item DESC(?rank) ?symbol
 """
 """
 
 
-_wikidata_url = "https://www.wikidata.org/entity/"
-
 
 
 def get_data():
 def get_data():
     results = collections.OrderedDict()
     results = collections.OrderedDict()
     response = wikidata.send_wikidata_query(SARQL_REQUEST)
     response = wikidata.send_wikidata_query(SARQL_REQUEST)
     for unit in response['results']['bindings']:
     for unit in response['results']['bindings']:
-        name = unit['item']['value'].replace(_wikidata_url, '')
+
         symbol = unit['symbol']['value']
         symbol = unit['symbol']['value']
-        si_name = unit.get('tosiUnit', {}).get('value', '').replace(_wikidata_url, '')
+        name = unit['item']['value'].rsplit('/', 1)[1]
+        si_name = unit.get('tosiUnit', {}).get('value', '')
+        if si_name:
+            si_name = si_name.rsplit('/', 1)[1]
+
         to_si_factor = unit.get('tosi', {}).get('value', '')
         to_si_factor = unit.get('tosi', {}).get('value', '')
         if name not in results:
         if name not in results:
             # ignore duplicate: always use the first one
             # ignore duplicate: always use the first one