Browse Source

[mod] comprehensive revision of the searxng_extra/update/ scripts

- pylint all scripts
- fix some errors reported by pyright
- from searx.data import data_dir (Path.open)
- fix import from pygments.formatters.html

NOTE: no functional changes!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
ce4aaf6cad

+ 5 - 9
searxng_extra/update/update_ahmia_blacklist.py

@@ -11,11 +11,10 @@ Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
 """
 # pylint: disable=use-dict-literal
 
-from os.path import join
-
 import requests
-from searx import searx_dir
+from searx.data import data_dir
 
+DATA_FILE = data_dir / 'ahmia_blacklist.txt'
 URL = 'https://ahmia.fi/blacklist/banned/'
 
 
@@ -23,15 +22,12 @@ def fetch_ahmia_blacklist():
     resp = requests.get(URL, timeout=3.0)
     if resp.status_code != 200:
         # pylint: disable=broad-exception-raised
-        raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
+        raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)  # type: ignore
     return resp.text.split()
 
 
-def get_ahmia_blacklist_filename():
-    return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
-
-
 if __name__ == '__main__':
     blacklist = fetch_ahmia_blacklist()
-    with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f:
+    blacklist.sort()
+    with DATA_FILE.open("w", encoding='utf-8') as f:
         f.write('\n'.join(blacklist))

+ 5 - 10
searxng_extra/update/update_currencies.py

@@ -15,12 +15,11 @@ import re
 import unicodedata
 import json
 
-# set path
-from os.path import join
-
-from searx import searx_dir
 from searx.locales import LOCALE_NAMES, locales_initialize
 from searx.engines import wikidata, set_loggers
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'currencies.json'
 
 set_loggers(wikidata, 'wikidata')
 locales_initialize()
@@ -133,10 +132,6 @@ def fetch_db():
     return db
 
 
-def get_filename():
-    return join(join(searx_dir, "data"), "currencies.json")
-
-
 def main():
 
     db = fetch_db()
@@ -156,8 +151,8 @@ def main():
         if len(db['names'][name]) == 1:
             db['names'][name] = db['names'][name][0]
 
-    with open(get_filename(), 'w', encoding='utf8') as f:
-        json.dump(db, f, ensure_ascii=False, indent=4)
+    with DATA_FILE.open('w', encoding='utf8') as f:
+        json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
 
 
 if __name__ == '__main__':

+ 5 - 2
searxng_extra/update/update_engine_descriptions.py

@@ -24,6 +24,9 @@ from searx import searx_dir
 from searx.utils import gen_useragent, detect_language
 import searx.search
 import searx.network
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'engine_descriptions.json'
 
 set_loggers(wikidata, 'wikidata')
 locales_initialize()
@@ -362,8 +365,8 @@ def main():
     fetch_website_descriptions()
 
     output = get_output()
-    with open(get_engine_descriptions_filename(), 'w', encoding='utf8') as f:
-        f.write(json.dumps(output, indent=1, separators=(',', ':'), ensure_ascii=False))
+    with DATA_FILE.open('w', encoding='utf8') as f:
+        f.write(json.dumps(output, indent=1, separators=(',', ':'), sort_keys=True, ensure_ascii=False))
 
 
 if __name__ == "__main__":

+ 3 - 3
searxng_extra/update/update_engine_traits.py

@@ -144,9 +144,9 @@ def write_languages_file(sxng_tag_list):
 
         item = (
             sxng_tag,
-            sxng_locale.get_language_name().title(),
+            sxng_locale.get_language_name().title(),  # type: ignore
             sxng_locale.get_territory_name() or '',
-            sxng_locale.english_name.split(' (')[0],
+            sxng_locale.english_name.split(' (')[0] if sxng_locale.english_name else '',
             UnicodeEscape(flag),
         )
 
@@ -154,7 +154,7 @@ def write_languages_file(sxng_tag_list):
 
     language_codes = tuple(language_codes)
 
-    with open(languages_file, 'w', encoding='utf-8') as new_file:
+    with languages_file.open('w', encoding='utf-8') as new_file:
         file_content = "{header} {language_codes}{footer}".format(
             header=languages_file_header,
             language_codes=pformat(language_codes, width=120, indent=4)[1:-1],

+ 4 - 7
searxng_extra/update/update_external_bangs.py

@@ -8,20 +8,17 @@ from :py:obj:`BANGS_URL`.
 
 """
 
-from pathlib import Path
 import json
-
 import httpx
 
-from searx import searx_dir
 from searx.external_bang import LEAF_KEY
+from searx.data import data_dir
 
+DATA_FILE = data_dir / 'external_bangs.json'
 
 BANGS_URL = 'https://duckduckgo.com/bang.js'
 """JSON file which contains the bangs."""
 
-BANGS_DATA_FILE = Path(searx_dir) / 'data' / 'external_bangs.json'
-
 HTTPS_COLON = 'https:'
 HTTP_COLON = 'http:'
 
@@ -36,8 +33,8 @@ def main():
         'version': 0,
         'trie': trie,
     }
-    with open(BANGS_DATA_FILE, 'w', encoding="utf8") as f:
-        json.dump(output, f, sort_keys=True, ensure_ascii=False, indent=4)
+    with DATA_FILE.open('w', encoding="utf8") as f:
+        json.dump(output, f, indent=4, sort_keys=True, ensure_ascii=False)
 
 
 def merge_when_no_leaf(node):

+ 6 - 9
searxng_extra/update/update_firefox_version.py

@@ -11,13 +11,14 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
 
 import json
 import re
-from os.path import join
 from urllib.parse import urlparse, urljoin
 from packaging.version import parse
 
 import requests
 from lxml import html
-from searx import searx_dir
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'useragents.json'
 
 URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
 RELEASE_PATH = '/pub/firefox/releases/'
@@ -41,7 +42,7 @@ def fetch_firefox_versions():
     resp = requests.get(URL, timeout=2.0)
     if resp.status_code != 200:
         # pylint: disable=broad-exception-raised
-        raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
+        raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)  # type: ignore
     dom = html.fromstring(resp.text)
     versions = []
 
@@ -74,11 +75,7 @@ def fetch_firefox_last_versions():
     return result
 
 
-def get_useragents_filename():
-    return join(join(searx_dir, "data"), "useragents.json")
-
-
 if __name__ == '__main__':
     useragents["versions"] = fetch_firefox_last_versions()
-    with open(get_useragents_filename(), "w", encoding='utf-8') as f:
-        json.dump(useragents, f, indent=4, ensure_ascii=False)
+    with DATA_FILE.open('w', encoding='utf-8') as f:
+        json.dump(useragents, f, indent=4, sort_keys=True, ensure_ascii=False)

+ 7 - 7
searxng_extra/update/update_locales.py

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# lint: pylint
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Update locale names in :origin:`searx/data/locales.json` used by
 :ref:`searx.locales`
@@ -6,12 +7,12 @@
 - :py:obj:`searx.locales.RTL_LOCALES`
 - :py:obj:`searx.locales.LOCALE_NAMES`
 """
+# pylint: disable=invalid-name
 from __future__ import annotations
 
 from typing import Set
 import json
 from pathlib import Path
-import os
 
 import babel
 import babel.languages
@@ -61,7 +62,7 @@ def main():
         "RTL_LOCALES": sorted(RTL_LOCALES),
     }
 
-    with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
+    with LOCALE_DATA_FILE.open('w', encoding='utf-8') as f:
         json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
 
 
@@ -84,11 +85,10 @@ def get_locale_descr(locale: babel.Locale, tr_locale):
             return native_language
         return native_language + ' (' + english_language + ')'
 
-    else:
-        result = native_language + ', ' + native_territory + ' (' + english_language
-        if english_territory:
-            return result + ', ' + english_territory + ')'
-        return result + ')'
+    result = native_language + ', ' + native_territory + ' (' + english_language
+    if english_territory:
+        return result + ', ' + english_territory + ')'
+    return result + ')'
 
 
 def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:

+ 5 - 8
searxng_extra/update/update_osm_keys_tags.py

@@ -45,13 +45,14 @@ Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
 
 import json
 import collections
-from pathlib import Path
 
-from searx import searx_dir
 from searx.network import set_timeout_for_thread
 from searx.engines import wikidata, set_loggers
 from searx.sxng_locales import sxng_locales
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'osm_keys_tags.json'
 
 set_loggers(wikidata, 'wikidata')
 
@@ -203,10 +204,6 @@ def optimize_keys(data):
     return data
 
 
-def get_osm_tags_filename():
-    return Path(searx_dir) / "data" / "osm_keys_tags.json"
-
-
 if __name__ == '__main__':
 
     set_timeout_for_thread(60)
@@ -214,5 +211,5 @@ if __name__ == '__main__':
         'keys': optimize_keys(get_keys()),
         'tags': optimize_tags(get_tags()),
     }
-    with open(get_osm_tags_filename(), 'w', encoding="utf8") as f:
-        json.dump(result, f, indent=4, ensure_ascii=False, sort_keys=True)
+    with DATA_FILE.open('w', encoding="utf8") as f:
+        json.dump(result, f, indent=4, sort_keys=True, ensure_ascii=False)

+ 5 - 3
searxng_extra/update/update_pygments.py

@@ -1,14 +1,16 @@
 #!/usr/bin/env python
+# lint: pylint
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Update pygments style
 
 Call this script after each upgrade of pygments
 
 """
+# pylint: disable=too-few-public-methods
 
 from pathlib import Path
 import pygments
-from pygments.formatters import HtmlFormatter
+from pygments.formatters.html import HtmlFormatter
 
 from searx import searx_dir
 
@@ -41,7 +43,7 @@ END_DARK_THEME = """
 """
 
 
-class Formatter(HtmlFormatter):
+class Formatter(HtmlFormatter):  # pylint: disable=missing-class-docstring
     @property
     def _pre_style(self):
         return 'line-height: 100%;'
@@ -67,5 +69,5 @@ def generat_css(light_style, dark_style) -> str:
 
 if __name__ == '__main__':
     print("update: %s" % LESS_FILE)
-    with open(LESS_FILE, 'w') as f:
+    with LESS_FILE.open('w', encoding='utf8') as f:
         f.write(generat_css('default', 'lightbulb'))

+ 6 - 3
searxng_extra/update/update_wikidata_units.py

@@ -18,6 +18,9 @@ from os.path import join
 
 from searx import searx_dir
 from searx.engines import wikidata, set_loggers
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'wikidata_units.json'
 
 set_loggers(wikidata, 'wikidata')
 
@@ -58,9 +61,9 @@ def get_data():
 
 
 def get_wikidata_units_filename():
-    return join(join(searx_dir, "data"), "wikidata_units.json")
+    return join(join(searx_dir, "data"), "")
 
 
 if __name__ == '__main__':
-    with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
-        json.dump(get_data(), f, indent=4, ensure_ascii=False)
+    with DATA_FILE.open('w', encoding="utf8") as f:
+        json.dump(get_data(), f, indent=4, sort_keys=True, ensure_ascii=False)