Browse Source

[mod] comprehensive revision of the searxng_extra/update/ scripts

- pylint all scripts
- fix some errors reported by pyright
- from searx.data import data_dir (Path.open)
- fix import from pygments.formatters.html

NOTE: no functional changes!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
ce4aaf6cad

+ 5 - 9
searxng_extra/update/update_ahmia_blacklist.py

@@ -11,11 +11,10 @@ Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
 """
 """
 # pylint: disable=use-dict-literal
 # pylint: disable=use-dict-literal
 
 
-from os.path import join
-
 import requests
 import requests
-from searx import searx_dir
+from searx.data import data_dir
 
 
+DATA_FILE = data_dir / 'ahmia_blacklist.txt'
 URL = 'https://ahmia.fi/blacklist/banned/'
 URL = 'https://ahmia.fi/blacklist/banned/'
 
 
 
 
@@ -23,15 +22,12 @@ def fetch_ahmia_blacklist():
     resp = requests.get(URL, timeout=3.0)
     resp = requests.get(URL, timeout=3.0)
     if resp.status_code != 200:
     if resp.status_code != 200:
         # pylint: disable=broad-exception-raised
         # pylint: disable=broad-exception-raised
-        raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
+        raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)  # type: ignore
     return resp.text.split()
     return resp.text.split()
 
 
 
 
-def get_ahmia_blacklist_filename():
-    return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
-
-
 if __name__ == '__main__':
 if __name__ == '__main__':
     blacklist = fetch_ahmia_blacklist()
     blacklist = fetch_ahmia_blacklist()
-    with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f:
+    blacklist.sort()
+    with DATA_FILE.open("w", encoding='utf-8') as f:
         f.write('\n'.join(blacklist))
         f.write('\n'.join(blacklist))

+ 5 - 10
searxng_extra/update/update_currencies.py

@@ -15,12 +15,11 @@ import re
 import unicodedata
 import unicodedata
 import json
 import json
 
 
-# set path
-from os.path import join
-
-from searx import searx_dir
 from searx.locales import LOCALE_NAMES, locales_initialize
 from searx.locales import LOCALE_NAMES, locales_initialize
 from searx.engines import wikidata, set_loggers
 from searx.engines import wikidata, set_loggers
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'currencies.json'
 
 
 set_loggers(wikidata, 'wikidata')
 set_loggers(wikidata, 'wikidata')
 locales_initialize()
 locales_initialize()
@@ -133,10 +132,6 @@ def fetch_db():
     return db
     return db
 
 
 
 
-def get_filename():
-    return join(join(searx_dir, "data"), "currencies.json")
-
-
 def main():
 def main():
 
 
     db = fetch_db()
     db = fetch_db()
@@ -156,8 +151,8 @@ def main():
         if len(db['names'][name]) == 1:
         if len(db['names'][name]) == 1:
             db['names'][name] = db['names'][name][0]
             db['names'][name] = db['names'][name][0]
 
 
-    with open(get_filename(), 'w', encoding='utf8') as f:
-        json.dump(db, f, ensure_ascii=False, indent=4)
+    with DATA_FILE.open('w', encoding='utf8') as f:
+        json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':

+ 5 - 2
searxng_extra/update/update_engine_descriptions.py

@@ -24,6 +24,9 @@ from searx import searx_dir
 from searx.utils import gen_useragent, detect_language
 from searx.utils import gen_useragent, detect_language
 import searx.search
 import searx.search
 import searx.network
 import searx.network
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'engine_descriptions.json'
 
 
 set_loggers(wikidata, 'wikidata')
 set_loggers(wikidata, 'wikidata')
 locales_initialize()
 locales_initialize()
@@ -362,8 +365,8 @@ def main():
     fetch_website_descriptions()
     fetch_website_descriptions()
 
 
     output = get_output()
     output = get_output()
-    with open(get_engine_descriptions_filename(), 'w', encoding='utf8') as f:
-        f.write(json.dumps(output, indent=1, separators=(',', ':'), ensure_ascii=False))
+    with DATA_FILE.open('w', encoding='utf8') as f:
+        f.write(json.dumps(output, indent=1, separators=(',', ':'), sort_keys=True, ensure_ascii=False))
 
 
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":

+ 3 - 3
searxng_extra/update/update_engine_traits.py

@@ -144,9 +144,9 @@ def write_languages_file(sxng_tag_list):
 
 
         item = (
         item = (
             sxng_tag,
             sxng_tag,
-            sxng_locale.get_language_name().title(),
+            sxng_locale.get_language_name().title(),  # type: ignore
             sxng_locale.get_territory_name() or '',
             sxng_locale.get_territory_name() or '',
-            sxng_locale.english_name.split(' (')[0],
+            sxng_locale.english_name.split(' (')[0] if sxng_locale.english_name else '',
             UnicodeEscape(flag),
             UnicodeEscape(flag),
         )
         )
 
 
@@ -154,7 +154,7 @@ def write_languages_file(sxng_tag_list):
 
 
     language_codes = tuple(language_codes)
     language_codes = tuple(language_codes)
 
 
-    with open(languages_file, 'w', encoding='utf-8') as new_file:
+    with languages_file.open('w', encoding='utf-8') as new_file:
         file_content = "{header} {language_codes}{footer}".format(
         file_content = "{header} {language_codes}{footer}".format(
             header=languages_file_header,
             header=languages_file_header,
             language_codes=pformat(language_codes, width=120, indent=4)[1:-1],
             language_codes=pformat(language_codes, width=120, indent=4)[1:-1],

+ 4 - 7
searxng_extra/update/update_external_bangs.py

@@ -8,20 +8,17 @@ from :py:obj:`BANGS_URL`.
 
 
 """
 """
 
 
-from pathlib import Path
 import json
 import json
-
 import httpx
 import httpx
 
 
-from searx import searx_dir
 from searx.external_bang import LEAF_KEY
 from searx.external_bang import LEAF_KEY
+from searx.data import data_dir
 
 
+DATA_FILE = data_dir / 'external_bangs.json'
 
 
 BANGS_URL = 'https://duckduckgo.com/bang.js'
 BANGS_URL = 'https://duckduckgo.com/bang.js'
 """JSON file which contains the bangs."""
 """JSON file which contains the bangs."""
 
 
-BANGS_DATA_FILE = Path(searx_dir) / 'data' / 'external_bangs.json'
-
 HTTPS_COLON = 'https:'
 HTTPS_COLON = 'https:'
 HTTP_COLON = 'http:'
 HTTP_COLON = 'http:'
 
 
@@ -36,8 +33,8 @@ def main():
         'version': 0,
         'version': 0,
         'trie': trie,
         'trie': trie,
     }
     }
-    with open(BANGS_DATA_FILE, 'w', encoding="utf8") as f:
-        json.dump(output, f, sort_keys=True, ensure_ascii=False, indent=4)
+    with DATA_FILE.open('w', encoding="utf8") as f:
+        json.dump(output, f, indent=4, sort_keys=True, ensure_ascii=False)
 
 
 
 
 def merge_when_no_leaf(node):
 def merge_when_no_leaf(node):

+ 6 - 9
searxng_extra/update/update_firefox_version.py

@@ -11,13 +11,14 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
 
 
 import json
 import json
 import re
 import re
-from os.path import join
 from urllib.parse import urlparse, urljoin
 from urllib.parse import urlparse, urljoin
 from packaging.version import parse
 from packaging.version import parse
 
 
 import requests
 import requests
 from lxml import html
 from lxml import html
-from searx import searx_dir
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'useragents.json'
 
 
 URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
 URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
 RELEASE_PATH = '/pub/firefox/releases/'
 RELEASE_PATH = '/pub/firefox/releases/'
@@ -41,7 +42,7 @@ def fetch_firefox_versions():
     resp = requests.get(URL, timeout=2.0)
     resp = requests.get(URL, timeout=2.0)
     if resp.status_code != 200:
     if resp.status_code != 200:
         # pylint: disable=broad-exception-raised
         # pylint: disable=broad-exception-raised
-        raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
+        raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)  # type: ignore
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
     versions = []
     versions = []
 
 
@@ -74,11 +75,7 @@ def fetch_firefox_last_versions():
     return result
     return result
 
 
 
 
-def get_useragents_filename():
-    return join(join(searx_dir, "data"), "useragents.json")
-
-
 if __name__ == '__main__':
 if __name__ == '__main__':
     useragents["versions"] = fetch_firefox_last_versions()
     useragents["versions"] = fetch_firefox_last_versions()
-    with open(get_useragents_filename(), "w", encoding='utf-8') as f:
-        json.dump(useragents, f, indent=4, ensure_ascii=False)
+    with DATA_FILE.open('w', encoding='utf-8') as f:
+        json.dump(useragents, f, indent=4, sort_keys=True, ensure_ascii=False)

+ 7 - 7
searxng_extra/update/update_locales.py

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
+# lint: pylint
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Update locale names in :origin:`searx/data/locales.json` used by
 """Update locale names in :origin:`searx/data/locales.json` used by
 :ref:`searx.locales`
 :ref:`searx.locales`
@@ -6,12 +7,12 @@
 - :py:obj:`searx.locales.RTL_LOCALES`
 - :py:obj:`searx.locales.RTL_LOCALES`
 - :py:obj:`searx.locales.LOCALE_NAMES`
 - :py:obj:`searx.locales.LOCALE_NAMES`
 """
 """
+# pylint: disable=invalid-name
 from __future__ import annotations
 from __future__ import annotations
 
 
 from typing import Set
 from typing import Set
 import json
 import json
 from pathlib import Path
 from pathlib import Path
-import os
 
 
 import babel
 import babel
 import babel.languages
 import babel.languages
@@ -61,7 +62,7 @@ def main():
         "RTL_LOCALES": sorted(RTL_LOCALES),
         "RTL_LOCALES": sorted(RTL_LOCALES),
     }
     }
 
 
-    with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
+    with LOCALE_DATA_FILE.open('w', encoding='utf-8') as f:
         json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
         json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
 
 
 
 
@@ -84,11 +85,10 @@ def get_locale_descr(locale: babel.Locale, tr_locale):
             return native_language
             return native_language
         return native_language + ' (' + english_language + ')'
         return native_language + ' (' + english_language + ')'
 
 
-    else:
-        result = native_language + ', ' + native_territory + ' (' + english_language
-        if english_territory:
-            return result + ', ' + english_territory + ')'
-        return result + ')'
+    result = native_language + ', ' + native_territory + ' (' + english_language
+    if english_territory:
+        return result + ', ' + english_territory + ')'
+    return result + ')'
 
 
 
 
 def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:
 def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:

+ 5 - 8
searxng_extra/update/update_osm_keys_tags.py

@@ -45,13 +45,14 @@ Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
 
 
 import json
 import json
 import collections
 import collections
-from pathlib import Path
 
 
-from searx import searx_dir
 from searx.network import set_timeout_for_thread
 from searx.network import set_timeout_for_thread
 from searx.engines import wikidata, set_loggers
 from searx.engines import wikidata, set_loggers
 from searx.sxng_locales import sxng_locales
 from searx.sxng_locales import sxng_locales
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'osm_keys_tags.json'
 
 
 set_loggers(wikidata, 'wikidata')
 set_loggers(wikidata, 'wikidata')
 
 
@@ -203,10 +204,6 @@ def optimize_keys(data):
     return data
     return data
 
 
 
 
-def get_osm_tags_filename():
-    return Path(searx_dir) / "data" / "osm_keys_tags.json"
-
-
 if __name__ == '__main__':
 if __name__ == '__main__':
 
 
     set_timeout_for_thread(60)
     set_timeout_for_thread(60)
@@ -214,5 +211,5 @@ if __name__ == '__main__':
         'keys': optimize_keys(get_keys()),
         'keys': optimize_keys(get_keys()),
         'tags': optimize_tags(get_tags()),
         'tags': optimize_tags(get_tags()),
     }
     }
-    with open(get_osm_tags_filename(), 'w', encoding="utf8") as f:
-        json.dump(result, f, indent=4, ensure_ascii=False, sort_keys=True)
+    with DATA_FILE.open('w', encoding="utf8") as f:
+        json.dump(result, f, indent=4, sort_keys=True, ensure_ascii=False)

+ 5 - 3
searxng_extra/update/update_pygments.py

@@ -1,14 +1,16 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
+# lint: pylint
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Update pygments style
 """Update pygments style
 
 
 Call this script after each upgrade of pygments
 Call this script after each upgrade of pygments
 
 
 """
 """
+# pylint: disable=too-few-public-methods
 
 
 from pathlib import Path
 from pathlib import Path
 import pygments
 import pygments
-from pygments.formatters import HtmlFormatter
+from pygments.formatters.html import HtmlFormatter
 
 
 from searx import searx_dir
 from searx import searx_dir
 
 
@@ -41,7 +43,7 @@ END_DARK_THEME = """
 """
 """
 
 
 
 
-class Formatter(HtmlFormatter):
+class Formatter(HtmlFormatter):  # pylint: disable=missing-class-docstring
     @property
     @property
     def _pre_style(self):
     def _pre_style(self):
         return 'line-height: 100%;'
         return 'line-height: 100%;'
@@ -67,5 +69,5 @@ def generat_css(light_style, dark_style) -> str:
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
     print("update: %s" % LESS_FILE)
     print("update: %s" % LESS_FILE)
-    with open(LESS_FILE, 'w') as f:
+    with LESS_FILE.open('w', encoding='utf8') as f:
         f.write(generat_css('default', 'lightbulb'))
         f.write(generat_css('default', 'lightbulb'))

+ 6 - 3
searxng_extra/update/update_wikidata_units.py

@@ -18,6 +18,9 @@ from os.path import join
 
 
 from searx import searx_dir
 from searx import searx_dir
 from searx.engines import wikidata, set_loggers
 from searx.engines import wikidata, set_loggers
+from searx.data import data_dir
+
+DATA_FILE = data_dir / 'wikidata_units.json'
 
 
 set_loggers(wikidata, 'wikidata')
 set_loggers(wikidata, 'wikidata')
 
 
@@ -58,9 +61,9 @@ def get_data():
 
 
 
 
 def get_wikidata_units_filename():
 def get_wikidata_units_filename():
-    return join(join(searx_dir, "data"), "wikidata_units.json")
+    return join(join(searx_dir, "data"), "wikidata_units.json")
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
-        json.dump(get_data(), f, indent=4, ensure_ascii=False)
+    with DATA_FILE.open('w', encoding="utf8") as f:
+        json.dump(get_data(), f, indent=4, sort_keys=True, ensure_ascii=False)