Browse Source

[docs] add documentation for the scripts in searxng_extra/update

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
ffea5d8ef5

+ 5 - 4
docs/dev/searxng_extra/index.rst

@@ -1,14 +1,15 @@
 .. _searxng_extra:
 
-======================================================
-Tooling box ``searxng_extra`` for developers and users
-======================================================
+=============================
+Tooling box ``searxng_extra``
+=============================
 
-In the folder :origin:`searxng_extra/` we maintain some tools useful for
+In the folder :origin:`searxng_extra/` we maintain some tools useful for CI and
 developers.
 
 .. toctree::
    :maxdepth: 2
    :caption: Contents
 
+   update
    standalone_searx.py

+ 88 - 0
docs/dev/searxng_extra/update.rst

@@ -0,0 +1,88 @@
+=========================
+``searxng_extra/update/``
+=========================
+
+:origin:`[source] <searxng_extra/update/__init__.py>`
+
+Scripts to update static data in :origin:`searx/data/`
+
+.. _update_ahmia_blacklist.py:
+
+``update_ahmia_blacklist.py``
+=============================
+
+:origin:`[source] <searxng_extra/update/update_ahmia_blacklist.py>`
+
+.. automodule:: searxng_extra.update.update_ahmia_blacklist
+  :members:
+
+
+``update_currencies.py``
+========================
+
+:origin:`[source] <searxng_extra/update/update_currencies.py>`
+
+.. automodule:: searxng_extra.update.update_currencies
+  :members:
+
+``update_engine_descriptions.py``
+=================================
+
+:origin:`[source] <searxng_extra/update/update_engine_descriptions.py>`
+
+.. automodule:: searxng_extra.update.update_engine_descriptions
+  :members:
+
+
+``update_external_bangs.py``
+============================
+
+:origin:`[source] <searxng_extra/update/update_external_bangs.py>`
+
+.. automodule:: searxng_extra.update.update_external_bangs
+  :members:
+
+
+``update_firefox_version.py``
+=============================
+
+:origin:`[source] <searxng_extra/update/update_firefox_version.py>`
+
+.. automodule:: searxng_extra.update.update_firefox_version
+  :members:
+
+
+``update_languages.py``
+=======================
+
+:origin:`[source] <searxng_extra/update/update_languages.py>`
+
+.. automodule:: searxng_extra.update.update_languages
+  :members:
+
+
+``update_osm_keys_tags.py``
+===========================
+
+:origin:`[source] <searxng_extra/update/update_osm_keys_tags.py>`
+
+.. automodule:: searxng_extra.update.update_osm_keys_tags
+  :members:
+
+
+``update_pygments.py``
+======================
+
+:origin:`[source] <searxng_extra/update/update_pygments.py>`
+
+.. automodule:: searxng_extra.update.update_pygments
+  :members:
+
+
+``update_wikidata_units.py``
+============================
+
+:origin:`[source] <searxng_extra/update/update_wikidata_units.py>`
+
+.. automodule:: searxng_extra.update.update_wikidata_units
+  :members:

+ 11 - 6
searxng_extra/update/update_ahmia_blacklist.py

@@ -1,10 +1,14 @@
 #!/usr/bin/env python
 # SPDX-License-Identifier: AGPL-3.0-or-later
+"""This script saves `Ahmia's blacklist`_ for onion sites.
 
-# This script saves Ahmia's blacklist for onion sites.
-# More info in https://ahmia.fi/blacklist/
+Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
+...  <.github/workflows/data-update.yml>`).
+
+.. _Ahmia's blacklist: https://ahmia.fi/blacklist/
+
+"""
 
-# set path
 from os.path import join
 
 import requests
@@ -26,6 +30,7 @@ def get_ahmia_blacklist_filename():
     return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
 
 
-blacklist = fetch_ahmia_blacklist()
-with open(get_ahmia_blacklist_filename(), "w") as f:
-    f.write('\n'.join(blacklist))
+if __name__ == '__main__':
+    blacklist = fetch_ahmia_blacklist()
+    with open(get_ahmia_blacklist_filename(), "w") as f:
+        f.write('\n'.join(blacklist))

+ 6 - 0
searxng_extra/update/update_currencies.py

@@ -1,6 +1,12 @@
 #!/usr/bin/env python
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
+"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
+
+Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
+<.github/workflows/data-update.yml>`).
+
+"""
 import re
 import unicodedata
 import json

+ 7 - 0
searxng_extra/update/update_engine_descriptions.py

@@ -1,6 +1,13 @@
 #!/usr/bin/env python
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
+"""Fetch website description from websites and from
+:origin:`searx/engines/wikidata.py` engine.
+
+Output file: :origin:`searx/data/engine_descriptions.json`.
+
+"""
+
 import json
 from urllib.parse import urlparse
 from os.path import join

+ 8 - 5
searxng_extra/update/update_external_bangs.py

@@ -1,17 +1,20 @@
 #!/usr/bin/env python
 # lint: pylint
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
-Update searx/data/external_bangs.json using the duckduckgo bangs.
+"""Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs
+(:origin:`CI Update data ... <.github/workflows/data-update.yml>`).
+
+https://duckduckgo.com/newbang loads:
 
-https://duckduckgo.com/newbang loads
 * a javascript which provides the bang version ( https://duckduckgo.com/bv1.js )
 * a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example )
 
 This script loads the javascript, then the bangs.
 
-The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ),
-but most probably it will requires to update RE_BANG_VERSION
+The javascript URL may change in the future ( for example
+https://duckduckgo.com/bv2.js ), but most probably it will require updating
+RE_BANG_VERSION
+
 """
 # pylint: disable=C0116
 

+ 11 - 3
searxng_extra/update/update_firefox_version.py

@@ -1,6 +1,13 @@
 #!/usr/bin/env python
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
+"""Fetch firefox useragent signatures
+
+Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
+<.github/workflows/data-update.yml>`).
+
+"""
+
 import json
 import requests
 import re
@@ -66,6 +73,7 @@ def get_useragents_filename():
     return join(join(searx_dir, "data"), "useragents.json")
 
 
-useragents["versions"] = fetch_firefox_last_versions()
-with open(get_useragents_filename(), "w") as f:
-    json.dump(useragents, f, indent=4, ensure_ascii=False)
+if __name__ == '__main__':
+    useragents["versions"] = fetch_firefox_last_versions()
+    with open(get_useragents_filename(), "w", encoding='utf-8') as f:
+        json.dump(useragents, f, indent=4, ensure_ascii=False)

+ 7 - 3
searxng_extra/update/update_languages.py

@@ -1,9 +1,13 @@
 #!/usr/bin/env python
 # SPDX-License-Identifier: AGPL-3.0-or-later
+"""This script generates languages.py from intersecting each engine's supported
+languages.
 
-# This script generates languages.py from intersecting each engine's supported languages.
-#
-# Output files: searx/data/engines_languages.json and searx/languages.py
+Output files: :origin:`searx/data/engines_languages.json` and
+:origin:`searx/languages.py` (:origin:`CI Update data ...
+<.github/workflows/data-update.yml>`).
+
+"""
 
 import json
 from pathlib import Path

+ 4 - 1
searxng_extra/update/update_osm_keys_tags.py

@@ -5,7 +5,10 @@
 
 To get the i18n names, the script uses `Wikidata Query Service`_ instead of for
 example `OSM tags API`_ (sidenote: the actual change log from
-map.atownsend.org.uk_ might be useful to normalize OSM tags)
+map.atownsend.org.uk_ might be useful to normalize OSM tags).
+
+Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
+<.github/workflows/data-update.yml>`).
 
 .. _Wikidata Query Service: https://query.wikidata.org/
 .. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc

+ 10 - 2
searxng_extra/update/update_wikidata_units.py

@@ -3,6 +3,13 @@
 # lint: pylint
 # pylint: disable=missing-module-docstring
 
+"""Fetch units from :origin:`searx/engines/wikidata.py` engine.
+
+Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data
+...  <.github/workflows/data-update.yml>`).
+
+"""
+
 import json
 import collections
 
@@ -54,5 +61,6 @@ def get_wikidata_units_filename():
     return join(join(searx_dir, "data"), "wikidata_units.json")
 
 
-with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
-    json.dump(get_data(), f, indent=4, ensure_ascii=False)
+if __name__ == '__main__':
+    with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
+        json.dump(get_data(), f, indent=4, ensure_ascii=False)