update_ahmia_blacklist.py 955 B

123456789101112131415161718192021222324252627282930313233343536
  1. #!/usr/bin/env python
  2. # SPDX-License-Identifier: AGPL-3.0-or-later
  3. """This script saves `Ahmia's blacklist`_ for onion sites.
  4. Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
  5. ... <.github/workflows/data-update.yml>`).
  6. .. _Ahmia's blacklist: https://ahmia.fi/blacklist/
  7. """
  8. from os.path import join
  9. import requests
  10. from searx import searx_dir
  11. URL = 'https://ahmia.fi/blacklist/banned/'
  12. def fetch_ahmia_blacklist():
  13. resp = requests.get(URL, timeout=3.0)
  14. if resp.status_code != 200:
  15. raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
  16. else:
  17. blacklist = resp.text.split()
  18. return blacklist
  19. def get_ahmia_blacklist_filename():
  20. return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
  21. if __name__ == '__main__':
  22. blacklist = fetch_ahmia_blacklist()
  23. with open(get_ahmia_blacklist_filename(), "w") as f:
  24. f.write('\n'.join(blacklist))