update_tracker_patterns.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Fetch trackers"""
  3. import json
  4. import httpx
  5. from searx.data import data_dir
  6. DATA_FILE = data_dir / "tracker_patterns.json"
  7. CLEAR_LIST_URL = "https://raw.githubusercontent.com/ClearURLs/Rules/refs/heads/master/data.min.json"
  8. def fetch_clear_url_filters():
  9. resp = httpx.get(CLEAR_LIST_URL)
  10. if resp.status_code != 200:
  11. # pylint: disable=broad-exception-raised
  12. raise Exception(f"Error fetching ClearURL filter lists, HTTP code {resp.status_code}")
  13. providers = resp.json()["providers"]
  14. rules = []
  15. for rule in providers.values():
  16. rules.append(
  17. {
  18. "urlPattern": rule["urlPattern"].replace("\\\\", "\\"), # fix javascript regex syntax
  19. "exceptions": [exc.replace("\\\\", "\\") for exc in rule["exceptions"]],
  20. "trackerParams": rule["rules"],
  21. }
  22. )
  23. return rules
  24. if __name__ == '__main__':
  25. filter_list = fetch_clear_url_filters()
  26. with DATA_FILE.open("w", encoding='utf-8') as f:
  27. json.dump(filter_list, f, indent=4, sort_keys=True, ensure_ascii=False)