tracker_url_remover.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring
  3. from __future__ import annotations
  4. import typing
  5. import re
  6. from urllib.parse import urlunparse, parse_qsl, urlencode
  7. from flask_babel import gettext
  8. from searx.plugins import Plugin, PluginInfo
  9. if typing.TYPE_CHECKING:
  10. from searx.search import SearchWithPlugins
  11. from searx.extended_types import SXNG_Request
  12. from searx.result_types import Result
  13. from searx.plugins import PluginCfg
  14. regexes = {
  15. re.compile(r'utm_[^&]+'),
  16. re.compile(r'(wkey|wemail)[^&]*'),
  17. re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'),
  18. re.compile(r'&$'),
  19. }
  20. class SXNGPlugin(Plugin):
  21. """Remove trackers arguments from the returned URL"""
  22. id = "tracker_url_remover"
  23. def __init__(self, plg_cfg: "PluginCfg") -> None:
  24. super().__init__(plg_cfg)
  25. self.info = PluginInfo(
  26. id=self.id,
  27. name=gettext("Tracker URL remover"),
  28. description=gettext("Remove trackers arguments from the returned URL"),
  29. preference_section="privacy",
  30. )
  31. def on_result(
  32. self, request: "SXNG_Request", search: "SearchWithPlugins", result: Result
  33. ) -> bool: # pylint: disable=unused-argument
  34. if not result.parsed_url:
  35. return True
  36. parsed_query: list[tuple[str, str]] = parse_qsl(result.parsed_url.query)
  37. for name_value in list(parsed_query):
  38. param_name = name_value[0]
  39. for reg in regexes:
  40. if reg.match(param_name):
  41. parsed_query.remove(name_value)
  42. result.parsed_url = result.parsed_url._replace(query=urlencode(parsed_query))
  43. result.url = urlunparse(result.parsed_url)
  44. break
  45. return True