tracker_url_remover.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. (C) 2015 by Adam Tauber, <asciimoo@gmail.com>
  13. '''
  14. from flask_babel import gettext
  15. import re
  16. from urllib.parse import urlunparse, parse_qsl, urlencode
  17. regexes = {
  18. re.compile(r'utm_[^&]+'),
  19. re.compile(r'(wkey|wemail)[^&]*'),
  20. re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'),
  21. re.compile(r'&$'),
  22. }
  23. name = gettext('Tracker URL remover')
  24. description = gettext('Remove trackers arguments from the returned URL')
  25. default_on = True
  26. preference_section = 'privacy'
  27. def on_result(request, search, result):
  28. if 'parsed_url' not in result:
  29. return True
  30. query = result['parsed_url'].query
  31. if query == "":
  32. return True
  33. parsed_query = parse_qsl(query)
  34. changes = 0
  35. for i, (param_name, _) in enumerate(list(parsed_query)):
  36. for reg in regexes:
  37. if reg.match(param_name):
  38. parsed_query.pop(i - changes)
  39. changes += 1
  40. result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query))
  41. result['url'] = urlunparse(result['parsed_url'])
  42. break
  43. return True