hostname_replace.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring
  3. import re
  4. from urllib.parse import urlunparse, urlparse
  5. from flask_babel import gettext
  6. from searx import settings
  7. from searx.plugins import logger
  8. name = gettext('Hostname replace')
  9. description = gettext('Rewrite result hostnames or remove results based on the hostname')
  10. default_on = False
  11. preference_section = 'general'
  12. plugin_id = 'hostname_replace'
  13. replacements = {re.compile(p): r for (p, r) in settings[plugin_id].items()} if plugin_id in settings else {}
  14. logger = logger.getChild(plugin_id)
  15. parsed = 'parsed_url'
  16. _url_fields = ['iframe_src', 'audio_src']
  17. def on_result(_request, _search, result):
  18. for pattern, replacement in replacements.items():
  19. if parsed in result:
  20. if pattern.search(result[parsed].netloc):
  21. # to keep or remove this result from the result list depends
  22. # (only) on the 'parsed_url'
  23. if not replacement:
  24. return False
  25. result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
  26. result['url'] = urlunparse(result[parsed])
  27. for url_field in _url_fields:
  28. if result.get(url_field):
  29. url_src = urlparse(result[url_field])
  30. if pattern.search(url_src.netloc):
  31. if not replacement:
  32. del result[url_field]
  33. else:
  34. url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
  35. result[url_field] = urlunparse(url_src)
  36. return True