hostnames.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring
  3. import re
  4. from urllib.parse import urlunparse, urlparse
  5. from flask_babel import gettext
  6. from searx import settings
  7. from searx.plugins import logger
  8. name = gettext('Hostnames plugin')
  9. description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')
  10. default_on = False
  11. preference_section = 'general'
  12. plugin_id = 'hostnames'
  13. replacements = {
  14. re.compile(p): r
  15. for (p, r) in (settings.get(plugin_id, {}).get('replace', settings.get('hostname_replace', {})).items())
  16. }
  17. removables = {re.compile(p) for p in settings[plugin_id].get('remove', [])}
  18. high_priority = {re.compile(p) for p in settings[plugin_id].get('high_priority', [])}
  19. low_priority = {re.compile(p) for p in settings[plugin_id].get('low_priority', [])}
  20. logger = logger.getChild(plugin_id)
  21. parsed = 'parsed_url'
  22. _url_fields = ['iframe_src', 'audio_src']
  23. def _matches_parsed_url(result, pattern):
  24. return parsed in result and pattern.search(result[parsed].netloc)
  25. def on_result(_request, _search, result):
  26. for pattern, replacement in replacements.items():
  27. if _matches_parsed_url(result, pattern):
  28. logger.debug(result['url'])
  29. result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
  30. result['url'] = urlunparse(result[parsed])
  31. logger.debug(result['url'])
  32. for url_field in _url_fields:
  33. if not result.get(url_field):
  34. continue
  35. url_src = urlparse(result[url_field])
  36. if pattern.search(url_src.netloc):
  37. url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
  38. result[url_field] = urlunparse(url_src)
  39. for pattern in removables:
  40. if _matches_parsed_url(result, pattern):
  41. return False
  42. for url_field in _url_fields:
  43. if not result.get(url_field):
  44. continue
  45. url_src = urlparse(result[url_field])
  46. if pattern.search(url_src.netloc):
  47. del result[url_field]
  48. for pattern in low_priority:
  49. if _matches_parsed_url(result, pattern):
  50. result['priority'] = 'low'
  51. for pattern in high_priority:
  52. if _matches_parsed_url(result, pattern):
  53. result['priority'] = 'high'
  54. return True