Browse Source

[enh] add hostname_replace plugin

* backport of https://github.com/searx/searx/pull/2724
* allow removing a result when the replacement is the boolean value false
Alexandre Flament 3 years ago
parent
commit
0f43b39eac
4 changed files with 49 additions and 5 deletions
  1. 4 4
      docs/dev/search_api.rst
  2. 2 0
      searx/plugins/__init__.py
  3. 32 0
      searx/plugins/hostname_replace.py
  4. 11 1
      searx/settings.yml

+ 4 - 4
docs/dev/search_api.rst

@@ -100,17 +100,17 @@ Parameters
   :default: ``HTTPS_rewrite``, ``Self_Informations``,
     ``Search_on_category_select``, ``Tracker_URL_remover``
 
-  :values: [ ``DOAI_rewrite``, ``HTTPS_rewrite``, ``Infinite_scroll``,
+  :values: ``DOAI_rewrite``, ``HTTPS_rewrite``, ``Infinite_scroll``,
     ``Vim-like_hotkeys``, ``Self_Informations``, ``Tracker_URL_remover``,
-    ``Search_on_category_select`` ]
+    ``Search_on_category_select``, ``Hostname_replace``
 
 ``disabled_plugins``: optional
   List of disabled plugins.
 
-  :default: ``DOAI_rewrite``, ``Infinite_scroll``, ``Vim-like_hotkeys``
+  :default: ``DOAI_rewrite``, ``Infinite_scroll``, ``Vim-like_hotkeys``, ``Hostname_replace``
   :values: ``DOAI_rewrite``, ``HTTPS_rewrite``, ``Infinite_scroll``,
     ``Vim-like_hotkeys``, ``Self_Informations``, ``Tracker_URL_remover``,
-    ``Search_on_category_select``
+    ``Search_on_category_select``, ``Hostname_replace``
 
 ``enabled_engines`` : optional : *all* :origin:`engines <searx/engines>`
   List of enabled engines.

+ 2 - 0
searx/plugins/__init__.py

@@ -31,6 +31,7 @@ from searx.plugins import (oa_doi_rewrite,
                            hash_plugin,
                            infinite_scroll,
                            self_info,
+                           hostname_replace,
                            search_on_category_select,
                            tracker_url_remover,
                            vim_hotkeys)
@@ -182,6 +183,7 @@ plugins.register(oa_doi_rewrite)
 plugins.register(hash_plugin)
 plugins.register(infinite_scroll)
 plugins.register(self_info)
+plugins.register(hostname_replace)
 plugins.register(search_on_category_select)
 plugins.register(tracker_url_remover)
 plugins.register(vim_hotkeys)

+ 32 - 0
searx/plugins/hostname_replace.py

@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import re
+from urllib.parse import urlunparse
+from searx import settings
+from searx.plugins import logger
+from flask_babel import gettext
+
+name = gettext('Hostname replace')
+description = gettext('Rewrite result hostnames or remove results based on the hostname')
+default_on = False
+preference_section = 'general'
+
+plugin_id = 'hostname_replace'
+
+replacements = {re.compile(p): r for (p, r) in settings[plugin_id].items()} if plugin_id in settings else {}
+
+logger = logger.getChild(plugin_id)
+parsed = 'parsed_url'
+
+
+def on_result(request, search, result):
+    if parsed not in result:
+        return True
+    for (pattern, replacement) in replacements.items():
+        if pattern.search(result[parsed].netloc):
+            if not replacement:
+                return False
+            result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
+            result['url'] = urlunparse(result[parsed])
+
+    return True

+ 11 - 1
searx/settings.yml

@@ -150,7 +150,17 @@ outgoing:
 #
 # enabled_plugins:
 #   - "HTTPS rewrite"
-#   - ...
+#   - "Hostname replace"  # see configuration below
+
+# "Hostname replace" plugin configuration example:
+# hostname_replace:
+#   '(.*\.)?youtube\.com$':           'invidious.example.com'
+#   '(.*\.)?youtu\.be$':              'invidious.example.com'
+#   '(.*\.)?youtube-nocookie\.com$':  'yotter.example.com'
+#   '(.*\.)?reddit\.com$':            'teddit.example.com'
+#   '(.*\.)?redd\.it$':               'teddit.example.com'
+#   '(www\.)?twitter\.com$':          'nitter.example.com'
+#   'spam\.example\.com':             false  # remove results from spam.example.com
 
 checker:
   # disable checker when in debug mode