Browse Source

Merge branch 'remove_trackers_plugin' of https://github.com/Cqoicebordel/searx into Cqoicebordel-remove_trackers_plugin

Conflicts:
	searx/plugins/__init__.py
Adam Tauber 10 years ago
parent
commit
6424a7702d
2 changed files with 47 additions and 1 deletions
  1. 3 1
      searx/plugins/__init__.py
  2. 44 0
      searx/plugins/tracker_url_remover.py

+ 3 - 1
searx/plugins/__init__.py

@@ -21,7 +21,8 @@ logger = logger.getChild('plugins')
 
 
 from searx.plugins import (https_rewrite,
 from searx.plugins import (https_rewrite,
                            self_info,
                            self_info,
-                           search_on_category_select)
+                           search_on_category_select,
+                           tracker_url_remover)
 
 
 required_attrs = (('name', str),
 required_attrs = (('name', str),
                   ('description', str),
                   ('description', str),
@@ -73,3 +74,4 @@ plugins = PluginStore()
 plugins.register(https_rewrite)
 plugins.register(https_rewrite)
 plugins.register(self_info)
 plugins.register(self_info)
 plugins.register(search_on_category_select)
 plugins.register(search_on_category_select)
+plugins.register(tracker_url_remover)

+ 44 - 0
searx/plugins/tracker_url_remover.py

@@ -0,0 +1,44 @@
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
+'''
+
+from flask.ext.babel import gettext
+import re
+from urlparse import urlunparse
+
# Patterns that on_result() removes from a URL query string, applied in the
# order listed.
#
# This is a tuple rather than a set on purpose: the last pattern removes the
# '&' left dangling once a trailing tracker parameter has been stripped, so it
# must run after the others, and a set gives no iteration-order guarantee.
#
# `(?<![^&])` anchors a match to the start of a parameter (start of the query
# or right after '&'), so text such as `q=utm_source` or `showkey=1` is left
# untouched.
regexes = (re.compile(r'(?<![^&])utm_[^&]+&?'),
           re.compile(r'(?<![^&])(?:wkey|wemail)[^&]+&?'),
           re.compile(r'&$'))
+
# Plugin metadata read from this module by the plugin registry.
# Both strings go through gettext so they can be translated.
name = gettext('Tracker URL remover')
description = gettext(
    'Remove trackers arguments from the returned URL'
)
# NOTE(review): presumably controls whether the plugin is active unless the
# user disables it -- confirm against the plugin store.
default_on = True
+
+
def on_result(request, ctx):
    """Strip tracking parameters from the URL of one search result.

    Every pattern in the module-level ``regexes`` is applied to the query
    string of ``ctx['result']['parsed_url']``. If that changes the query,
    both ``parsed_url`` and ``url`` of the result are updated in place.

    :param request: the current request; not used by this plugin.
    :param ctx: mapping holding the result dict under the ``'result'`` key.
    :returns: always ``True``.
    """
    result = ctx['result']

    # A result without a parsed URL has nothing to clean; looking the key up
    # unconditionally would raise KeyError for it.
    if 'parsed_url' not in result:
        return True

    parsed_url = result['parsed_url']
    original_query = parsed_url.query

    if original_query == "":
        return True

    query = original_query
    for reg in regexes:
        query = reg.sub('', query)

    # Rebuild the URL only when something was actually removed.
    if query != original_query:
        result['parsed_url'] = parsed_url._replace(query=query)
        result['url'] = urlunparse(result['parsed_url'])

    return True