commit: 617495cca8b2799945be2c2b042dcc7ce905741a
parent 4e28f12bfcc73b47f382bceedb5b65ee314674ae
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Tue, 9 Jun 2015 16:16:07 +0200
Add a plugin to remove trackers from results URLs
Diffstat:
2 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py
@@ -21,7 +21,8 @@ logger = logger.getChild('plugins')
from searx.plugins import (https_rewrite,
self_ip,
- search_on_category_select)
+ search_on_category_select,
+ tracker_url_remover)
required_attrs = (('name', str),
('description', str),
@@ -73,3 +74,4 @@ plugins = PluginStore()
plugins.register(https_rewrite)
plugins.register(self_ip)
plugins.register(search_on_category_select)
+plugins.register(tracker_url_remover)
diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py
@@ -0,0 +1,40 @@
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
+'''
+
+from flask.ext.babel import gettext
+import re
+
+# Patterns applied, in this order, by on_result() to strip tracking
+# arguments from a result URL; every match is replaced by ''.
+#
+# The look-behind (?<=[?&]) anchors a pattern to the start of a parameter
+# (right after '?' or '&'), so text such as '/utm_source/' in a path or
+# 'showkey=' in a parameter name is left alone.  '#' is excluded from the
+# value so a tracker in last position does not swallow the fragment.
+
+# utm_* campaign parameters (utm_source, utm_medium, ...)
+re1 = re.compile(r'(?<=[?&])utm_[^&#]*&?')
+# parameters whose name starts with wkey or wemail
+re2 = re.compile(r'(?<=[?&])(wkey|wemail)[^&#]*&?')
+# '&' left dangling at the end of the query string
+re3 = re.compile(r'&(?=#|$)')
+# '?' left dangling once the query string is empty
+re4 = re.compile(r'\?(?=#|$)')
+
+name = gettext('Tracker URL remover')  # plugin title, run through gettext
+description = gettext('Remove trackers arguments from the returned URL')  # short summary, run through gettext
+default_on = True  # NOTE(review): presumably enables the plugin by default - confirm
+
+
+def on_result(request, ctx):
+    '''Strip tracking arguments from the URL of the current result.
+
+    The module-level patterns re1, re2, re3 and re4 are applied one after
+    another, each deleting whatever it matches, and the cleaned URL is
+    written back to ctx['result']['url'].  request is not used.
+
+    Always returns True.
+    '''
+    result = ctx['result']
+    cleaned = result['url']
+
+    # The order matters: the tracker patterns run first, then the ones
+    # that tidy up the separators they may leave behind.
+    for pattern in (re1, re2, re3, re4):
+        cleaned = pattern.sub('', cleaned)
+
+    result['url'] = cleaned
+    return True