logo

searx

Unnamed repository; edit this file 'description' to name the repository.
commit: f21070989ac539931029e16ffb97f526544e88f8
parent: ff322d3373c6de934a5a47c12effce2d336d2c6d
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sun, 21 Jun 2015 16:56:35 +0200

Merge branch 'Cqoicebordel-remove_trackers_plugin'

Diffstat:

searx/plugins/__init__.py | 4 +++-
searx/plugins/tracker_url_remover.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py
@@ -21,7 +21,8 @@ logger = logger.getChild('plugins')
 
 from searx.plugins import (https_rewrite,
                            self_info,
-                           search_on_category_select)
+                           search_on_category_select,
+                           tracker_url_remover)
 
 required_attrs = (('name', str),
                   ('description', str),
@@ -73,3 +74,4 @@ plugins = PluginStore()
 plugins.register(https_rewrite)
 plugins.register(self_info)
 plugins.register(search_on_category_select)
+plugins.register(tracker_url_remover)
diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py
@@ -0,0 +1,44 @@
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
+'''
+
+from flask.ext.babel import gettext
+import re
+from urlparse import urlunparse
+
+regexes = {re.compile(r'utm_[^&]+&?'),
+           re.compile(r'(wkey|wemail)[^&]+&?'),
+           re.compile(r'&$')}
+
+name = gettext('Tracker URL remover')
+description = gettext('Remove trackers arguments from the returned URL')
+default_on = True
+
+
+def on_result(request, ctx):
+    query = ctx['result']['parsed_url'].query
+
+    if query == "":
+        return True
+
+    for reg in regexes:
+        query = reg.sub('', query)
+
+    if query != ctx['result']['parsed_url'].query:
+        ctx['result']['parsed_url'] = ctx['result']['parsed_url']._replace(query=query)
+        ctx['result']['url'] = urlunparse(ctx['result']['parsed_url'])
+
+    return True