logo

searx

Unnamed repository; edit this file 'description' to name the repository.
commit: 20400c40c34b6122621476c46460c5a3a8624c89
parent: 840945f498cd07d38cb198cc0735b6445f44802c
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sun, 19 Oct 2014 12:06:34 +0200

Merge pull request #97 from pointhi/https

Implementing https rewrite support

Diffstat:

searx/__init__.py | 10++++++++++
searx/https_rewrite.py | 143++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
searx/https_rules/00README | 17+++++++++++++++++
searx/https_rules/Bing.xml | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/Dailymotion.xml | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/Deviantart.xml | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/DuckDuckGo.xml | 38++++++++++++++++++++++++++++++++++++++
searx/https_rules/Flickr.xml | 44++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/Github-Pages.xml | 11+++++++++++
searx/https_rules/Github.xml | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/Google-mismatches.xml | 26++++++++++++++++++++++++++
searx/https_rules/Google.org.xml | 15+++++++++++++++
searx/https_rules/GoogleAPIs.xml | 143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/GoogleCanada.xml | 6++++++
searx/https_rules/GoogleImages.xml | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/GoogleMainSearch.xml | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/GoogleMaps.xml | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/GoogleMelange.xml | 6++++++
searx/https_rules/GoogleSearch.xml | 135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/GoogleServices.xml | 345+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/GoogleShopping.xml | 28++++++++++++++++++++++++++++
searx/https_rules/GoogleSorry.xml | 7+++++++
searx/https_rules/GoogleTranslate.xml | 8++++++++
searx/https_rules/GoogleVideos.xml | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/GoogleWatchBlog.xml | 18++++++++++++++++++
searx/https_rules/Google_App_Engine.xml | 22++++++++++++++++++++++
searx/https_rules/Googleplex.com.xml | 16++++++++++++++++
searx/https_rules/OpenStreetMap.xml | 15+++++++++++++++
searx/https_rules/Rawgithub.com.xml | 14++++++++++++++
searx/https_rules/Soundcloud.xml | 101+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/ThePirateBay.xml | 36++++++++++++++++++++++++++++++++++++
searx/https_rules/Torproject.xml | 18++++++++++++++++++
searx/https_rules/Twitter.xml | 169+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/Vimeo.xml | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/WikiLeaks.xml | 14++++++++++++++
searx/https_rules/Wikimedia.xml | 107+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/Yahoo.xml | 2450+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/https_rules/YouTube.xml | 46++++++++++++++++++++++++++++++++++++++++++++++
searx/settings_robot.yml | 3+++
searx/webapp.py | 57++++++++++++++++++++++++++++++++++++++++++++++++++++-----
40 files changed, 4695 insertions(+), 13 deletions(-)

diff --git a/searx/__init__.py b/searx/__init__.py @@ -17,6 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. from os import environ from os.path import realpath, dirname, join, abspath +from searx.https_rewrite import load_https_rules try: from yaml import load except: @@ -34,7 +35,16 @@ if 'SEARX_SETTINGS_PATH' in environ: else: settings_path = join(searx_dir, 'settings.yml') +if 'SEARX_HTTPS_REWRITE_PATH' in environ: + https_rewrite_path = environ['SEARX_HTTPS_REWRITE_PATH'] +else: + https_rewrite_path = join(searx_dir, 'https_rules') # load settings with open(settings_path) as settings_yaml: settings = load(settings_yaml) + +# load https rules only if https rewrite is enabled +if settings.get('server', {}).get('https_rewrite'): + # loade https rules + load_https_rules(https_rewrite_path) diff --git a/searx/https_rewrite.py b/searx/https_rewrite.py @@ -1,14 +1,141 @@ +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. 
+ +(C) 2013- by Adam Tauber, <asciimoo@gmail.com> +''' + import re +from lxml import etree +from os import listdir +from os.path import isfile, join + # https://gitweb.torproject.org/\ # pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules # HTTPS rewrite rules -https_rules = ( - # from - (re.compile(r'^http://(www\.|m\.|)?xkcd\.(?:com|org)/', re.I | re.U), - # to - r'https://\1xkcd.com/'), - (re.compile(r'^https?://(?:ssl)?imgs\.xkcd\.com/', re.I | re.U), - r'https://sslimgs.xkcd.com/'), -) +https_rules = [] + + +# load single ruleset from a xml file +def load_single_https_ruleset(filepath): + ruleset = () + + # init parser + parser = etree.XMLParser() + + # load and parse xml-file + try: + tree = etree.parse(filepath, parser) + except: + # TODO, error message + return () + + # get root node + root = tree.getroot() + + #print(etree.tostring(tree)) + + # check if root is a node with the name ruleset + # TODO improve parsing + if root.tag != 'ruleset': + return () + + # check if rule is deactivated by default + if root.attrib.get('default_off'): + return () + + # check if rule does only work for specific platforms + if root.attrib.get('platform'): + return () + + hosts = [] + rules = [] + exclusions = [] + + # parse childs from ruleset + for ruleset in root: + # this child define a target + if ruleset.tag == 'target': + # check if required tags available + if not ruleset.attrib.get('host'): + continue + + # convert host-rule to valid regex + host = ruleset.attrib.get('host').replace('.', '\.').replace('*', '.*') + + # append to host list + hosts.append(host) + + # this child define a rule + elif ruleset.tag == 'rule': + # check if required tags available + if not ruleset.attrib.get('from')\ + or not ruleset.attrib.get('to'): + continue + + # TODO hack, which convert a javascript regex group into a valid python regex group + rule_from = ruleset.attrib.get('from').replace('$', '\\') + rule_to = ruleset.attrib.get('to').replace('$', '\\') + + # TODO, not working 
yet because of the hack above, currently doing that in webapp.py + #rule_from_rgx = re.compile(rule_from, re.I) + + # append rule + rules.append((rule_from, rule_to)) + + # this child define an exclusion + elif ruleset.tag == 'exclusion': + # check if required tags available + if not ruleset.attrib.get('pattern'): + continue + + exclusion_rgx = re.compile(ruleset.attrib.get('pattern')) + + # append exclusion + exclusions.append(exclusion_rgx) + + # convert list of possible hosts to a simple regex + # TODO compress regex to improve performance + try: + target_hosts = re.compile('^(' + '|'.join(hosts) + ')', re.I | re.U) + except: + return () + + # return ruleset + return (target_hosts, rules, exclusions) + + +# load all https rewrite rules +def load_https_rules(rules_path): + # add / to path if not set yet + if rules_path[-1:] != '/': + rules_path += '/' + + # search all xml files which are stored in the https rule directory + xml_files = [ join(rules_path,f) for f in listdir(rules_path) if isfile(join(rules_path,f)) and f[-4:] == '.xml' ] + + # load xml-files + for ruleset_file in xml_files: + # calculate rewrite-rules + ruleset = load_single_https_ruleset(ruleset_file) + + # skip if no ruleset returned + if not ruleset: + continue + + # append ruleset + https_rules.append(ruleset) + + print(' * {n} https-rules loaded'.format(n=len(https_rules))) diff --git a/searx/https_rules/00README b/searx/https_rules/00README @@ -0,0 +1,17 @@ +<!-- +This directory contains web site rewriting rules for the +HTTPS Everywhere software, available from +https://www.eff.org/https-everywhere + +These rules were contributed to the project by users and aim to +enable routine secure access to as many different web sites as +possible. They are automatically installed together with the +HTTPS Everywhere software. The presence of these rules does not +mean that an HTTPS Everywhere user accessed, or intended to +access, any particular web site. 
+ +For information about how to create additional HTTPS Everywhere +rewriting rules to add support for new sites, please see + +https://www.eff.org/https-everywhere/rulesets +--> diff --git a/searx/https_rules/Bing.xml b/searx/https_rules/Bing.xml @@ -0,0 +1,56 @@ +<!-- + For other Microsoft coverage, see Microsoft.xml. + + + CDN buckets: + + - a134.lm.akamai.net + + - akam.bing.com + - *.mm.bing.net + + + Nonfunctional domains: + + - m2.cn.bing.com + - origin.bj1.bing.com + - blogs.bing.com + + + Fully covered domains: + + - bing.com subdomains: + + - (www.) + - c.bing (tracking beacons) + - cn.bing + - h.bing + - ssl + - testfamilysafety.bing + - udc.bing + - (www.)bing + + - *.mm.bing.net + - api.bing.com + +--> +<ruleset name="Bing"> + + <target host="bing.com" /> + <target host="*.bing.com" /> + <target host="*.mm.bing.net" /> + + + <securecookie host=".*\.bing\.com$" name=".+" /> + + + <rule from="^http://((?:c|cn|h|ssl|testfamilysafety|udc|www)\.)?bing\.com/" + to="https://$1bing.com/" /> + + <rule from="^http://([^/:@]*)\.mm\.bing\.net/" + to="https://$1.mm.bing.com/"/> + + <rule from="^http://([^/:@]*)\.api\.bing\.net/" + to="https://$1.api.bing.com/"/> + +</ruleset> diff --git a/searx/https_rules/Dailymotion.xml b/searx/https_rules/Dailymotion.xml @@ -0,0 +1,69 @@ +<!-- + Nonfunctional domains: + + - blog.dailymotion.com + - press.dailymotion.com (shows steaw.com, CN: www.steaw.com) + - proxy-46.dailymotion.com + - publicite.dailymotion.com + - publisher.dailymotion.com (reset) + - vid.ak.dmcdn.net (403, Akamai) + - vid2.ak.dmcdn.net (504, akamai) + + + Problematic domains: + + - ak2.static.dailymotion.com (mismatched, CN: *.dmcdn.net) + - support.dmcloud.net (mismatched, CN: *.zendesk.com) + + + Partially covered domains: + + - (www.)dailymotion.com + + - cdn/manifest/video/\w+.mnft 403s + - crossdomain.xml breaks videos + +--> +<ruleset name="Dailymotion (default off)" default_off="breaks some embedded videos"> + + <target host="dailymotion.com" /> + 
<!-- + * for cross-domain cookie. + --> + <target host="*.dailymotion.com" /> + <!-- + https://mail1.eff.org/pipermail/https-everywhere-rules/2012-July/001241.html + --> + <exclusion pattern="^http://(?:www\.)?dailymotion\.com/(?:cdn/[\w-]+/video/|crossdomain\.xml$)" /> + <target host="ak2.static.dailymotion.com" /> + <target host="*.dmcdn.net" /> + <target host="dmcloud.net" /> + <target host="*.dmcloud.net" /> + + + <!-- Testing wrt embedded breakage. + + securecookie host="^.*\.dailymotion\.com$" name=".+" /--> + <!-- + Omniture tracking cookies: + --> + <securecookie host="^\.dailymotion\.com$" name="^s_\w+$" /> + <securecookie host="^www\.dailymotion\.com$" name=".+" /> + + + <rule from="^http://(erroracct\.|www\.)?dailymotion\.com/" + to="https://$1dailymotion.com/" /> + + <rule from="^http://(s\d|static(?:\d|s\d-ssl))\.dmcdn\.net/" + to="https://$1.dmcdn.net/" /> + + <rule from="^https?://ak2\.static\.dailymotion\.com/" + to="https://static1-ssl.dmcdn.net/" /> + + <rule from="^http://(s\.|www\.)?dmcloud\.net/" + to="https://$1dmcloud.net/" /> + + <rule from="^https?://support\.dmcloud\.net/" + to="https://dmcloud.zendesk.com/" /> + +</ruleset> diff --git a/searx/https_rules/Deviantart.xml b/searx/https_rules/Deviantart.xml @@ -0,0 +1,53 @@ +<!-- + For problematic rules, see Deviantart-mismatches.xml. + + + Other deviantArt rulesets: + + - Sta.sh.xml + + + ToDo: Find edgecast URL for /(fc|th)\d+. + + + Mixed content: + + - Images on *.....com from e.deviantart.net * + + * Secured by us + +--> +<ruleset name="DeviantArt (pending)" default_off="site operator says not ready yet"> + + <target host="deviantart.com" /> + <target host="*.deviantart.com" /> + <target host="deviantart.net" /> + <target host="*.deviantart.net" /> + + + <!-- Not secured by server: + --> + <!--securecookie host="^\.deviantart\.com$" name="^userinfo$" /--> + + <securecookie host="^\.deviantart\.com$" name=".*" /> + + + <!-- Redirects from com to net, but does so successfully by itself. 
+ --> + <rule from="^http://([aei]|fc\d\d|s[ht]|th\d\d)\.deviantart\.(com|net)/" + to="https://$1.deviantart.$2/" /> + + <!-- This handles everything that isn't in the first rule. + Namely, usernames, backend, fc, th, and (www.). + These domains present a cert that is only + valid for .com. + Note that .net isn't used on DA, but.net does + redirect to .com, and we shouldn't break what would + otherwise work. + Mustn't rewrite from https here, as doing so + would conflict with the first rule. + --> + <rule from="^http://([^/:@\.]+\.)?deviantart\.(?:com|net)/" + to="https://$1deviantart.com/" /> + +</ruleset> diff --git a/searx/https_rules/DuckDuckGo.xml b/searx/https_rules/DuckDuckGo.xml @@ -0,0 +1,38 @@ +<!-- + Problematic domains: + + - www.dukgo.com (mismatched, CN: dukgo.com) + + + Fully covered domains: + + - (www.)dukgo.com (www → ^) + +--> +<ruleset name="DuckDuckGo"> + <target host="duckduckgo.com" /> + <target host="*.duckduckgo.com" /> + <target host="ddg.gg" /> + <target host="duck.co" /> + <target host="i.duck.co" /> + <target host="dukgo.com" /> + <target host="www.dukgo.com" /> + + <exclusion pattern="^http://(help|meme)\.duckduckgo\.com/" /> + + <securecookie host="^duck\.co$" name=".*"/> + + <rule from="^http://duckduckgo\.com/" to="https://duckduckgo.com/"/> + <rule from="^http://([^/:@\.]+)\.duckduckgo\.com/" to="https://$1.duckduckgo.com/"/> + <!-- TODO: What does ddg.gg/foo do? Runs query foo, redirects to homepage, or error? --> + <rule from="^http://ddg\.gg/$" to="https://duckduckgo.com/" /> + + <rule from="^http://duck\.co/" to="https://duck.co/" /> + + <rule from="^http://i\.duck\.co/" + to="https://duckduckgo.com/"/> + + <rule from="^http://(?:www\.)?dukgo\.com/" + to="https://dukgo.com/" /> + +</ruleset> diff --git a/searx/https_rules/Flickr.xml b/searx/https_rules/Flickr.xml @@ -0,0 +1,44 @@ +<!-- + For other Yahoo coverage, see Yahoo.xml. 
+ + + These altnames don't exist: + + - www.blog.flickr.net + - www.code.flickr.net + +--> +<ruleset name="Flickr"> + + <target host="flic.kr" /> + <target host="*.flic.kr" /> + <target host="flickr.com" /> + <target host="*.flickr.com" /> + <target host="*.flickr.net" /> + <target host="*.staticflickr.com" /> + + + <!-- Not secured by server: + --> + <!--securecookie host="^\.flic\.kr$" name="^BX$" /--> + + <securecookie host="^\.flic\.kr$" name=".+" /> + <securecookie host=".*\.flickr\.com$" name=".+" /> + + + <rule from="^http://flic\.kr/" + to="https://flic.kr/" /> + + <rule from="^http://(api\.|www\.)?flickr\.com/" + to="https://$1flickr.com/" /> + + <rule from="^http://s(ecure|tatic)\.flickr\.com/" + to="https://s$1.flickr.com/" /> + + <rule from="^http://(c2|farm\d+)\.static(\.)?flickr\.com/" + to="https://$1.static$2flickr.com/" /> + + <rule from="^http://(blog|code)\.flickr\.net/" + to="https://$1.flickr.net/" /> + +</ruleset> diff --git a/searx/https_rules/Github-Pages.xml b/searx/https_rules/Github-Pages.xml @@ -0,0 +1,11 @@ +<!-- + For other GitHub coverage, see Github.xml. +--> +<ruleset name="GitHub Pages"> + + <target host="*.github.io" /> + + <rule from="^http://([^/@:\.]+)\.github\.io/" + to="https://$1.github.io/" /> + +</ruleset> diff --git a/searx/https_rules/Github.xml b/searx/https_rules/Github.xml @@ -0,0 +1,94 @@ +<!-- + Other GitHub rulesets: + + - Github-Pages.xml + - Guag.es.xml + - Speaker_Deck.com.xml + + + CDN buckets: + + - github-images.s3.amazonaws.com + - github.global.ssl.fastly.net + - a248.e.akamai.net/assets.github.com/ + - a248.e.akamai.net/camo.github.com/ + - s3.amazonaws.com/github/ | d24z2fz21y4fag.cloudfront.net + - github.myshopify.com + + + Fully covered domains: + + - github.com subdomains: + + - (www.) 
+ - assets\d+ + - assets-cdn + - bounty + - cloud + - f.cloud + - codeload + - developer + - eclipse + - enterprise + - gist + - gist-assets + - help + - identicons + - jobs + - mac + - mobile + - nodeload + - octodex + - pages + - raw + - rg3 + - shop + - status + - support + - training + - try + - wiki + - windows + + - collector.githubapp.com + + - githubusercontent.com + +--> +<ruleset name="GitHub"> + + <target host="github.com" /> + <target host="*.github.com" /> + <target host="github.io" /> + <target host="*.githubusercontent.com" /> + <target host="collector.githubapp.com" /> + + + <!-- Secured by server: + --> + <!--securecookie host="^github\.com$" name="^(_gh_sess|tz|user_session)$" /--> + <!--securecookie host="^\.github\.com$" name="^(dotcom_user|logged_in)$" /--> + <!--securecookie host="^enterprise\.github\.com$" name="^(_enterprise_web|request_method)$" /--> + <!--securecookie host="^gist\.github\.com$" name="^_gist_session$" /--> + <!--securecookie host="^help\.github\.com$" name="^_help_session$" /--> + <!-- + Not secured by server: + --> + <!--securecookie host="^status\.github\.com$" name="^rack\.session$" /--> + + <securecookie host="^(?:.*\.)?github\.com$" name=".+" /> + + + <rule from="^http://((?:assets\d+|assets-cdn|bounty|cloud|f\.cloud|codeload|developer|eclipse|enterprise|gist|gist-assets|help|identicons|jobs|mac|mobile|nodeload|octodex|pages|raw|rg3|shop|status|support|training|try|wiki|windows|www)\.)?github\.com/" + to="https://$1github.com/" /> + + <rule from="^http://collector\.githubapp\.com/" + to="https://collector.githubapp.com/" /> + + <rule from="^https?://github\.io/" + to="https://pages.github.com/" /> + + <rule from="^http://([^/@:\.]+)\.githubusercontent\.com/" + to="https://$1.githubusercontent.com/" /> + +</ruleset> diff --git a/searx/https_rules/Google-mismatches.xml b/searx/https_rules/Google-mismatches.xml @@ -0,0 +1,26 @@ +<!-- + + Problematic domains: + + - (www.)apture.com (works, mismatched, CN: *.google.com) + 
+--> +<ruleset name="Google (mismatches)" default_off="mismatches"> + + <!-- Akamai --> + <target host="js.admeld.com"/> + <target host="apture.com" /> + <target host="www.apture.com" /> + <target host="googleartproject.com"/> + <target host="www.googleartproject.com"/> + + <rule from="^http://js\.admeld\.com/" + to="https://js.admeld.com/"/> + + <rule from="^https?://(?:www\.)?apture\.com/" + to="https://apture.com/" /> + + <rule from="^http://(?:www\.)?googleartproject\.com/" + to="https://www.googleartproject.com/"/> + +</ruleset> diff --git a/searx/https_rules/Google.org.xml b/searx/https_rules/Google.org.xml @@ -0,0 +1,14 @@ +<!-- + For other Google coverage, see GoogleServices.xml. + +--> +<ruleset name="Google.org"> + + <target host="google.org" /> + <target host="www.google.org" /> + + + <rule from="^http://(www\.)?google\.org/" + to="https://$1google.org/" /> + +</ruleset>+ \ No newline at end of file diff --git a/searx/https_rules/GoogleAPIs.xml b/searx/https_rules/GoogleAPIs.xml @@ -0,0 +1,143 @@ +<!-- + For other Google coverage, see GoogleServices.xml. + + + Nonfunctional domains: + + - hosted.gmodules.com * + - img0.gmodules.com * + - p.gmodules.com * + + * 404; mismatched, CN: *.googleusercontent.com + + + Problematic domains: + + - gmodules.com (503, CN: www.google.com) + - www.gmodules.com (503, CN: *.googleusercontent.com) + - gstatic.com (404, valid cert) + - api.recaptcha.net (works; mismatched, CN: google.com) + + + Partially covered domains: + + - (www.)gmodules.com (→ www.google.com) + - (www.)google.com + - chart.apis.google.com (→ chart.googleapis.com) + + + Fully covered domains: + + - api.google.com + + - *.clients.google.com: + + - linkhelp + + - ssl.google-analytics.com + - www.google-analytics.com + + - googleapis.com subdomains: + + - ajax + - chart + - *.commondatastorage + - fonts + - *.storage + - www + + - gstatic.com subdomains: + + - (www.) 
(^ → www) + - csi + - encrypted-tbn\d + - g0 + - *.metric + - ssl + - t\d + + - api.recaptcha.net (→ www.google.com) + - api-secure.recaptcha.net + - gdata.youtube.com + + + ssl.google-analytics.com/ga.js sets __utm\w wildcard + cookies on whichever domain it is loaded from. + +--> +<ruleset name="Google APIs"> + + <target host="gmodules.com" /> + <target host="www.gmodules.com" /> + <target host="google.com" /> + <target host="apis.google.com" /> + <target host="*.apis.google.com" /> + <target host="*.clients.google.com" /> + <target host="www.google.com" /> + <target host="*.google-analytics.com" /> + <target host="*.googleapis.com" /> + <target host="gstatic.com" /> + <target host="*.gstatic.com" /> + <!-- Captive portal detection redirects to this URL, and many captive + portals break TLS, so exempt this redirect URL. + See GitHub bug #368 + --> + <exclusion pattern="^http://www\.gstatic\.com/generate_204" /> + <target host="*.recaptcha.net" /> + <target host="gdata.youtube.com" /> + <exclusion pattern="^http://gdata\.youtube\.com/crossdomain\.xml" /> + + + <securecookie host="^ssl\.google-analytics\.com$" name=".+" /> + + + <rule from="^http://(?:www\.)?gmodules\.com/ig/images/" + to="https://www.google.com/ig/images/" /> + + <!-- jsapi was causing problems on some sites that embed google maps: + https://trac.torproject.org/projects/tor/ticket/2335 + Apparently now fixed; thanks, Google! 
+ --> + <rule from="^http://(?:www\.)?google\.com/(afsonline/|chart|jsapi|recaptcha/|uds)" + to="https://www.google.com/$1" /> + + <rule from="^http://(api|[\w-]+\.client)s\.google\.com/" + to="https://$1s.google.com/" /> + + <rule from="^http://chart\.apis\.google\.com/chart" + to="https://chart.googleapis.com/chart" /> + + <rule from="^http://(ssl|www)\.google-analytics\.com/" + to="https://$1.google-analytics.com/" /> + + <rule from="^http://(ajax|chart|fonts|www)\.googleapis\.com/" + to="https://$1.googleapis.com/" /> + + <rule from="^http://([^@:\./]+\.)?(commondata)?storage\.googleapis\.com/" + to="https://$1$2storage.googleapis.com/" /> + + <!-- There is an interesting question about whether we should + append &strip=1 to all cache URLs. This causes them to load + without images and styles, which is more secure but can look + worse. + Without &strip=1, the images and styles from the cached + pages still load from the original, typically unencrypted, page. + With &strip=1, the cached page will be text-only and + will come exclusively from Google's HTTPS server. 
+ --> + <rule from="^http://(?:www\.)?gstatic\.com/" + to="https://www.gstatic.com/" /> + + <rule from="^http://(csi|encrypted-tbn\d|g0|[\w-]+\.metric|ssl|t\d)\.gstatic\.com/" + to="https://$1.gstatic.com/" /> + + <rule from="^http://api\.recaptcha\.net/" + to="https://www.google.com/recaptcha/api/" /> + + <rule from="^http://api-secure\.recaptcha\.net/" + to="https://api-secure.recaptcha.net/" /> + + <rule from="^http://gdata\.youtube\.com/" + to="https://gdata.youtube.com/" /> + +</ruleset> diff --git a/searx/https_rules/GoogleCanada.xml b/searx/https_rules/GoogleCanada.xml @@ -0,0 +1,6 @@ +<ruleset name="GoogleCanada"> + <target host="google.ca" /> + <target host="*.google.ca" /> + <rule from="^http://([^/:@\.]+)\.google\.ca/finance" to="https://$1.google.ca/finance"/> +</ruleset> + diff --git a/searx/https_rules/GoogleImages.xml b/searx/https_rules/GoogleImages.xml @@ -0,0 +1,65 @@ +<!-- + For other Google coverage, see GoogleServices.xml. + + + Problematic domains: + + - www.google.bo * + - www.google.co * + - www.google.ec * + - www.google.in * + - www.google.kr * + - www.google.com.kz ** + - www.google.com.lk * + - www.google.mx ** + - www.google.sg * + - www.google.sl * + - www.google.ug * + - www.google.vn * + + * 404; mismatched, CN: google.com + ** Works; mismatched, CN: google.com + +--> +<ruleset name="Google Images"> + + <target host="google.*" /> + <target host="www.google.*" /> + <target host="google.co.*" /> + <target host="www.google.co.*" /> + <target host="google.com" /> + <target host="images.google.com" /> + <target host="google.com.*" /> + <target host="www.google.com.*" /> + <!-- + Only handle image-related paths in this ruleset: + --> + <exclusion pattern="^http://(?:www\.)?google(?:\.com?)?\.\w{2,3}/(?!(?:advanced_image_search|imghp|.*tb(?:m=isch|s=sbi)))" /> + + + <rule from="^http://(?:www\.)?google\.com/" + to="https://www.google.com/" /> + + <rule from="^http://images\.google\.com/" + to="https://images.google.com/" /> + + <!-- First 
handle problematic domains: + --> + <rule from="^http://(?:www\.)?google\.co/" + to="https://www.google.com/" /> + + <rule from="^http://(?:www\.)?google\.(?:co\.)?(in|kr|ug)/" + to="https://www.google.co.$1/" /> + + <rule from="^http://(?:www\.)?google\.(?:com\.)?(kz|lk)/" + to="https://www.google.$1/" /> + + <rule from="^http://(?:www\.)?google\.(?:com\.)?(bo|ec|mx|sg|sl|vn)/" + to="https://www.google.com.$1/" /> + + <!-- And then the rest: + --> + <rule from="^http://(?:www\.)?google\.(com?\.)?(ae|ar|at|au|bg|bh|br|ca|ch|cl|co|cr|cu|de|eg|es|fi|fr|gh|gt|hr|id|ie|il|it|jo|jp|jm|ke|kw|lb|ly|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|se|sv|th|tr|uk|uy|ve|za|zw)/" + to="https://www.google.$1$2/" /> + +</ruleset> diff --git a/searx/https_rules/GoogleMainSearch.xml b/searx/https_rules/GoogleMainSearch.xml @@ -0,0 +1,78 @@ +<ruleset name="Search www.google.com"> + +<!-- +Enabling this ruleset should cause searches to go to +https://www.google.com rather than https://encrypted.google.com. Note that +the filename is important; it must be before GoogleSearch.xml in a bash +expansion of src/chrome/content/rules/*.xml in order to take precedence. +--> + + <target host="*.google.com" /> + <target host="google.com" /> + <target host="www.google.com.*" /> + <target host="google.com.*" /> + <target host="www.google.co.*" /> + <target host="google.co.*" /> + <target host="www.google.*" /> + <target host="google.*" /> + <!-- beyond clients1 these do not currently exist in the ccTLDs, + but just in case... 
--> + <target host="clients1.google.com.*" /> + <target host="clients2.google.com.*" /> + <target host="clients3.google.com.*" /> + <target host="clients4.google.com.*" /> + <target host="clients5.google.com.*" /> + <target host="clients6.google.com.*" /> + <target host="clients1.google.co.*" /> + <target host="clients2.google.co.*" /> + <target host="clients3.google.co.*" /> + <target host="clients4.google.co.*" /> + <target host="clients5.google.co.*" /> + <target host="clients6.google.co.*" /> + <target host="clients1.google.*" /> + <target host="clients2.google.*" /> + <target host="clients3.google.*" /> + <target host="clients4.google.*" /> + <target host="clients5.google.*" /> + <target host="clients6.google.*" /> + + <rule from="^http://www\.google\.com/$" + to="https://www.google.com/"/> + + <!-- The most basic case. --> + + <rule from="^http://(?:www\.)?google\.com/search" + to="https://www.google.com/search"/> + + <!-- A very annoying exception that we seem to need for the basic case --> + + <exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" /> + <exclusion pattern="^http://clients[0-9]\.google\.com/.*client=products.*" /> + <exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" /> + + <!-- https://trac.torproject.org/projects/tor/ticket/9713 --> + + <exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" /> + + <!-- This is necessary for image results links from web search results --> + + <exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" /> + + <rule from="^http://(?:www\.)?google\.com/webhp" + to="https://www.google.com/webhp"/> + + <rule from="^http://(?:www\.)?google\.com/#" + to="https://www.google.com/#"/> + + <rule from="^http://(?:www\.)?google\.com/$" + to="https://www.google.com/"/> + + <!-- Completion urls look like this: + +http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n + + --> + <rule 
from="^http://clients[0-9]\.google\.com/complete/search" + to="https://clients1.google.com/complete/search"/> + +</ruleset> diff --git a/searx/https_rules/GoogleMaps.xml b/searx/https_rules/GoogleMaps.xml @@ -0,0 +1,67 @@ +<!-- + Problematic domains: + + - khms * + - khms[0-3] * + + * $ 404s + + + Fully covered domains: + + - google.com subdomains: + + - khms + - khms[0-3] + +--> +<ruleset name="Google Maps"> + + <target host="maps.google.*" /> + <!-- + https://trac.torproject.org/projects/tor/ticket/8627 + --> + <exclusion pattern="^http://maps\.google\.com/local_url" /> + <exclusion pattern="^http://maps\.google\.gr/transitathens" /> + <target host="maps.google.co.*" /> + <target host="khms.google.com" /> + <target host="khms0.google.com" /> + <target host="khms1.google.com" /> + <target host="khms2.google.com" /> + <target host="khms3.google.com" /> + <target host="maps-api-ssl.google.com" /> + <target host="mw2.google.com" /> + <target host="maps.google.com.*" /> + <target host="maps.googleapis.com" /> + <!-- + https://mail1.eff.org/pipermail/https-everywhere-rules/2012-September/001317.html + --> + <!--exclusion pattern="^http://maps\.googleapis\.com/map(files/lib/map_1_20\.swf|sapi/publicapi\?file=flashapi)" /--> + <exclusion pattern="^http://maps\.googleapis\.com/map(?:files/lib/map_\d+_\d+\.swf|sapi/publicapi\?file=flashapi)" /> + <target host="maps.gstatic.com" /> + + + <!--securecookie host="^maps\.google\.(com?\.)?(au|ca|gh|ie|in|jm|ke|lk|my|n[agz]|pk|rw|sl|sg|ug|uk|za|zw)$" name=".+" /--> + <securecookie host="^maps\.google\.[\w.]{2,6}$" name=".+" /> + <securecookie host="^maps\.g(?:oogle|oogleapis|static)\.com$" name=".+" /> + <securecookie host="^maps-api-ssl\.google\.com$" name=".+" /> + + + <rule from="^http://maps\.google\.([^/]+)/" + to="https://maps.google.$1/" /> + + <!-- http://khms.../$ 404s: + --> + <rule from="^http://khms\d?\.google\.com/+\??$" + to="https://www.google.com/" /> + + <rule 
from="^http://(khms\d?|maps-api-ssl|mw2)\.google\.com/" + to="https://$1.google.com/" /> + + <rule from="^http://maps\.g(oogleapis|static)\.com/" + to="https://maps.g$1.com/" /> + + <rule from="^https://maps\.googleapis\.com/map(?=files/lib/map_\d+_\d+\.swf|sapi/publicapi\?file=flashapi)" + to="http://maps.googleapis.com/map" downgrade="1" /> + +</ruleset> diff --git a/searx/https_rules/GoogleMelange.xml b/searx/https_rules/GoogleMelange.xml @@ -0,0 +1,6 @@ +<ruleset name="GoogleMelange"> + <target host="www.google-melange.com" /> + <target host="google-melange.com" /> + + <rule from="^http://(www\.)?google-melange\.com/" to="https://www.google-melange.com/" /> +</ruleset> diff --git a/searx/https_rules/GoogleSearch.xml b/searx/https_rules/GoogleSearch.xml @@ -0,0 +1,135 @@ +<ruleset name="Google Search"> + + <target host="google.com" /> + <target host="*.google.com" /> + <target host="google.com.*" /> + <target host="www.google.com.*" /> + <target host="google.co.*" /> + <target host="www.google.co.*" /> + <target host="google.*" /> + <target host="www.google.*" /> + <!-- + Beyond clients1 these do not currently + exist in the ccTLDs, but just in case... 
+ --> + <target host="clients1.google.com.*" /> + <target host="clients2.google.com.*" /> + <target host="clients3.google.com.*" /> + <target host="clients4.google.com.*" /> + <target host="clients5.google.com.*" /> + <target host="clients6.google.com.*" /> + <target host="clients1.google.co.*" /> + <target host="clients2.google.co.*" /> + <target host="clients3.google.co.*" /> + <target host="clients4.google.co.*" /> + <target host="clients5.google.co.*" /> + <target host="clients6.google.co.*" /> + <target host="clients1.google.*" /> + <target host="clients2.google.*" /> + <target host="clients3.google.*" /> + <target host="clients4.google.*" /> + <target host="clients5.google.*" /> + <target host="clients6.google.*" /> + + + <!-- Some Google pages can generate naive links back to the + unencrypted version of encrypted.google.com, which is + a 301 but theoretically vulnerable to SSL stripping. + --> + <rule from="^http://encrypted\.google\.com/" + to="https://encrypted.google.com/" /> + + <!-- The most basic case. 
+ --> + <rule from="^http://(?:www\.)?google\.com/search" + to="https://encrypted.google.com/search" /> + + <!-- A very annoying exception that we + seem to need for the basic case + --> + <exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" /> + <exclusion pattern="^http://clients\d\.google\.com/.*client=products.*" /> + <exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" /> + + <!-- https://trac.torproject.org/projects/tor/ticket/9713 + --> + + <exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" /> + + + <!-- This is necessary for image results + links from web search results + --> + <exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" /> + + <rule from="^http://(?:www\.)?google\.com/about" + to="https://www.google.com/about" /> + + <!-- There are two distinct cases for these firefox searches --> + + <rule from="^http://(?:www\.)?google(?:\.com?)?\.[a-z]{2}/firefox/?$" + to="https://encrypted.google.com/" /> + + <rule from="^http://(?:www\.)?google(?:\.com?)?\.[a-z]{2}/firefox" + to="https://encrypted.google.com/webhp" /> + + <rule from="^http://(?:www\.)?google\.com/webhp" + to="https://encrypted.google.com/webhp" /> + + <rule from="^http://codesearch\.google\.com/" + to="https://codesearch.google.com/" /> + + <rule from="^http://(?:www\.)?google\.com/codesearch" + to="https://www.google.com/codesearch" /> + + <rule from="^http://(?:www\.)?google\.com/#" + to="https://encrypted.google.com/#" /> + + <rule from="^http://(?:www\.)?google\.com/$" + to="https://encrypted.google.com/" /> + + <!-- Google supports IPv6 search, including + HTTPS with a valid certificate! 
--> + <rule from="^http://ipv6\.google\.com/" + to="https://ipv6.google.com/" /> + + <!-- most google international sites look like + "google.fr", some look like "google.co.jp", + and some crazy ones like "google.com.au" --> + + <rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/(search\?|#)" + to="https://$1google$2.$3/$4" /> + + <!-- Language preference setting --> + <rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/setprefs" + to="https://$1google$2.$3/setprefs" /> + + <!-- Completion urls look like this: + +http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n + + --> + <rule from="^http://clients\d\.google\.com/complete/search" + to="https://clients1.google.com/complete/search" /> + + <rule from="^http://clients\d\.google(\.com?\.[a-z]{2})/complete/search" + to="https://clients1.google.$1/complete/search" /> + + <rule from="^http://clients\d\.google\.([a-z]{2})/complete/search" + to="https://clients1.google.$1/complete/search" /> + + <rule from="^http://suggestqueries\.google\.com/complete/search" + to="https://clients1.google.com/complete/search" /> + + <rule from="^http://(www\.)?google\.(com?\.)?([a-z]{2})/(?:webhp)?$" + to="https://$1google.$2$3/" /> + + <!-- If there are URL parameters, keep them. --> + <rule from="^http://(www\.)?google\.(com?\.)?([a-z]{2})/(?:webhp)?\?" + to="https://$1google.$2$3/webhp?" 
/> + + <!-- teapot --> + <rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/teapot" + to="https://$1google$2.$3/teapot" /> + +</ruleset> diff --git a/searx/https_rules/GoogleServices.xml b/searx/https_rules/GoogleServices.xml @@ -0,0 +1,345 @@ +<!-- + Other Google rulesets: + + - 2mdn.net.xml + - Admeld.xml + - ChannelIntelligence.com.xml + - Doubleclick.net.xml + - FeedBurner.xml + - Google.org.xml + - GoogleAPIs.xml + - Google_App_Engine.xml + - GoogleImages.xml + - GoogleShopping.xml + - Ingress.xml + - Meebo.xml + - Orkut.xml + - Postini.xml + - WebM_Project.org.xml + + + Nonfunctional domains: + + - feedproxy.google.com (404, valid cert) + - partnerpage.google.com * + - safebrowsing.clients.google.com (404, mismatched) + - (www.)googlesyndicatedsearch.com (404; mismatched, CN: google.com) + - buttons.googlesyndication.com * + + * 404, valid cert + + + Nonfunctional google.com paths: + + - analytics (redirects to http) + - imgres + - gadgets * + - hangouts (404) + - u/ (404) + + * Redirects to http + + + Problematic domains: + + - www.goo.gl (404; mismatched, CN: *.google.com) + + - google.com subdomains: + + - books (googlebooks/, images/, & intl/ 404, but works when rewritten to www) + - cbks0 **** + - earth * + - gg ($ 404s) + - knoll * + - scholar ** + - trends * + + - news.google.cctld ** + - scholar.google.cctld ** + - *-opensocial.googleusercontent.com *** + + **** $ 404s + * 404, valid cert + ** Redirects to http, valid cert + *** Breaks followers widget - https://trac.torproject.org/projects/tor/ticket/7294 + + + Partially covered domains: + + - google.cctld subdomains: + + - scholar (→ www) + + - google.com subdomains: + + - (www.) 
+ - cbks0 ($ 404s) + - gg ($ 404s) + - news (→ www) + - scholar (→ www) + + - *.googleusercontent.com (*-opensocial excluded) + + + Fully covered domains: + + - lh[3-6].ggpht.com + - (www.)goo.gl (www → ^) + + - google.com subdomains: + + - accounts + - adwords + - apis + - appengine + - books (→ encrypted) + - calendar + - checkout + - chrome + - clients[12] + - code + - *.corp + - developers + - dl + - docs + - docs\d + - \d.docs + - drive + - earth (→ www) + - encrypted + - encrypted-tbn[123] + - feedburner + - fiber + - finance + - glass + - groups + - health + - helpouts + - history + - hostedtalkgadget + - id + - investor + - knol + - knoll (→ knol) + - lh\d + - mail + - chatenabled.mail + - pack + - picasaweb + - pki + - play + - plus + - plusone + - productforums + - profiles + - safebrowsing-cache + - cert-test.sandbox + - plus.sandbox + - sb-ssl + - script + - security + - services + - servicessites + - sites + - spreadsheets + - spreadsheets\d + - support + - talk + - talkgadget + - tbn2 (→ encrypted-tbn2) + - tools + - trends (→ www) + + - partner.googleadservices.com + - (www.)googlecode.com + - *.googlecode.com (per-project subdomains) + - googlesource.com + - *.googlesource.com + - pagead2.googlesyndication.com + - tpc.googlesyndication.com + - mail-attachment.googleusercontent.com + - webcache.googleusercontent.com + + + XXX: Needs more testing + +--> +<ruleset name="Google Services"> + + <target host="*.ggpht.com" /> + <target host="gmail.com" /> + <target host="www.gmail.com" /> + <target host="goo.gl" /> + <target host="www.goo.gl" /> + <target host="google.*" /> + <target host="accounts.google.*" /> + <target host="adwords.google.*" /> + <target host="finance.google.*" /> + <target host="groups.google.*" /> + <target host="it.google.*" /> + <target host="news.google.*" /> + <exclusion pattern="^http://(?:news\.)?google\.com/(?:archivesearch|newspapers)" /> + <target host="picasaweb.google.*" /> + <target host="scholar.google.*" /> + <target 
host="www.google.*" /> + <target host="*.google.ca" /> + <target host="google.co.*" /> + <target host="accounts.google.co.*" /> + <target host="adwords.google.co.*" /> + <target host="finance.google.co.*" /> + <target host="groups.google.co.*" /> + <target host="id.google.co.*" /> + <target host="news.google.co.*" /> + <target host="picasaweb.google.co.*" /> + <target host="scholar.google.co.*" /> + <target host="www.google.co.*" /> + <target host="google.com" /> + <target host="*.google.com" /> + <exclusion pattern="^http://(?:www\.)?google\.com/analytics/*(?:/[^/]+)?(?:\?.*)?$" /> + <!--exclusion pattern="^http://books\.google\.com/(?!books/(\w+\.js|css/|javascript/)|favicon\.ico|googlebooks/|images/|intl/)" /--> + <exclusion pattern="^http://cbks0\.google\.com/(?:$|\?)" /> + <exclusion pattern="^http://gg\.google\.com/(?!csi(?:$|\?))" /> + <target host="google.com.*" /> + <target host="accounts.google.com.*" /> + <target host="adwords.google.com.*" /> + <target host="groups.google.com.*" /> + <target host="id.google.com.*" /> + <target host="news.google.com.*" /> + <target host="picasaweb.google.com.*" /> + <target host="scholar.google.com.*" /> + <target host="www.google.com.*" /> + <target host="partner.googleadservices.com" /> + <target host="googlecode.com" /> + <target host="*.googlecode.com" /> + <target host="googlemail.com" /> + <target host="www.googlemail.com" /> + <target host="googlesource.com" /> + <target host="*.googlesource.com" /> + <target host="*.googlesyndication.com" /> + <target host="www.googletagservices.com" /> + <target host="googleusercontent.com" /> + <target host="*.googleusercontent.com" /> + <!-- + Necessary for the Followers widget: + + https://trac.torproject.org/projects/tor/ticket/7294 + --> + <exclusion pattern="http://[^@:\./]+-opensocial\.googleusercontent\.com" /> + + + <!-- Can we secure any of these wildcard cookies safely? 
+ --> + <!--securecookie host="^\.google\.com$" name="^(hl|I4SUserLocale|NID|PREF|S)$" /--> + <!--securecookie host="^\.google\.[\w.]{2,6}$" name="^(hl|I4SUserLocale|NID|PREF|S|S_awfe)$" /--> + <securecookie host="^(?:accounts|adwords|\.code|login\.corp|developers|docs|\d\.docs|fiber|mail|picasaweb|plus|\.?productforums|support)\.google\.[\w.]{2,6}$" name=".+" /> + <securecookie host="^www\.google\.com$" name="^GoogleAccountsLocale_session$" /> + <securecookie host="^mail-attachment\.googleusercontent\.com$" name=".+" /> + <securecookie host="^gmail\.com$" name=".+" /> + <securecookie host="^www\.gmail\.com$" name=".+" /> + <securecookie host="^googlemail\.com$" name=".+" /> + <securecookie host="^www\.googlemail\.com$" name=".+" /> + + + <!-- - lh 3-6 exist + - All appear identical + - Identical to lh\d.googleusercontent.com + --> + <rule from="^http://lh(\d)\.ggpht\.com/" + to="https://lh$1.ggpht.com/" /> + + <rule from="^http://lh(\d)\.google\.ca/" + to="https://lh$1.google.ca/" /> + + + <rule from="^http://(www\.)?g(oogle)?mail\.com/" + to="https://$1g$2mail.com/" /> + + <rule from="^http://(?:www\.)?goo\.gl/" + to="https://goo.gl/" /> + + + <!-- Redirects to http when rewritten to www: + --> + <rule from="^http://books\.google\.com/" + to="https://encrypted.google.com/" /> + + <!-- tisp$ 404s: + --> + <rule from="^http://(?:www\.)?google\.((?:com?\.)?\w{2,3})/tisp(?=$|\?)" + to="https://www.google.$1/tisp/" /> + + <!-- Paths that work on all in google.* + --> + <rule from="^http://(?:www\.)?google\.((?:com?\.)?\w{2,3})/(accounts|adplanner|ads|adsense|adwords|analytics|bookmarks|chrome|contacts|coop|cse|css|culturalinstitute|doodles|earth|favicon\.ico|finance|get|goodtoknow|googleblogs|grants|green|hostednews|images|intl|js|landing|logos|mapmaker|newproducts|news|nexus|patents|policies|prdhp|profiles|products|reader|s2|settings|shopping|support|tisp|tools|transparencyreport|trends|urchin|webmasters)(?=$|[?/])" + to="https://www.google.$1/$2" /> + + <!-- Paths 
that 404 on .cctld, but work on .com: + --> + <rule from="^http://(?:www\.)?google\.(?:com?\.)?\w{2,3}/(?=calendar|dictionary|doubleclick|help|ideas|pacman|postini|powermeter|url)" + to="https://www.google.com/" /> + + <rule from="^http://(?:www\.)?google\.(?:com?\.)?\w{2,3}/custom" + to="https://www.google.com/cse" /> + + <!-- Paths that only exist/work on .com + --> + <rule from="^http://(?:www\.)?google\.com/(\+|appsstatus|books|buzz|extern_js|glass|googlebooks|ig|insights|moderator|phone|safebrowsing|videotargetting|webfonts)(?=$|[?/])" + to="https://www.google.com/$1" /> + + <!-- Subdomains that work on all in google.* + --> + <rule from="^http://(accounts|adwords|finance|groups|id|picasaweb|)\.google\.((?:com?\.)?\w{2,3})/" + to="https://$1.google.$2/" /> + + <!-- Subdomains that only exist/work on .com + --> + <rule from="^http://(apis|appengine|books|calendar|cbks0|chat|checkout|chrome|clients[12]|code|[\w-]+\.corp|developers|dl|docs\d?|\d\.docs|drive|encrypted|encrypted-tbn[123]|feedburner|fiber|fonts|gg|glass||health|helpouts|history|(?:hosted)?talkgadget|investor|lh\d|(?:chatenabled\.)?mail|pack|pki|play|plus(?:\.sandbox)?|plusone|productforums|profiles|safebrowsing-cache|cert-test\.sandbox|sb-ssl|script|security|services|servicessites|sites|spreadsheets\d?|support|talk|tools)\.google\.com/" + to="https://$1.google.com/" /> + + <exclusion pattern="^http://clients[0-9]\.google\.com/ocsp"/> + + <rule from="^http://earth\.google\.com/" + to="https://www.google.com/earth/" /> + + <rule from="^http://scholar\.google\.((?:com?\.)?\w{2,3})/intl/" + to="https://www.google.$1/intl/" /> + + <rule from="^http://(?:encrypted-)?tbn2\.google\.com/" + to="https://encrypted-tbn2.google.com/" /> + + + <rule from="^http://knoll?\.google\.com/" + to="https://knol.google.com/" /> + + + <rule from="^http://news\.google\.(?:com?\.)?\w{2,3}/(?:$|news|newshp)" + to="https://www.google.com/news" /> + + <rule from="^http://trends\.google\.com/" + 
to="https://www.google.com/trends" /> + + + <rule from="^http://([^/:@\.]+\.)?googlecode\.com/" + to="https://$1googlecode.com/" /> + + <rule from="^http://([^\./]+\.)?googlesource\.com/" + to="https://$1googlesource.com/" /> + + + <rule from="^http://partner\.googleadservices\.com/" + to="https://partner.googleadservices.com/" /> + + <rule from="^http://(pagead2|tpc)\.googlesyndication\.com/" + to="https://$1.googlesyndication.com/" /> + + <!-- !www doesn't exist. + --> + <rule from="^http://www\.googletagservices\.com/tag/js/" + to="https://www.googletagservices.com/tag/js/" /> + + + <rule from="^http://([^@:\./]+)\.googleusercontent\.com/" + to="https://$1.googleusercontent.com/" /> + + +</ruleset> diff --git a/searx/https_rules/GoogleShopping.xml b/searx/https_rules/GoogleShopping.xml @@ -0,0 +1,28 @@ +<!-- + For other Google coverage, see GoogleServices.xml. + +--> +<ruleset name="Google Shopping"> + + <target host="google.*" /> + <target host="www.google.*" /> + <target host="google.co.*" /> + <target host="www.google.co.*" /> + <target host="*.google.com" /> + <target host="google.com.*" /> + <target host="www.google.com.*" /> + + + <rule from="^http://encrypted\.google\.com/(prdhp|shopping)" + to="https://www.google.com/$1" /> + + <rule from="^http://shopping\.google\.com/" + to="https://shopping.google.com/" /> + + <rule from="^http://(?:encrypted|www)\.google\.com/(.*tbm=shop)" + to="https://www.google.com/$1" /> + + <rule from="^http://(?:www\.)?google\.((?:com?\.)?(?:ae|ar|at|au|bg|bh|bo|br|ca|ch|cl|cr|co|cu|de|ec|eg|es|fi|fr|gh|gt|hr|id|ie|il|in|it|jm|jo|jp|ke|kr|kw|kz|lb|lk|ly|mx|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|sg|sl|se|sv|th|tr|ug|uk|uy|ve|vn|za|zw))/(?=prdhp|shopping)" + to="https://www.google.com/$1" /> + +</ruleset> diff --git a/searx/https_rules/GoogleSorry.xml b/searx/https_rules/GoogleSorry.xml @@ -0,0 +1,7 @@ +<ruleset name="GoogleSorry"> + <target host="sorry.google.com" /> + <target host="www.google.com" /> + <target 
host="google.com" /> + + <rule from="^http://((sorry|www)\.)?google\.com/sorry/" to="https://sorry.google.com/sorry/" /> +</ruleset> diff --git a/searx/https_rules/GoogleTranslate.xml b/searx/https_rules/GoogleTranslate.xml @@ -0,0 +1,8 @@ +<ruleset name="Google Translate (broken)" default_off="redirect loops"> + <target host="translate.googleapis.com" /> + <target host="translate.google.com" /> + + <rule from="^http://translate\.googleapis\.com/" to="https://translate.googleapis.com/"/> + <rule from="^http://translate\.google\.com/" + to="https://translate.google.com/" /> +</ruleset> diff --git a/searx/https_rules/GoogleVideos.xml b/searx/https_rules/GoogleVideos.xml @@ -0,0 +1,83 @@ +<ruleset name="Google Videos"> + <target host="*.google.com" /> + <target host="google.com" /> + <target host="www.google.com.*" /> + <target host="google.com.*" /> + <target host="www.google.co.*" /> + <target host="google.co.*" /> + <target host="www.google.*" /> + <target host="google.*" /> + + <rule from="^http://encrypted\.google\.com/videohp" + to="https://encrypted.google.com/videohp" /> + + <!-- https://videos.google.com is currently broken; work around that... 
--> + <rule from="^https?://videos?\.google\.com/$" + to="https://encrypted.google.com/videohp" /> + <rule from="^http://(?:www\.)?google\.com/videohp" + to="https://encrypted.google.com/videohp" /> + <rule from="^http://(?:images|www|encrypted)\.google\.com/(.*tbm=isch)" + to="https://encrypted.google.com/$1" /> + + <rule + from="^http://(?:www\.)?google\.(?:com?\.)?(?:au|ca|gh|ie|in|jm|ke|lk|my|na|ng|nz|pk|rw|sl|sg|ug|uk|za|zw)/videohp" + to="https://encrypted.google.com/videohp" /> + <rule + from="^http://(?:www\.)?google\.(?:com?\.)?(?:ar|bo|cl|co|cu|cr|ec|es|gt|mx|pa|pe|py|sv|uy|ve)/videohp$" + to="https://encrypted.google.com/videohp?hl=es" /> + <rule + from="^http://(?:www\.)?google\.(?:com\.)?(?:ae|bh|eg|jo|kw|lb|ly|om|qa|sa)/videohp$" + to="https://encrypted.google.com/videohp?hl=ar" /> + <rule from="^http://(?:www\.)?google\.(?:at|ch|de)/videohp$" + to="https://encrypted.google.com/videohp?hl=de" /> + <rule from="^http://(?:www\.)?google\.(fr|nl|it|pl|ru|bg|pt|ro|hr|fi|no)/videohp$" + to="https://encrypted.google.com/videohp?hl=$1" /> + <rule from="^http://(?:www\.)?google\.com?\.(id|th|tr)/videohp$" + to="https://encrypted.google.com/videohp?hl=$1" /> + <rule from="^http://(?:www\.)?google\.com\.il/videohp$" + to="https://encrypted.google.com/videohp?hl=he" /> + <rule from="^http://(?:www\.)?google\.com\.kr/videohp$" + to="https://encrypted.google.com/videohp?hl=ko" /> + <rule from="^http://(?:www\.)?google\.com\.kz/videohp$" + to="https://encrypted.google.com/videohp?hl=kk" /> + <rule from="^http://(?:www\.)?google\.com\.jp/videohp$" + to="https://encrypted.google.com/videohp?hl=ja" /> + <rule from="^http://(?:www\.)?google\.com\.vn/videohp$" + to="https://encrypted.google.com/videohp?hl=vi" /> + <rule from="^http://(?:www\.)?google\.com\.br/videohp$" + to="https://encrypted.google.com/videohp?hl=pt-BR" /> + <rule from="^http://(?:www\.)?google\.se/videohp$" + to="https://encrypted.google.com/videohp?hl=sv" /> + +<!-- If there are URL parameters, keep 
them. --> + <rule + from="^http://(?:www\.)?google\.(?:com?\.)?(?:ar|bo|cl|co|cu|cr|ec|es|gt|mx|pa|pe|py|sv|uy|ve)/videohp\?" + to="https://encrypted.google.com/videohp?hl=es&#38;" /> + <rule + from="^http://(?:www\.)?google\.(?:com\.)?(?:ae|bh|eg|jo|kw|lb|ly|om|qa|sa)/videohp\?" + to="https://encrypted.google.com/videohp?hl=ar&#38;" /> + <rule from="^http://(?:www\.)?google\.(?:at|ch|de)/videohp\?" + to="https://encrypted.google.com/videohp?hl=de&#38;" /> + <rule from="^http://(?:www\.)?google\.(fr|nl|it|pl|ru|bg|pt|ro|hr|fi|no)/videohp\?" + to="https://encrypted.google.com/videohp?hl=$1&#38;" /> + <rule from="^http://(?:www\.)?google\.com?\.(id|th|tr)/videohp\?" + to="https://encrypted.google.com/videohp?hl=$1&#38;" /> + <rule from="^http://(?:www\.)?google\.com\.il/videohp\?" + to="https://encrypted.google.com/videohp?hl=he&#38;" /> + <rule from="^http://(?:www\.)?google\.com\.kr/videohp\?" + to="https://encrypted.google.com/videohp?hl=ko&#38;" /> + <rule from="^http://(?:www\.)?google\.com\.kz/videohp\?" + to="https://encrypted.google.com/videohp?hl=kk&#38;" /> + <rule from="^http://(?:www\.)?google\.com\.jp/videohp\?" + to="https://encrypted.google.com/videohp?hl=ja&#38;" /> + <rule from="^http://(?:www\.)?google\.com\.vn/videohp\?" + to="https://encrypted.google.com/videohp?hl=vi&#38;" /> + <rule from="^http://(?:www\.)?google\.com\.br/videohp\?" + to="https://encrypted.google.com/videohp?hl=pt-BR&#38;" /> + <rule from="^http://(?:www\.)?google\.se/videohp\?" 
+ to="https://encrypted.google.com/videohp?hl=sv&#38;" /> + + <rule from="^http://video\.google\.com/ThumbnailServer2" + to="https://video.google.com/ThumbnailServer2" /> + +</ruleset> diff --git a/searx/https_rules/GoogleWatchBlog.xml b/searx/https_rules/GoogleWatchBlog.xml @@ -0,0 +1,17 @@ +<!-- + gwbhrd.appspot.com + +--> +<ruleset name="GoogleWatchBlog"> + + <target host="googlewatchblog.de" /> + <target host="*.googlewatchblog.de" /> + + + <securecookie host="^(?:www)?\.googlewatchblog\.de$" name=".+" /> + + + <rule from="^http://(static\.|www\.)?googlewatchblog\.de/" + to="https://$1googlewatchblog.de/" /> + +</ruleset>+ \ No newline at end of file diff --git a/searx/https_rules/Google_App_Engine.xml b/searx/https_rules/Google_App_Engine.xml @@ -0,0 +1,21 @@ +<!-- + For other Google coverage, see GoogleServices.xml. + +--> +<ruleset name="Google App Engine"> + + <target host="appspot.com" /> + <target host="*.appspot.com" /> + <!-- + Redirects to http for some reason. + --> + <exclusion pattern="^http://photomunchers\.appspot\.com/" /> + + + <securecookie host="^.+\.appspot\.com$" name=".+" /> + + + <rule from="^http://([^@:\./]+\.)?appspot\.com/" + to="https://$1appspot.com/" /> + +</ruleset>+ \ No newline at end of file diff --git a/searx/https_rules/Googleplex.com.xml b/searx/https_rules/Googleplex.com.xml @@ -0,0 +1,16 @@ +<!-- This rule was automatically generated based on an HSTS + preload rule in the Chromium browser. See + https://src.chromium.org/viewvc/chrome/trunk/src/net/base/transport_security_state.cc + for the list of preloads. Sites are added to the Chromium HSTS + preload list on request from their administrators, so HTTPS should + work properly everywhere on this site. + + Because Chromium and derived browsers automatically force HTTPS for + every access to this site, this rule applies only to Firefox. 
--> +<ruleset name="Googleplex.com (default off)" platform="firefox" default_off="Certificate error"> + <target host="googleplex.com" /> + + <securecookie host="^googleplex\.com$" name=".+" /> + + <rule from="^http://googleplex\.com/" to="https://googleplex.com/" /> +</ruleset> diff --git a/searx/https_rules/OpenStreetMap.xml b/searx/https_rules/OpenStreetMap.xml @@ -0,0 +1,15 @@ +<ruleset name="OpenStreetMap"> + + <target host="openstreetmap.org"/> + <target host="*.openstreetmap.org"/> + + <rule from="^http://(?:www\.)?openstreetmap\.org/" + to="https://www.openstreetmap.org/"/> + + <rule from="^http://tile\.openstreetmap\.org/" + to="https://a.tile.openstreetmap.org/"/> + + <rule from="^http://(blog|help|lists|nominatim|piwik|taginfo|[abc]\.tile|trac|wiki)\.openstreetmap\.org/" + to="https://$1.openstreetmap.org/"/> + +</ruleset> diff --git a/searx/https_rules/Rawgithub.com.xml b/searx/https_rules/Rawgithub.com.xml @@ -0,0 +1,14 @@ +<!-- + www: cert only matches ^rawgithub.com + +--> +<ruleset name="rawgithub.com"> + + <target host="rawgithub.com" /> + <target host="www.rawgithub.com" /> + + + <rule from="^http://(?:www\.)?rawgithub\.com/" + to="https://rawgithub.com/" /> + +</ruleset> diff --git a/searx/https_rules/Soundcloud.xml b/searx/https_rules/Soundcloud.xml @@ -0,0 +1,101 @@ +<!-- + + CDN buckets: + + - akmedia-a.akamaihd.net + + - soundcloud.assistly.com + + - help.soundcloud.com + + - cs70.wac.edgecastcdn.net + + - a1.sndcdn.com + - i1.sndcdn.com + - w1.sndcdn.com + + - wpc.658D.edgecastcdn.net + - m-a.sndcdn.com.edgesuite.net + - soundcloud.gettyimages.com + + - scbackstage.wpengine.netdna-cdn.com + + - ssl doesn't exist + - backstage.soundcloud.com + + - soundcloud.wpengine.netdna-cdn.com + + - -ssl doesn't exist + - blog.soundcloud.com + + - gs1.wpc.v2cdn.netcdn.net + - gs1.wpc.v2cdn.net + + - ec-media.soundcloud.com + + Nonfunctional soundcloud.com subdomains: + + - help (redirects to http, mismatched, CN: *.assistly.com) + - m (redirects to http) 
+ - media + - status (times out) + + + Problematic domains: + + - m-a.sndcdn.com (works, akamai) + + + Partially covered domains: + + - backstage.soundcloud.com + + + Fully covered domains: + + - sndcdn.com subdomains: + + - a[12] + - api + - i[1-4] + - w[12] + - wis + + - soundcloud.com subdomains: + + - (www.) + - api + - blog + - connect + - developers + - ec-media + - eventlogger + - help-assets + - media + - visuals + - w + +--> +<ruleset name="Soundcloud (partial)"> + + <target host="scbackstage.wpengine.netdna-cdn.com" /> + <target host="soundcloud.wpengine.netdna-cdn.com" /> + <target host="*.sndcdn.com" /> + <target host="soundcloud.com" /> + <target host="*.soundcloud.com" /> + <exclusion pattern="^https?://(?:scbackstage\.wpengine\.netdna-cdn|backstage\.soundcloud)\.com/(?!wp-content/)" /> + + + <rule from="^http://([aiw]\d|api|wis)\.sndcdn\.com/" + to="https://$1.sndcdn.com/" /> + + <rule from="^http://((?:api|backstage|blog|connect|developers|ec-media|eventlogger|help-assets|media|visuals|w|www)\.)?soundcloud\.com/" + to="https://$1soundcloud.com/" /> + + <rule from="^https?://scbackstage\.wpengine\.netdna-cdn\.com/" + to="https://backstage.soundcloud.com/" /> + + <rule from="^https?://soundcloud\.wpengine\.netdna-cdn\.com/" + to="https://blog.soundcloud.com/" /> + +</ruleset> diff --git a/searx/https_rules/ThePirateBay.xml b/searx/https_rules/ThePirateBay.xml @@ -0,0 +1,36 @@ +<!-- + Nonfunctional: + + - image.bayimg.com + - (www.)thepiratebay.sx (http reply) + + + For problematic rules, see ThePirateBay-mismatches.xml. 
+ +--> +<ruleset name="The Pirate Bay (partial)"> + + <target host="suprbay.org" /> + <target host="*.suprbay.org" /> + <!-- * for cross-domain cookie --> + <target host="*.forum.suprbay.org" /> + <target host="thepiratebay.org"/> + <target host="*.thepiratebay.org"/> + <target host="thepiratebay.se"/> + <target host="*.thepiratebay.se"/> + + <securecookie host="^.*\.suprbay\.org$" name=".*" /> + <securecookie host="^(.*\.)?thepiratebay\.se$" name=".*"/> + + + <!-- Cert doesn't match (www.), redirects like so. --> + <rule from="^https?://(?:forum\.|www\.)?suprbay\.org/" + to="https://forum.suprbay.org/" /> + + <rule from="^http://(?:www\.)?thepiratebay\.(?:org|se)/" + to="https://thepiratebay.se/"/> + + <rule from="^http://(rss|static|torrents)\.thepiratebay\.(?:org|se)/" + to="https://$1.thepiratebay.se/"/> + +</ruleset> diff --git a/searx/https_rules/Torproject.xml b/searx/https_rules/Torproject.xml @@ -0,0 +1,18 @@ +<ruleset name="Tor Project"> + + <target host="torproject.org" /> + <target host="*.torproject.org" /> + <exclusion pattern="^http://torperf\.torproject\.org/" /> + + + <!-- Not secured by server: + --> + <!--securecookie host="^\.blog\.torproject\.org$" name="^SESS[0-9a-f]{32}$" /--> + + <securecookie host="^(?:.*\.)?torproject\.org$" name=".+" /> + + + <rule from="^http://([^/:@\.]+\.)?torproject\.org/" + to="https://$1torproject.org/" /> + +</ruleset> diff --git a/searx/https_rules/Twitter.xml b/searx/https_rules/Twitter.xml @@ -0,0 +1,169 @@ +<!-- + Other Twitter rulesets: + + - Twitter_Community.com.xml + + + Nonfunctional domains: + + - status.twitter.com * + - status.twitter.jp * + + * Tumblr + + + CDN buckets: + + - a1095.g.akamai.net/=/1095/134446/1d/platform.twitter.com/ | platform2.twitter.com.edgesuite.net + + - platform2.twitter.com + + - twitter-any.s3.amazonaws.com + - twitter-blog.s3.amazonaws.com + + - d2rdfnizen5apl.cloudfront.net + + - s.twimg.com + + - ssl2.twitter.com.edgekey.net + - twitter.github.com + + + Problematic domains: 
+ + - twimg.com subdomains: + + - a5 * + - s (cloudfront) + + - twitter.com subdomains: + + - platform[0-3] (403, akamai) + + * akamai + + + Fully covered domains: + + - (www.)t.co (www → ^) + + - twimg.com subdomains: + + - a[5-9] (→ si0) + - a\d + - abs + - dnt + - ea + - g + - g2 + - gu + - hca + - jp + - ma + - ma[0123] + - o + - p + - pbs + - r + - s (→ d2rdfnizen5apl.cloudfront.net) + - si[0-5] + - syndication + - cdn.syndication + - tailfeather + - ton + - v + - widgets + + - twitter.com subdomains: + + - (www.) + - 201[012] + - about + - ads + - analytics + - api + - cdn.api + - urls.api + - blog + - business + - preview.cdn + - preview-dev.cdn + - preview-stage.cdn + - de + - dev + - en + - engineering + - es + - firefox + - fr + - it + - ja + - jp + - m + - media + - mobile + - music + - oauth + - p + - pic + - platform + - platform[0-3] (→ platform) + - widgets.platform + - search + - static + - support + - transparency + - upload + + + These altnames don't exist: + + - i3.twimg.com + - p-dev.twimg.com + - vmtc.twimg.com + + - cdn-dev.api.twitter.com + +--> +<ruleset name="Twitter"> + + <target host="t.co" /> + <target host="*.t.co" /> + <target host="*.twimg.com" /> + <target host="twitter.com" /> + <target host="*.twitter.com" /> + + + <!-- Secured by server: + --> + <!--securecookie host="^\.twitter\.com$" name="^_twitter_sess$" /--> + <!--securecookie host="^support\.twitter\.com$" name="^_help_center_session$" /--> + <!-- + Not secured by server: + --> + <!--securecookie host="^\.t\.co$" name="^muc$" /--> + <!--securecookie host="^\.twitter\.com$" name="^guest_id$" /--> + + <securecookie host="^\.t\.co$" name=".+" /> + <securecookie host="^(?:.*\.)?twitter\.com$" name=".+" /> + + + <rule from="^http://(?:www\.)?t\.co/" + to="https://t.co/" /> + + <rule from="^http://a[5-9]\.twimg\.com/" + to="https://si0.twimg.com/" /> + + <rule 
from="^http://(abs|a\d|dnt|ea|g[2u]?|hca|jp|ma\d?|o|p|pbs|r|si\d|(?:cdn\.)?syndication|tailfeather|ton|v|widgets)\.twimg\.com/" + to="https://$1.twimg.com/" /> + + <rule from="^http://s\.twimg\.com/" + to="https://d2rdfnizen5apl.cloudfront.net/" /> + + <rule from="^http://((?:201\d|about|ads|analytics|blog|(?:cdn\.|urls\.)?api|business|preview(?:-dev|-stage)?\.cdn|de|dev|engineering|en|es|firefox|fr|it|ja|jp|m|media|mobile|music|oauth|p|pic|platform|widgets\.platform|search|static|support|transparency|upload|www)\.)?twitter\.com/" + to="https://$1twitter.com/" /> + + <rule from="^http://platform\d\.twitter\.com/" + to="https://platform.twitter.com/" /> + +</ruleset> diff --git a/searx/https_rules/Vimeo.xml b/searx/https_rules/Vimeo.xml @@ -0,0 +1,75 @@ +<!-- + CDN buckets: + + - av.vimeo.com.edgesuite.net + + - a808.g.akamai.net + + - pdl.vimeocdn.com.edgesuite.net + + - a1189.g.akamai.net + + + Problematic subdomains: + + - av (pdl.../crossdomain.xml restricts to port 80) + - pdl (works, akamai) + + + Partially covered subdomains: + + - developer (some pages redirect to http) + - pdl (→ akamai) + + + Fully covered subdomains: + + - (www.) 
+ - secure + + +Default off per https://trac.torproject.org/projects/tor/ticket/7569 --> +<ruleset name="Vimeo (default off)" default_off="breaks some video embedding"> + + <target host="vimeo.com" /> + <target host="*.vimeo.com" /> + <exclusion pattern="^http://av\.vimeo\.com/crossdomain\.xml" /> + <!--exclusion pattern="^http://developer\.vimeo\.com/($|\?|(apps|guidelines|help|player)($|[?/]))" /--> + <exclusion pattern="^http://developer\.vimeo\.com/(?!apis(?:$|[?/])|favicon\.ico)" /> + <target host="*.vimeocdn.com" /> + <!-- + Uses crossdomain.xml from s3.amazonaws.com, which sets secure="false" + + https://mail1.eff.org/pipermail/https-everywhere/2012-October/001583.html + --> + <exclusion pattern="^http://a\.vimeocdn\.com/p/flash/moogaloop/" /> + + <!-- We cannot secure streams because crossdomain.xml + restricts to port 80 :( + --> + <exclusion pattern="^http://pdl\.vimeocdn\.com/(?!crossdomain\.xml)" /> + + + <!-- Tracking cookies: + --> + <securecookie host="^\.(?:player\.)?vimeo\.com$" name="^__utm\w$" /> + + + <rule from="^http://((?:developer|player|secure|www)\.)?vimeo\.com/" + to="https://$1vimeo.com/" /> + + <rule from="^http://av\.vimeo\.com/" + to="https://a248.e.akamai.net/f/808/9207/8m/av.vimeo.com/" /> + + <!-- a & b: Akamai --> + <rule from="^http://(?:secure-)?([ab])\.vimeocdn\.com/" + to="https://secure-$1.vimeocdn.com/" /> + + <rule from="^http://i\.vimeocdn\.com/" + to="https://i.vimeocdn.com/" /> + + <rule from="^http://pdl\.vimeocdn\.com/" + to="https://a248.e.akamai.net/f/1189/4415/8d/pdl.vimeocdn.com/" /> + +</ruleset> + diff --git a/searx/https_rules/WikiLeaks.xml b/searx/https_rules/WikiLeaks.xml @@ -0,0 +1,13 @@ +<ruleset name="WikiLeaks"> + + <target host="wikileaks.org" /> + <target host="*.wikileaks.org" /> + + + <securecookie host="^(?:w*\.)?wikileaks\.org$" name=".+" /> + + + <rule from="^http://((?:chat|search|shop|www)\.)?wikileaks\.org/" + to="https://$1wikileaks.org/" /> + +</ruleset>+ \ No newline at end of file diff --git 
a/searx/https_rules/Wikimedia.xml b/searx/https_rules/Wikimedia.xml @@ -0,0 +1,107 @@ +<!-- + Wikipedia and other Wikimedia Foundation wikis previously had no real HTTPS support, and + URLs had to be rewritten to https://secure.wikimedia.org/$wikitype/$language/ . This is no + longer the case, see https://blog.wikimedia.org/2011/10/03/native-https-support-enabled-for-all-wikimedia-foundation-wikis/ , + so this file is a lot simpler these days. + + + Mixed content: + + - Images, on: + + - stats.wikimedia.org from upload.wikimedia.org * + - stats.wikimedia.org from wikimediafoundation.org * + + * Secured by us + +--> +<ruleset name="Wikimedia"> + + <target host="enwp.org" /> + <target host="frwp.org" /> + + <target host="mediawiki.org" /> + <target host="www.mediawiki.org" /> + <target host="wikimedia.org" /> + <target host="*.wikimedia.org" /> + <exclusion pattern="^http://(?:apt|cs|cz|parsoid-lb\.eqiad|status|torrus|ubuntu)\.wikimedia\.org" /> + <!-- https://mail1.eff.org/pipermail/https-everywhere-rules/2012-June/001189.html --> + <exclusion pattern="^http://lists\.wikimedia\.org/pipermail(?:$|/)" /> + <target host="wikimediafoundation.org" /> + <target host="www.wikimediafoundation.org" /> + + <!-- Wikimedia projects (also some wikimedia.org subdomains) --> + <target host="wikibooks.org" /> + <target host="*.wikibooks.org" /> + <target host="wikidata.org" /> + <target host="*.wikidata.org" /> + <target host="wikinews.org" /> + <target host="*.wikinews.org" /> + <target host="wikipedia.org" /> + <target host="*.wikipedia.org" /> + <target host="wikiquote.org" /> + <target host="*.wikiquote.org" /> + <target host="wikisource.org" /> + <target host="*.wikisource.org" /> + <target host="wikiversity.org" /> + <target host="*.wikiversity.org" /> + <target host="wikivoyage.org" /> + <target host="*.wikivoyage.org" /> + <target host="wiktionary.org" /> + <target host="*.wiktionary.org" /> + + <!-- Wikimedia chapters --> + <target host="wikimedia.ca" /> + <target 
host="www.wikimedia.ca" /> + + <!-- Wikimedia Tool Labs --> + <target host="tools.wmflabs.org" /> + <target host="icinga.wmflabs.org" /> + <target host="ganglia.wmflabs.org" /> + + <!-- Not secured by server: + --> + <!--securecookie host="^\.wiki(books|ipedia)\.org$" name="^GeoIP$" /--> + + <securecookie host="^^\.wik(?:ibooks|idata|imedia|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org$" name="^GeoIP$" /> + <securecookie host="^([^@:/]+\.)?wik(ibooks|idata|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org$" name=".*" /> + <securecookie host="^(species|commons|meta|incubator|wikitech).wikimedia.org$" name=".*" /> + <securecookie host="^(?:www\.)?mediawiki\.org$" name=".*" /> + <securecookie host="^wikimediafoundation.org$" name=".*" /> + + <rule from="^http://(en|fr)wp\.org/" + to="https://$1.wikipedia.org/wiki/" /> + + <rule from="^http://(?:www\.)?mediawiki\.org/" + to="https://www.mediawiki.org/" /> + + <rule from="^https?://download\.wikipedia\.org/" + to="https://dumps.wikimedia.org/" /> + + <rule from="^https?://(download|dataset2|sitemap)\.wikimedia\.org/" + to="https://dumps.wikimedia.org/" /> + + <rule from="^https?://(labs-ns[01]|virt0)\.wikimedia\.org/" + to="https://wikitech.wikimedia.org/" /> + + <rule from="^https?://noboard\.chapters\.wikimedia\.org/" + to="https://noboard-chapters.wikimedia.org/" /> + + <rule from="^https?://wg\.en\.wikipedia\.org/" + to="https://wg-en.wikipedia.org/" /> + + <rule from="^https?://arbcom\.(de|en|fi|nl)\.wikipedia\.org/" + to="https://arbcom-$1.wikipedia.org/" /> + + <rule from="^http://([^@:/]+\.)?wik(ibooks|idata|imedia|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org/" + to="https://$1wik$2.org/" /> + + <rule from="^http://(www\.)?wikimediafoundation\.org/" + to="https://$1wikimediafoundation.org/" /> + + <rule from="^http://(www\.)?wikimedia\.ca/" + to="https://wikimedia.ca/" /> + + <rule from="^http://([^@:/]+)\.wmflabs\.org/" + to="https://$1.wmflabs.org/" /> +</ruleset> diff --git 
a/searx/https_rules/Yahoo.xml b/searx/https_rules/Yahoo.xml @@ -0,0 +1,2450 @@ +<!-- + Other Yahoo rulesets: + + - Flickr.xml + - Lexity.com.xml + - Right-Media.xml + - Yahoo.com.tw.xml + - Yahoo.net.xml + - Yahoo_APIs.xml + - Yahoo_Japan.xml + - Yho.com.xml + - Yimg.com.xml + - YUI_Library.xml + + + CDN buckets: + + - ipgcdn-a.akamaihd.net + - yahootv.flyingfishes.com.br + - yahoosports.teamfanshop.com + + + Nonfunctional domains: + + - yahoo.com subdomains: + + - account ⁵ + - cn.adspecs ¹ + - tw.adspecs ¹ + - alerts ¹ + + - co.astrology ⁵ + - espanol.astrology ⁵ + - mx.astrology ⁵ + + - auction ¹ + + - biz subdomains: + + - au.rss ¹ + - nz.rss ¹ + + - bookmarks ⁵ + - buzz ¹ + + - cn subdomains: + + - ^ ¹ + - help ¹ + - news ¹ + + - docs subdomains: + + - ^ ⁵ + - ar ⁵ + - fr ⁵ + - uk ⁵ + + - au.rss.food (403, valid cert) + - au.forums ¹ + - ar.games ⁵ + - help.cc.hk ⁵ + - hsrd ¹ + - labs ¹ + + - lifestyle subdomains: + + - tw.ipeen ¹ + - au.rss ³ + - nz.rss ³ + - tw ⁵ + + - cn.overview.mail ¹ + + - cf.maps (404; mismatched, CN: www.yahoo.com) + - gws2.maps ¹ + - kr.mobile ⁵ + - tw.music ⁵ + + - my subdomains: + + - ar ⁵ + - au ² + - br ² + - ca ² + - de ² + - es ² + - fr ² + - hk ² + - ie ¹ + - in ² + - it ² + - kr ¹ + - mx ² + - nz ² + - qc ² + - sg ² + - tw ² + - cm.tw ⁸ + - uk ² + + - \w\w.news: + + - cn ¹ + - kr ¹ + - se ¹ + + - opi ¹ + - au.pfinance ² + - ar.rd ¹ + - research ¹ + - rightmedia (shows speakers.watersmartinnovations.com; mismatched, CN: *.watersmartinnovations.com) + + - search subdomains: + + - us.recipes ¹ + - gossip-ss.us ¹ + + - \w\w.yhs: + + - ar ¹ + - au ¹ + - br ¹ + - ca ¹ + - de ¹ + - es ¹ + - fr ¹ + - hk ¹ + - in ¹ + - it ¹ + - kr ¹ + - mx ¹ + - my ¹ + - nz ¹ + - ph ¹ + - se ¹ + - sg ¹ + - tw ¹ + - uk ¹ + - us ¹ + - vn ¹ + + - searchmarketing ¹ + - au.shopping ⁹ + - es.shopping ⁵ + - suggestions ⁵ + - au.rss.thehype ³ + + - video subdomains: + + - malaysia ¹ + - my ¹ + - ph ¹ + - sg ¹ + - tw ¹ + + - voices ⁵ + - cn.weather ¹ + - 
visit.webhosting ⁵ + - count.yisou ¹ + + - youth subdomains: + + - au.rss ³ + - nz.rss ³ + + - ypolicyblog.com (reset) + - www.ypolicyblog.com + + ¹ Refused + ² Redirects to http, valid cert + ³ 404, valid cert + ⁴ Redirects to http; mismatched, CN: www.yahoo.com + ⁵ Dropped + ⁶ Recursive redirect + ⁷ 404; mismatched, CN: *.news.yahoo.com + ⁸ Redirects to http; mismatched, CN: *.news.yahoo.com + ⁹ "Incorrect Host in URL" + + Problematic domains: + + - i.acdn.us ¹ + - cm.npc-morris.overture.com ² + - cm.npc-nydn.overture.com ² + - totaltravel.co.uk ³ + - www.totaltravel.co.uk ⁴ + - totaltravel.com ³ + - www.totaltravel.com ⁴ + + yahoo.com subdomains: + + - fr.actualites ⁴ + - advertisingcentral ⁴ + + - cl.answers ⁴ + - co.answers ⁴ + - pe.answers ⁴ + - ve.answers ⁴ + + - au.astrology ⁷ + - ca.astrology ⁴ + - nz.astrology ⁷ + + - ar.autos ⁴ + - de.autos ⁴ + - fr.autos ⁴ + - mx.autos ⁴ + + - axis ¹ + - id.berita ⁵ + + - au.biz ⁷ + - nz.biz ⁷ + + - \w\w.careers: (works; mismatched, CN: www.yahoo.com) + + - au + - ca + - de + - fr + - hk + - id + - ie + - in + - it + - jp + - my + - no + - ph + - qc ¹ + - sg + - tw + - uk + - us + - vn + + - malaysia.careers ¹ + - cars ¹ + - tw.help.cc ¹ + - cine ¹ + - cn (reset) + - connectedtv (works; mismatched, CN: smarttv.yahoo.com) + - cl.deportes ⁴ + - co.deportes ⁴ + - es.deportes ⁴ + - pe.deportes ⁴ + - ve.deportes ⁴ + - au.dir ⁷ + - au.docs (works; mismatched, CN: *.yahoo7.com.au) + - hk.ent ⁴ + - br.esportes ⁴ + - es.everything ⁴ + - fr.eurosport ⁴ + - fr.divertissement ⁵ + - dk ⁴ + - fantasysports ⁴ + - es.laliga.fantasysports ⁴ + - tw.fashion ⁵ + - feedback ⁴ + - chart.finance ⁴ + - ichart.finance ⁴ + - ie.finance ⁴ + - kr.finance (404, valid cert) + - au.food (403; mismatched, CN: *.yahoo7.com.au) + - nz.food (403; mismatched, CN: *.yahoo7.com.au) + - au.forums ⁷ + + - games subdomains: + + - br ⁴ + - de ⁴ + - es ⁴ + - fr ⁴ + - id ⁴ + - it ⁴ + - malaysia ⁴ + - nz ⁴ + - ph ⁴ + + - it.giochi ⁵ + - ie.groups ⁴ + - kr.gugi ⁴ + 
- au.gwn7 (mixed css from l.yimg.com) + - fr.help ⁴ + - help.cc.hk ⁴ + - fr.jeux ⁵ + - es.juegos ⁵ + - kr ⁴ + + - lifestyle subdomains: + + - ar ⁴ + - br ⁴ + - ca ⁴ + - es ⁴ + - es-us ⁴ + - fr ⁴ + - ie ⁴ + - it ⁴ + + - ca.local (dropped, redirect destination cert mismatched) + - fr.local ⁴ + - es.maps ⁴ + - in.maps ⁴ + - kr.maps ⁴ + - mx.maps ⁴ + - nz.maps ⁴ + + - external.global.media ⁵ + - au.messages ⁷ + - ie.messenger ⁴ + - nz.messenger ⁷ + - tw.messenger ⁴ + - dk.mobile ⁴ + - ie.mobile ⁴ + - no.mobile ⁴ + - webservices.mobile (works, self-signed) + - tw.atm.money (works; mismatched, CN: tw.campaign.money.yahoo.com) + + - br.movies ¹ + - fr.movies ¹ + - es.movies ⁴ + - es-us.movies ⁴ + - it.movies ⁴ + + - br.mulher ⁵ + - hk.music ¹ + - tw.music ⁵ + - fr.musique ⁵ + + - news subdomains: + + - ar ⁴ + - br ⁴ + - cl ⁴ + - co ⁴ + - de ⁴ + - dk ⁴ + - id ⁴ + - ie ⁴ + - it ⁴ + - mx ⁴ + - pe ⁴ + - qc ⁴ + - au.rss (mixed css from l.yimg.com) + - ve ⁴ + + - no ⁴ + - notepad (works; mismatched, CN: *.calendar.yahoo.com) + - it.notizie ⁵ + + - on ⁴ + - it.oroscopo ⁵ + - fr.pourelles ⁵ + - br.esporteinterativo ⁵ + - id.olahraga ⁵ + - au.prime7 (mixed css from l.yimg.com) + - ru ⁴ + + - safely subdomains: ⁴ + + - ar + - br + - cl + - es + - es-us + - malaysia + - pe + - ve + - vn + + - cn.search ⁴ + - my.images.search ⁴ + - kr.images.search ⁴ + - nz.maps.search ⁴ + - my.search ⁴ + - my.video.search ⁴ + - kr.searchad ¹ + + - ph.she ⁵ + - fr.sites ⁵ + + - de.solutions ¹ + - es.solutions ¹ + - fr.solutions ¹ + - it.solutions ¹ + - nz.solutions ⁷ + - uk.solutions ¹ + + - sport ⁴ + + - sports subdomains: + + - ar ⁴ + - br ⁴ + - de ⁴ + - es ⁴ + - id ⁴ + - in ⁴ + - uk ⁴ + + - br.tempo ⁵ + - es.tendencias ⁵ + - au.todaytonight (403, valid cert) + + - au.travel ⁷ + - ca.travel ⁴ + - id.travel ⁴ + - my.travel ⁴ + - nz.travel ⁷ + - ph.travel ⁴ + - uk.travel ⁴ + - ca.tv ⁴ + - pe.tv ⁴ + + - video subdomains: + + - ^ ⁴ + - ar ⁴ + - au ⁴ + - br ⁴ + - ca ⁴ + - co ⁴ + - de ⁴ + - es ⁴ + - 
es-us ⁴ + - fr ⁴ + - hk ⁴ + - in ⁴ + - it ⁴ + - pe ⁴ + - mx ⁴ + - uk ⁴ + - ve ⁴ + + - fr.voyage (works; expired 2013-01-08, mismatched, CN: uk.travel.yahoo.com) + + - weather subdomains: + + - ar ⁴ + - au ⁴ + - br ⁴ + - cl ⁴ + - co ⁴ + - de ⁴ + - es ⁴ + - espanol ⁴ + - fr ⁴ + - it ⁴ + - kr ⁴ + - mx ⁴ + - pe ⁴ + - tw ⁴ + - mx ⁴ + - ve ⁴ + + - widgets (works; mismatched, CN: smarttv.yahoo.com) + - au.youth (works; mismatched, CN: yahoo.com.au) + + + - (www.)yhoo.it ⁴ (bit.ly alias) + + ¹ Dropped + ² Works, mismatched, CN: *.ysm.yahoo.com + ³ Works; mismatched, CN: builder.totaltravel.com + ⁴ Refused + ⁵ Works; mismatched, CN: *.news.yahoo.com + ⁶ Works; mismatched, CN: address.yahoo.com + ⁷ "Incorrect Host in URL" + + + Partially covered domains: + + - (www.)totaltravel.com (→ au.totaltravel.yahoo.com, haven't found images/) + + - yahoo.com subdomains: + + - advertisingcentral ¹ (→ advertising) + - fantasysports ¹ (-> sports) + - in.sports (→ cricket, /*(?!$) doesn't redirect) + - nz.video (→ nz.news, \w.* 404s) + + ¹ Some paths other than root don't redirect + ⁵ Avoiding false/broken MCB + + + Fully covered domains: + + - i.acdn.us (→ s.yimg.com/ck/) + + - (www.)totaltravel.co.uk (→ au.totaltravel.yahoo.com) + + - yahoo.com subdomains: + + - (www.) 
+ + - \w\w: + + - ar + - au + - br + - ca + - cl + - cn (→ sg) + - co + - de + - dk (→ www) + - e1 (→ espanol) + - es + - fr + - gr + - hk + - id + - ie + - in + - it + - kr (→ tools.search) + - mx + - no (→ www) + - nz + - pe + - ph + - qc + - ru (→ www) + - se + - sg + - tw + - ve + - vn + - uk + - us + + - fr.actualites (→ fr.news) + - fr-ca.actualites + - address + + - \w\w.address: + + - ca + - e1 + - fr + - hk + - nz + + - admanager + + - \w\w.adserver: + + - au + - uk + - us + + - global.adserver + - adspecs + + - \w+.adspecs: + + - au + - de + - es + - fr + - hk + - in + - it + - latam + - nz + - sea + - uk + + - \w+.adspecs-new: + + - in + - sea + + - advertising + + - \w\w.advertising: + + - au + - ca + - fr + - nz + + - beap.adx + - c5.ah + - c5a.ah + - cookex.amp + - s-cookex.amp + + - analytics subdomains: + + - [aoyz] + - apac + - y3 + + - anc + - answers + + - \w\w.answers: + + - ar + - au + - br + - ca + - cl (→ espanol.answers) + - co (→ espanol.answers) + - de + - es + - fr + - id + - in + - it + - mx + - nz + - pe (→ espanol.answers) + - ph + - qc + - sg + - uk + - ve (→ espanol.answers) + - vn + + - espanol.answers + - malaysia.answers + + - antispam + + - \w\w.antispam: + + - ca + - dk + - fr + - in + + - vn.antoan + - au.apps + - global.ard + + - \w\w.astrology: + + - au (→ au.lifestyle) + - ca (→ ca.shine) + - es + - fr + - nz (→ nz.lifestyle) + - uk + + - auctions subdomains: + + - hk + - hk.info + - hk.f1.master + - hk.f1.page + - hk.search + - hk.store + - hk.edit.store + - hk.user + + - autos + + - \w\w.autos: + + - ca + - ar (→ ar.autocosmos.yahoo.net) + - de (→ de.cars) + - fr (→ fr.cars) + - mx (→ mx.autocosmos.yahoo.net) + - tw + + - bc subdomains: + + - clicks.beap + - csc.beap + - pn1 + - row + - us + + - axis (→ www) + - ar.ayuda + + - bid subdomains: + + - tw.campaign + - tw.master + - tw.mb + - tw.page + - tw.search + - tw.store + - tw + - tw.user + + - tw.bigdeals + - m.tw.bigdeals + - tw.billing + - biz + - au.biz (→ 
au.finance) + - nz.biz (→ nz.finance) + - boss + - tw.partner.buy + - tw.buy + - calendar + + - \w\w.calendar: + + - ar + - au + - br + - ca + - de + - dk + - es + - fr + - gr + - hk + - ie + - in + - it + - no + - nz + - se + - sg + - tw + - uk + - us + + - careers + + - \w\w.careers (→ careers) + + - ar + - au + - br + - ca + - cl + - de + - fr + - es + - hk + - id + - ie + - in + - it + - jp + - mx + - my + - no + - ph + - qc + - nz + - sg + - tw + - uk + - us + - vn + + - malaysia.careers (→ careers) + + - cars (→ autos) + + - \w\w.cars: + + - de + - es + - fr + - it + - uk + + - \w\w.celebridades: + + - ar + - br + - co + - mx + + - es-us.celebridades + + - celebrity + + - \w\w.celebrity: + + - ca + - es + - gr + - id + - in + - it + - hk + - ph + - tw + - uk + + - tw.help.cc (→ help) + - tw.charity + - chart + - cine (→ es-us.cine) + + - \w\w.cine: + + - cl + - co + - es + - mx + - pe + - ve + + - es-us.cine + + - \w\w.cinema: + + - br + - fr + - it + + - \w\w.clima: + + - cl + - co + - mx + - pe + - ve + + - es-us.clima + - migration.cn + - commercecentral + - developers.commercecentral + - connectedtv (→ smarttv) + - br.contribuidores + - contributor + - uk.contributor + - cricket + - au.dating + + - \w\w.deportes: + + - ar + - cl (→ es-us.deportes) + - co (→ es-us.deportes) + - es (→ es.eurosport) + - mx + - pe (→ pe-us.deportes) + - ve (→ ve-us.deportes) + + - es-us.deportes + - developer + - tw.dictionary + - dir + - au.dir (→ au.search) + - downloads + - s-b.dp + + - edit subdomains: + + - ^ + - eu + - na + - sa + - tw + + - tw.emarketing + - tw.ysm.emarketing + - en-maktoob + - hk.ent (→ hk.celebrity) + + - \w\w.entertainment: + + - my + - nz + + - espanol + - edit.europe + - java.europe (→ adgallery.zenfs.com) + + - eurosport subdomains: + + - ^ + - de + - es + - fr (→ fr.sports) + - it + - uk + + - everything + + - \w\w.everything: + + - ca + - es (→ es.todo) + - nz + - ph + - pt + - tw + - uk + + - au.fango + + - \w+.fantasysports: + + - baseball + 
- football + - golf + - hockey + - racing + + - es.laliga.fantasysports (→ es.eurosport) + - tw.fashion + - feedback (→ yahoo.uservoice.com) + - br.financas + - finance + + - \w\w.finance: + + - ar + - au + - br + - ca + - de + - es + - fr + - hk + - ie (→ uk.finance) + - in + - it + - kr (→ tools.search) + - mx + - nz + - sg + - tw + - uk + + - chart.finance (→ chart) + - tw.chart.finance + - espanol.finance + - tw.futures.finance + - ichart.finance (→ ichart) + - streamerapi.finance + + - \w\w.finanzas: + + - ar + - mx + + - es-us.finanzas + + - food subdomains: + + - au (→ au.lifestyle) + - nz (→ nz.lifestyle) + - nz.rss + + - au.forums (→ au.answers) + - nz.forums + + - games subdomains: + + - ^ + - au + - ca + - de (→ de.spiele) + - id (→ games) + - malaysia (→ games) + - nz.games (→ games) + - ph (→ games) + - uk + + - geo + - gma + - groups + + - \w\w.groups: + + - ar + - au + - br + - ca + - de + - dk + - es + - fr + - hk + - ie (→ uk.groups) + - in + - it + - kr + - mx + - nz + - ph + - sg + - tw + - uk + - us + + - asia.groups + - espanol.groups + - es-us.groups + - fr-ca.groups + - moderators.groups + - kr.gugi (→ tools.search) + - health + - help + + - \w\w.help: + + - au + - br + - ca + - dk + - fr (→ help) + - hk + - io + - tw + - uk + + - secure.help + - help.cc.hk (→ help) + - homes + - tw.house + - tw.v2.house + - ichart + - info + + - \w\w.info: + + - tw + + - tw.tool.ks + - au.launch + - legalredirect + + - \w\w.lifestyle: + + - ar (→ ar.mujer) + - au + - ca (→ ca.shine) + - de + - hk + - ie (→ uk.lifestyle) + - in + - it + - mx (→ mx.mujer) + - nz + - uk + + - es-us.lifestyle (→ ar.mujer) + - login + - gh.bouncer.login + - us.lrd + - local + + - \w\w.local: + + - au + - de + - fr (→ fr) + - uk + + - m + - r.m + + - \w\w.m: + + - ar + - au + - br + - ca + - cn + - de + - es + - fr + - hk + - id + - ie + - in + - it + - kr + - ph + - qc + - se + - sg + - mx + - tw + - uk + - us + - vn + + - mail + + - *.mail: + + - ar + - au + - co + - e1 + - es + 
- fr + - it + - mrd + - my + - overview + + - \w\w.overview: + + - br + - ca + - co + - e1 + - hk + - ph + - tw + - uk + - us + + - ph + - th + - tw + - us-mg6 + - vn + - c.c.yom + - \w+-c.c.yom + + - maktoob + - malaysia + - tw.mall + - tw.user.mall + - maps + + - \w\w.maps: + + - au + - ca + - de + - es (→ es.search) + - fr + - in (→ maps) + - it + - kr (→ tools.search) + - mx (→ espanol.maps) + - nz (→ nz.search) + - qc + - tw + - uk + + - espanol.maps + - sgws2.maps + - au.messages (→ au.answers) + - messenger + + - \w\w.messenger: + + - ar + - au + - br + - ca + - cf + - cl + - co + - de + - e1 + - es + - fr + - hk + - id + - ie (→ uk.messenger) + - in + - it + - kr + - mx + - my + - nz (→ messenger) + - pe + - ph + - qc + - sg + - th + - tw (→ hk) + - uk + - us + - ve + - vn + + - malaysia.messenger + - \w\w.meteo: + + - fr + - it + + - mlogin + - mobile + + - \w\w.mobile: + + - ar + - au + - br + - ca + - de + - dk (→ www) + - es + - fr + - hk + - id + - ie (→ uk.mobile) + - in + - it + - mx + - my + - nz + - no (→ www) + - ph + - qc + - sg + - th + - tw + - uk + - us + - vn + + - espanol.mobile + - malaysia.mobile + - tw.campaign.money + - tw.money + + - tw.movie + + - movies subdomains: + + - ^ + - au + - br (→ br.cinema) + - ca + - es (→ es.cine) + - espanol (→ es-us.cine) + - fr (→ fr.cinema) + - it (→ it.cinema) + - nz + - au.rss + - nz.rss + - tw + - uk + + - *.msg: + + - dps (→ ycpi-mail-dps) + - prod2.rest-core + - prod1.rest-notify + - ycpi-mail-dps + - ycpi-mail-preferences + - ycpi-mail-rest-core + - ycpi-mail-rest-core2 + + - \w\w.mujer: + + - ar + - co + - mx + + - es-us.mujer + + - music subdomains: + + - ^ + - ca + - hk (→ hk.celebrity) + - tw (→ tw.music.yahoo.net) + + - [\w-]+\.musica: + + - es-us + - mx + + - my + - us.my + - de.nachrichten + - ucs.netsvs + + - news subdomains: + + - ^ + - ar (→ ar.noticias) + - au + - br (→ br.noticias) + - au + - ca + - cl (→ cl.noticias) + - co (→ co.noticias) + - dk (→ www) + - es (→ es.noticias) + - fr 
+ - gr + - hk + - ie (→ uk.news) + - in + - mx (→ mx.noticias) + - my + - nz + - pe (→ pe.noticias) + - ph + - nz.rss + - sg + - tw + - uk + - ve (→ ve.noticias) + - vn + + - cookiex.ngd + + - \w\w.noticias + + - ar + - br + - cl + - co + - es + - mx + - pe + - ve + + - es-us.noticias + - omg + + - \w\w.omg: + + - ar + - br + - co + - es + - it + - mx + - ph + - tw + + - es-us.omg + - on (→ pilotx1) + - au.oztips + - rtb.pclick + - pilotx1 + - pipes + - play + - playerio + - privacy + - profile + - tw.promo + + - au.promotions + - hk.promotions + - nz.promotions + + - publishing + + - query subdomains: + + - analytics + - mailapps + - media + - ucs + - us-locdrop + - video + + - tw.rd + - us.rd + + - safely + + - \w\w.safely: + + - ar (→ ar.seguridad) + - au + - ca + - cl (→ cl.seguridad) + - co + - de + - fr + - hk + - id + - in + - it + - mx (→ mx.seguridad) + - my + - nz + - pe (→ pe.seguridad) + - ph + - sg + - tw + - uk + - ve (→ ve.seguridad) + + - es-us.safely (→ es.us.seguridad) + - fr-ca.safely + - malaysia.safely (→ my.safely) + + - screen + + - \w\w.screen: + + - ar + - br + - ca + - co + - de + - es + - fr + - hk + - in + - it + - mx + - tw + - uk + + - es-us.screen + - scribe + + - search subdomains: + + - ^ + + - \w\w: + + - ar + - au + - be + - br + - ca + - cl + - cn (→ sg) + - co + - de + - dk + - es + - fi + - fr + - gr + - hk + - id + - ie + - in + - it + - kr + - mx + - my (→ malaysia) + - nl + - no + - nz + - pe + - ph + - ru + - se + - sg + - tw + - uk + - ve + - vn + + - \w\w.blog: + + - tw + + - \w\w.dictionary: + + - tw + + - finance + + - \w\w.finance: + + - au + - nz + + - images + + - \w\w.images: + + - ar + - au + - br + - ca + - cn (→ sg.images.search) + - de + - dk + - es + - fi + - fr + - hk + - id + - in + - it + - kr (→ kr.search) + - nl + - mx + - my (→ malaysia.images.search) + - no + - nz + - pe + - ph + - qc + - ru + - se + - sg + - tw + - uk + - ve + - vn + + - malaysia.images + + - \w\w.knowledge: + + - tw + + - 
\w\w.lifestyle: + + - au + - nz + + - \w\w.local: + + - tw + + - malaysia + + - nz.maps (→ nz.search) + + - \w\w.news: + + - ar + - au + - ca + - de + - fr + - sg + - tw + - uk + + - malaysia.news + + - movies + + - \w\w.movies: + + - au + - ca + - es + - fr + - it + - nz + - sg + - uk + + - news + + - \w\w.news: + + - ar + - au + - br + - es + - fr + - it + - nz + - pe + - sg + - uk + + - r + - recipes + + - \w\w.recipes: + + - ar + - au + - br + - es + - fr + - it + - mx + - nz + - tw + - uk + + - shine + - shopping + + - \w\w.shopping: + + - tw + + - sports + + - \w\w.sports: + + - au + - nz + + - profiles.sports + - tools + - au.tv + - video + + - \w\w.video: + + - ar + - au + - br + - ca + - de + - es + - fr + - hk + - id + - in + - it + - mx + - my (→ malaysia.video) + - nz + - ph + - qc + - sg + - tw + - uk + - vn + + - malaysia.video + + - kr.searchad (→ tools.search) + - rtb.pclick.secure + - security + - tw.security + + - \w\w.seguranca: + + - br + + - \w\w.seguridad: + + - ar + - cl + - co + - mx + - pe + - ve + + - es-us.seguridad + + - \w\w.seguro: + + - seguro + + - tw.serviceplus + - settings + - shine + - ca.shine + - shopping + - ca.shopping + + - \w+.sitios: + + - co + - mx + + - dashboard.slingstone + + - smallbusiness + - au.smallbusiness + - order.smallbusiness + + - smarttv + + - de.solutions (→ de.adspecs) + - es.solutions (→ es.adspecs) + - fr.solutions (→ fr.adspecs) + - it.solutions (→ it.adspecs) + - nz.solutions (→ nz.advertising) + - uk.solutions (→ uk.adspecs) + + - rd.software + - de.spiele + + - sport (→ sports) + + - sports subdomains: + + - ^ + - au + - ca + - de (→ de.eurosport) + - es (→ es.eurosport) + - fr + - hk + - nz + - ph + - au.rss + - nz.rss + - tw + - uk (→ uk.eurosport) + + - tw.stock + - au.thehype + + - \w\w.tiempo: + + - ar + - es + + - au.todaytonight (→ au.news) + - es.todo + - toolbar + + - \w\w.toolbar: + + - ar + - au + - br + - ca + - cl + - cn + - co + - de + - es + - fr + - hk + - id + - in + - it + - mx + - 
my + - nz + - pe + - ph + - sg + - tw + - uk + - ve + - vn + + - data.toolbar + - malaysia.toolbar + - au.totaltravel + - nz.totaltravel + - transparency + - travel + - au.travel (→ au.totaltravel) + - ca.travel (→ travel) + - my.travel (→ my.news) + - nz.travel (→ nz.totaltravel) + - ph.travel (→ ph.news) + - tw.travel + - uk.travel (→ uk.lifestyle) + + - tv subdomains: + + - ^ + - ar + - au + - br + - ca (→ tv) + - de + - es + - es-us + - fr + - hk (→ hk.celebrity) + - it + - mx + - nz + - pe (→ es-us.tv) + - au.rss + - uk + + - tw.uwant + + - video subdomains: + + - ^ (→ screen) + - ar (→ ar.screen) + - au (→ au.tv) + - br (→ br.screen) + - ca (→ ca.screen) + - co (→ co.screen) + - de (→ de.screen) + - es (→ es.screen) + - es-us (→ es-us.screen) + - fr (→ fr.screen) + - hk (→ help) + - in (→ in.screen) + - it (→ it.screen) + - mh + - mx (→ mx.screen) + - nz + - pe (→ es-us.screen) + - qos + - uk (→ uk.screen) + - ve (→ es-us.screen) + - yep + + - weather subdomains: + + - ^ + - ar (→ ar.tiempo) + - au + - ca + - cl (→ cl.clima) + - co (→ co.clima) + - es (→ es.tiempo) + - espanol (→ es-us.clima) + - fr (→ fr.meteo) + - hk + - in + - it (→ it.meteo) + - mx (→ mx.clima) + - nz + - pe (→ pe.clima) + - ph + - sg + - tw (→ tw.news) + - uk + - us + - ve (→ ve.clima) + + - de.wetter + - widgets (→ www) + - au.yel + - video.media.yql + - dmros.ysm + + + These altnames don't exist: + + - manhattan.yahoo.com + - tw.moderation.money.yahoo.com + + + Observed cookie domains: + + - . 
¹ + - .answers ² + - .auctions ¹ + - .bid ¹ + - .buy ⁴ + - commercecentral + - developers.commercecentral ² + - .contributor ⁵ + - tw.ysm.emarketing ³ + - games ³ + - homes ³ + - au.local ³ + - .maps ³ + - .playerio ³ + - profile ³ + - .search ⁴ + - .\w\w.tv ³ + - tw.uwant ³ + - .voices ⁵ + - .www ³ + + ¹ Partially secured by us <= accounting for possible use on unsecurable domains + ² Secured by server + ⁵ Some secured by server, rest by us + ³ Secured by us <= not secured by server + ⁴ Not secured by us <= accounting for possible use on unsecurable domains + ⁵ Not secured by us <= no tls support + + + Mixed content: + + - css, on: + + - au.gwn7, tw.money, au.rss.news, and au.prime7 from l[13]?.yimg.com ¹ + + - Ads/web bugs, on: + + - au.games from secure-us.imrworldwide.com ¹ + - \w\w.celebrity, m, \w\w.m, and ar.mujer from csc.beap.bc.yahoo.com ¹ + - au.news from au.adserver.yahoo.com ¹ + - shine from www.facebook.com ¹ + + - Images, on: + + - au.local from dacsisb9yvy2v.cloudfront.net ¹ + - au.advertising, nz.advertising, au.answers, nz.answers, ph.answers, sg.answers, au, biz, \w\w.celebrity, cricket, nz.entertainment, eurosport, \w\w.eurosport, everything, au.fango, games, ichart, au.launch, nz.lifestyle, au.local, sg.messenger, tw.money, au.movies, nz.movies, au.news, nz.news, au.oztips, au.promotions, \w\w.safely, fr-ca.safely, search, \w\w.seguridad, es-us.seguridad, es.seguro, au.smallbusiness, au.rss.sports, nz.rss.sports, au.thehype, tw.toolbar, au.totaltravel, nz.totaltravel, au.tv, nz.tv, au.rss.tv, and nz.weather from l.yimg.com ¹ + - ca.autos from yui.yahooapis.com ¹ + - tw.info from l.yimg.com ¹ + - tw.knowledge from tw.tool.ks ¹ + - tw.knowledge from l.yimg.com ¹ + - tw.money from ichart ¹ + - tw.money from tw.news2.yimg.com ² + - tw.promo from www.adobe.com ¹ + - au.totaltravel and nz.totaltravel from www.totaltravel.com ² + - \w\w.weather and de.wetter from media.zenfs.com ¹ + + - favicon on tw from tw * + + - Ads, on: + + - fr.finance from 
www.borse.it ³ + - tw.promo from www.facebook.com ¹ + - de.kino from yahoo.quizaction.de ¹ + - my.news from widgets.wego.com ² + + ¹ Secured by us + ² Unsecurable + ³ Unsecurable <= redirects to http + + + Reported to fix bug + + https://trac.torproject.org/projects/tor/ticket/4441 + + + If you have a Yahoo Mail account, please test this ruleset! + +--> +<ruleset name="Yahoo! (partial)"> + + <target host="i.acdn.us" /> + <target host="rocketmail.com" /> + <target host="www.rocketmail.com" /> + <target host="totaltravel.co.uk" /> + <target host="www.totaltravel.co.uk" /> + <target host="totaltravel.com" /> + <target host="*.totaltravel.com" /> + <exclusion pattern="^http://(?:www\.)?totaltravel\.com/images/" /> + <target host="yahoo.com" /> + <target host="*.yahoo.com" /> + <!-- + Refused: + --> + <exclusion pattern="^http://(?:(?:cn|kr|tw)\.adspecs|(?:co|espanol|mx)\.astrology|kr\.mobile)\.yahoo\.com/" /> + <!-- + Redirect destination cert mismatched: + --> + <exclusion pattern="^http://ca\.local\.yahoo\.com/" /> + <!-- + Refused: + --> + <exclusion pattern="^http://cn\.overview\.mail\.yahoo\.com/" /> + <!--exclusion pattern="^http://(cn|de|dk|id|ie|it|qc)\.news\.yahoo\.com/" /--> + <!-- + Destination has mismatched cert: + --> + <exclusion pattern="^http://(?:br|es)\.safely\.yahoo\.com/" /> + <target host="*.yahoofs.com" /> + <target host="yhoo.it" /> + <target host="ymail.com" /> + <target host="www.ymail.com" /> + <target host="*.zenfs.com" /> + + + <!-- Some Yahoo cookies are cross-domain cookies. + It's a case of figuring out which ones + aren't needed on unsecurable pages. 
+ + - .yahoo.com + - AO + - B + - Set by y3.analytics.yahoo.com/itr.pl & us.bc.yahoo.com/b + + - BA + + - t=\d{10} + + - CH + - \w{59}/ + - F + + - HP + + - 0 + + - MSC + - t=\d{10}X + - PH (set by hjsal) + - SSL + + - ucs (set by ucs.query) + + - bnas=\d + + - V + + - v=\d.\d&cc=0&m=0 + + - Y + + --> + <!-- + Secured by server: + --> + <!--securecookie host="^\.answers\.yahoo\.com$" name="^answers3$" /--> + <!--securecookie host="^(developers\.)?commercecentral\.yahoo\.com$" name="^_rockstar_session$" /--> + <!--securecookie host="^\.contributor\.yahoo\.com$" name="^c$" /--> + <!-- + Not secured by server: + --> + <!--securecookie host="^\.yahoo\.com$" name="^(AO|B|PH|au_ytv|tt_currency)$" /--> + <!--securecookie host="^\.auctions\.yahoo\.com$" name="^hkRecentHistory$" /--> + <!--securecookie host="^\.bid\.yahoo\.com$" name="^twRecentHistory$" /--> + <!--securecookie host="^commercecentral\.yahoo\.com$" name="^first_referer$" /--> + <!--securecookie host="^\.contributor\.yahoo\.com$" name="^ACSESS$" /--> + <!--securecookie host="^(\w\w\.celebridades|\w\w\.cinema|everything|\w\w\.financas|games|homes|\w\w\.news)\.yahoo\.com$" name="^AO$" /--> + <!--securecookie host="^tw\.ysm\.emarketing\.yahoo\.com$" name="^(device|is_c|tw_ysm_soeasy)$" /--> + <!--securecookie host="^(uk\.)?help\.yahoo\.com$" name="^(JSESSIONID|scav|scwysiwygparams)$" /--> + <!--securecookie host="^au\.local\.yahoo\.com$" name="^(aunz\.aulocal\.cookie|au_yloc)$" /--> + <!--securecookie host="^\.maktoob\.yahoo\.com$" name="^hpc$" /--> + <!--securecookie host="^\.maps\.yahoo\.com$" name="^MYCFL$" /--> + <!--securecookie host="^\.playerio\.yahoo\.com$" name="^playcodes-\d+$" /--> + <!--securecookie host="^profile\.yahoo\.com$" name="^YPRF$" /--> + <!--securecookie host="^\.search\.yahoo\.com$" name="^sSN$" /--> + <!--securecookie host="^\.es\.tv\.yahoo\.com$" name="^tv_listings_last_time$" /--> + <!--securecookie host="^tw\.uwant\.yahoo\.com$" name="^uwwtutorial$" /--> + <!--securecookie 
host="^\.www\.yahoo\.com$" name="^fpc$" /--> + + <securecookie host="^\.yahoo\.com$" name="^(?:AO|B|SSL)$" /> + <securecookie host="^(?:\.analytics|\w\w\.celebridades|\w\w\.cinema|commercecentral|\.contributor|tw\.ysm\.emarketing|everything|\w\w\.financas|games|help|\w\w\.help|homes|\w\w\.local|\.mail|\.maps|\.maktoob|movies|\.?news|\w\w.news|\.playerio|profile|(?:us-locdrop|video)\.query|images\.search|fr\.images\.search|\.toolbar|\.\w\w\.tv|\.uk|\.?us|tw\.uwant|\.www)\.yahoo\.com$" name=".+" /> + <securecookie host="^\.bid\.yahoo\.com$" name="^twRecentHistory$" /> + <securecookie host="^\.auctions\.yahoo\.com$" name="^hkRecentHistory$" /> + <securecookie host="^\.zenfs\.com$" name="^BX$" /> + + <!-- Could we secure any of these safely? + --> + <!--securecookie host="^\.yahoo\.com$" name="^(DK|PH|au_ytv|tt_currency)$" /--> + <!--securecookie host="^\.buy\.yahoo\.com$" name="^YAct$" /--> + <!--securecookie host="^\.my\.yahoo\.com$" name="^(myc|MYTMI|U_mtupes)$" /--> + <!--securecookie host="^\.search\.yahoo\.com$" name="^sSN$" /--> + + + <rule from="^http://i\.acdn\.us/" + to="https://s.yimg.com/ck/" /> + + <rule from="^http://(?:www\.)?(?:rocket|y)mail\.com/" + to="https://mail.yahoo.com/" /> + + <rule from="^http://(?:www\.)?totaltravel\.co(?:m|\.uk)/" + to="https://au.totaltravel.yahoo.com/" /> + + <rule from="^http://builder\.totaltravel\.com/" + to="https://builder.totaltravel.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://fr\.actualites\.yahoo\.com/.*" + to="https://fr.news.yahoo.com/" /> + + <rule from="^http://advertisingcentral\.yahoo\.com/+(?=$|\?)" + to="https://advertising.yahoo.com/" /> + + <!-- Redirect preserves path and args: + --> + <rule from="^http://(?:cl|co|pe|ve)\.answers\.yahoo\.com/+" + to="https://espanol.answers.yahoo.com/" /> + + <!-- Redirect drops path but not args: + --> + <rule from="^http://(au|nz)\.astrology\.yahoo\.com/[^?]*" + to="https://$1.lifestyle.yahoo.com/horoscopes/" /> + + <!-- Redirect drops 
path and args: + --> + <rule from="^http://ca\.astrology\.yahoo\.com/.*" + to="https://ca.shine.yahoo.com/horoscope/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(ar|mx)\.autos\.yahoo\.com/+" + to="https://$1.autocosmos.yahoo.net/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(de|fr)\.autos\.yahoo\.com/+" + to="https://$1.cars.yahoo.com/" /> + + <!-- Redirect drops path but not args: + --> + <rule from="^http://(au|nz)\.biz\.yahoo\.com/[^?]*" + to="https://$1.finance.yahoo.com/news" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(ar|au|br|ca|cl|de|fr|es|hk|id|ie|in|it|jp|mx|my|no|nz|ph|sg|tw|uk|us|vn)\.careers\.yahoo\.com/+" + to="https://careers.yahoo.com/$1/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://malaysia\.careers\.yahoo\.com/+" + to="https://careers.yahoo.com/my/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://qc\.careers\.yahoo\.com/+" + to="https://careers.yahoo.com/ca/" /> + + <!-- Redirect preserves forward slash, path, and args: + --> + <rule from="^http://cars\.yahoo\.com/" + to="https://autos.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://(?:tw\.help\.cc|help\.cc\.tw)\.yahoo\.com/.*" + to="https://help.yahoo.com/kb/index?page=home&amp;locale=zh_TW" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://cn\.yahoo\.com/+" + to="https://sg.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(?:cine|espanol\.movies)\.yahoo\.com/+" + to="https://es-us.cine.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(?:cl|co|pe|ve)\.deportes\.yahoo\.com/+" + to="https://es-us.deportes.yahoo.com/" /> + <!-- Redirect keeps path and args: + --> + <rule from="^http://es\.deportes\.yahoo\.com/+" + to="https://es.eurosport.yahoo.com/" /> + + <!-- Redirect keeps path but not args: + --> + <rule from="^http://au\.dir\.yahoo\.com/+([^?]*).*" + 
to="https://au.search.yahoo.com/web?fr=" /> + + <rule from="^http://(?:dk|no|ru)\.yahoo\.com/+" + to="https://www.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://e1\.yahoo\.com/+" + to="https://espanol.yahoo.com/" /> + + <rule from="^http://hk\.ent\.yahoo\.com/+" + to="https://hk.celebrity.yahoo.com/" /> + + <rule from="^http://java\.europe\.yahoo\.com/" + to="https://adgallery.zenfs.com/" /> + + <rule from="^http://fr\.eurosport\.yahoo\.com/" + to="https://fr.sports.yahoo.com/" /> + + <!-- Server drops path and args: + --> + <rule from="^http://es\.everything\.yahoo\.com/.*" + to="https://es.todo.yahoo.com/" /> + + <rule from="^http://fantasysports\.yahoo\.com/(?=$|\?)" + to="https://sports.yahoo.com/fantasy" /> + + <!-- Server drops path but not args: + --> + <rule from="^http://es\.laliga\.fantasysports\.yahoo\.com/+" + to="https://es.eurosport.yahoo.com/fantasy/la-liga/" /> + + <rule from="^http://feedback\.yahoo\.com/" + to="https://yahoo.uservoice.com/" /> + + <rule from="^http://(i)?chart\.finance\.yahoo\.com/" + to="https://$1chart.yahoo.com/" /> + + <!-- Redirect drops path but not args: + --> + <rule from="^http://connectedtv\.yahoo\.com/[^?]*" + to="https://smarttv.yahoo.com/" /> + + <!-- Server keeps path and args: + --> + <rule from="^http://kr\.finance\.yahoo\.com/" + to="https://tools.search.yahoo.com/kr-eol.html" /> + + <rule from="^http://(au|nz)\.food\.yahoo\.com/" + to="https://$1.lifestyle.yahoo.com/food/" /> + + <!-- Server keeps path and args: + --> + <rule from="^http://de\.games\.yahoo\.com/+" + to="https://de.spiele.yahoo.com/" /> + + <!-- Server keeps path and args: + --> + <rule from="^http://(?:id|malaysia|nz|ph)\.games\.yahoo\.com/+" + to="https://games.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://ie\.(finance|groups|lifestyle)\.yahoo\.com/.*" + to="https://uk.$1.yahoo.com/" /> + + <!-- Redirect drops path but not args: + --> + <rule 
from="^http://au\.(?:answer|forum)s\.yahoo\.com/[^?]*" + to="https://au.answers.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://kr\.(?:gugi|maps|searchad)\.yahoo\.com/.*" + to="https://tools.search.yahoo.com/kr-eol.html" /> + + <rule from="^http://fr\.help\.yahoo\.com/+" + to="https://help.yahoo.com/l/fr/yahoo/helpcentral/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://help\.cc\.hk\.yahoo\.com/.*" + to="https://help.yahoo.com/kb/index?page=home&amp;locale=zh_HK" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(ar|es-us|mx)\.lifestyle\.yahoo\.com/+" + to="https://$1.mujer.yahoo.com/" /> + + <rule from="^http://ca\.(?:lifestyle|shine)\.yahoo\.com/" + to="https://ca.shine.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://fr\.local\.yahoo\.com/.*" + to="https://fr.yahoo.com/" /> + + + <!-- Redirect drops path and args: + --> + <rule from="^http://es\.maps\.yahoo\.com/.*" + to="https://es.search.yahoo.com/search/es?p=callejero+itinerarios&amp;y=y" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://in\.maps\.yahoo\.com/.*" + to="https://maps.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://mx\.maps\.yahoo\.com/+" + to="https://espanol.maps.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://nz\.maps\.yahoo\.com/+" + to="https://nz.search.yahoo.com/search/maps/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://ie\.messenger\.yahoo\.com/.*" + to="https://uk.messenger.yahoo.com/" /> + + <!-- Redirect drops path but not args: + --> + <rule from="^http://nz\.messenger\.yahoo\.com/[^?].*" + to="https://messenger.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://ie\.mobile\.yahoo\.com/.*" + to="https://uk.mobile.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://tw\.music\.yahoo\.com/+" + 
to="https://tw.music.yahoo.net/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://(?:axis|(?:dk|no)\.mobile|dk\.news)\.yahoo\.com/.*" + to="https://www.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://es\.movies\.yahoo\.com/+" + to="https://es.cine.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(br|fr|it)\.movies\.yahoo\.com/+" + to="https://$1.cinema.yahoo.com/" /> + + <!-- This rule must be above the main one: + --> + <rule from="^http://dps\.msg\.yahoo\.com/" + to="https://ycpi-mail-dps.msg.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://hk\.(?:music|tv)\.yahoo\.com/.*" + to="https://hk.celebrity.yahoo.com/music/" /> + + <rule from="^http://(ar|br|co|es|mx|pe)\.news\.yahoo\.com/+" + to="https://$1.noticias.yahoo.com/" /> + + <!-- Redirect drops paths and args: + --> + <rule from="^http://ie\.news\.yahoo\.com/.*" + to="https://uk.news.yahoo.com/n/news_ireland.html" /> + + <rule from="^http://on\.yahoo\.com/+" + to="https://pilotx1.yahoo.com/" /> + + <!-- Cert only matches us.rd, + all appear equivalent. 
+ --> + <rule from="^http://rds?\.yahoo\.com/" + to="https://us.rd.yahoo.com/" /> + + <rule from="^http://(ar|cl|co|es-us|mx|pe|ve)\.safely\.yahoo\.com/+" + to="https://$1.seguridad.yahoo.com/" /> + + <rule from="^http://malaysia\.safely\.yahoo\.com/+" + to="https://my.safely.yahoo.com/" /> + + <!-- Redirect drops paths and args: + --> + <rule from="^http://cn\.search\.yahoo\.com/.*" + to="https://sg.search.yahoo.com/" /> + + <!-- Redirect drops paths and args: + --> + <rule from="^http://kr\.(?:images\.)?search\.yahoo\.com/.*" + to="https://kr.search.yahoo.com/" /> + + <rule from="^http://my\.images\.search\.yahoo\.com/" + to="https://malaysia.images.search.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://nz\.maps\.search\.yahoo\.com/+" + to="https://nz.search.yahoo.com/" /> + + <rule from="^http://my\.search\.yahoo\.com/+" + to="https://malaysia.search.yahoo.com/" /> + + <!-- Redirect drops path but not args: + --> + <rule from="^http://(de|es|fr|it|uk)\.solutions\.yahoo\.com/[^?]*" + to="https://$1.adspecs.yahoo.com/" /> + + <rule from="^http://sport\.yahoo\.com/+" + to="https://sports.yahoo.com/" /> + + <rule from="^http://(de|es|uk)\.sports\.yahoo\.com/+" + to="https://$1.eurosport.yahoo.com/" /> + + <rule from="^http://in\.sports\.yahoo\.com/+$" + to="https://cricket.yahoo.com/" /> + + <!-- Server drops paths but not args: + --> + <rule from="^http://au\.todaytonight\.yahoo\.com/+\??$" + to="https://au.news.yahoo.com/today-tonight/" /> + + <rule from="^http://au\.todaytonight\.yahoo\.com/[^?]*" + to="https://au.news.yahoo.com/today-tonight/" /> + + <!-- Redirect drops path but not args: + --> + <rule from="^http://(au|nz)\.travel\.yahoo\.com/[^?]*" + to="https://$1.totaltravel.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://ca\.travel\.yahoo\.com/+" + to="https://travel.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://(my|ph)\.travel\.yahoo\.com/.*" + 
to="https://$1.news.yahoo.com/travel/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://uk\.travel\.yahoo\.com/.*" + to="https://uk.lifestyle.yahoo.com/travel/" /> + + <rule from="^http://ca\.tv\.yahoo\.com/+" + to="https://tv.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://pe\.tv\.yahoo\.com/+" + to="https://es-us.tv.yahoo.com/" /> + + <rule from="^http://((?:br|ca|de|es|es-us|fr|it|mx|uk)\.)?video\.yahoo\.com/+" + to="https://$1screen.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://(ar|co|in)\.video\.yahoo\.com/.*" + to="https://$1.screen.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://au\.video\.yahoo\.com/.*" + to="https://au.tv.yahoo.com/plus7/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://[pv]e\.video\.yahoo\.com/+" + to="https://es-us.screen.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://hk\.video\.yahoo\.com/.*" + to="https://help.yahoo.com/kb/index?page=home&amp;locale=zh_HK" /> + + <!-- Server doesn't redirect: + --> + <rule from="^http://my\.video\.yahoo\.com/" + to="https://malaysia.video.yahoo.com/" /> + + <rule from="^http://nz\.video\.yahoo\.com/+(?:\?.*)?$" + to="https://nz.news.yahoo.com/video/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(ar|es)\.weather\.yahoo\.com/+" + to="https://$1.tiempo.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(cl|co|mx|pe|ve)\.weather\.yahoo\.com/+" + to="https://$1.clima.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://espanol\.weather\.yahoo\.com/+" + to="https://es-us.clima.yahoo.com/" /> + + <!-- Redirect keeps path and args: + --> + <rule from="^http://(fr|it)\.weather\.yahoo\.com/+" + to="https://$1.meteo.yahoo.com/" /> + + <!-- Redirect drops path and args: + --> + <rule from="^http://tw\.weather\.yahoo\.com/.*" + 
to="https://tw.news.yahoo.com/weather-forecast/" /> + + <!-- Redirect drops path but not args: + --> + <rule from="^http://widgets\.yahoo\.com/[^?]*" + to="https://www.yahoo.com/" /> + + <rule from="^http://((?:\w\w|fr-ca\.actualites|address|\w\w\.address|admanager|(?:\w\w|global)\.adserver|adspecs|\w+\.adspecs|\w+\.adspecs-new|advertising|\w\w\.advertising|beap\.adx|c5a?\.ah|(?:s-)?cookex\.amp|(?:[aosz]|apac|y3?)\.analytics|anc|answers|(?:\w\w|espanol|malaysia)\.answers|antispam|\w\w\.antispam|vn\.antoan|au\.apps|global\.ard|astrology|\w\w\.astrology|hk\.(?:(?:info|f1\.master|f1\.page|search|store|edit\.store|user)\.)?auctions|autos|\w\w\.autos|ar\.ayuda|(?:clicks\.beap|csc\.beap|pn1|row|us)\.bc|tw\.bid|tw\.(?:campaign|master|mb|page|search|store|user)\.bid|(?:m\.)?tw\.bigdeals|tw\.billing|biz|boss|(?:tw\.partner|tw)\.buy|(?:\w\w\.)?calendar|careers|\w\w\.cars|(?:\w\w|es-us)\.celebridades|(?:\w\w\.)?celebrity|tw\.charity|i?chart|(?:\w\w|es-us)\.cine|\w\w\.cinema|(?:\w\w|es-us)\.clima|migration\.cn|(?:deveopers\.)?commercecentral|br\.contribuidores|(?:uk\.)?contributor|au\.dating|(?:\w\w|es-us)\.deportes|developer|tw\.dictionary|dir|downloads|s-b\.dp|(?:eu\.|na\.|sa\.|tw\.)?edit|tw\.(?:ysm\.)?emarketing|en-maktoob|\w\w\.entertainment|espanol|edit\.europe|eurosport|(?:de|es|it|uk)\.eurosport|everything|\w\w\.everything|\w+\.fantasysports|au\.fango|tw\.fashion|br\.financas|finance|(?:\w\w|tw\.chart|espanol|tw\.futures|streamerapi)\.finance|(?:\w\w|es-us)\.finanzas|nz\.rss\.food|nz\.forums|games|(?:au|ca|uk)\.games|geo|gma|groups|(?:\w\w|asia|espanol|es-us|fr-ca|moderators)\.groups|health|help|(?:\w\w|secure)\.help|homes|(?:tw|tw\.v2)\.house|info|\w\w\.info|tw\.tool\.ks|au\.launch|legalredirect|(?:\w\w)\.lifestyle|(?:gh\.bouncer\.)?login|us\.l?rd|local|\w\w\.local|m|r\.m|\w\w\.m|mail|(?:\w\w\.overview|[\w-]+(?:\.c\.yom)?)\.mail|maktoob|malaysia|tw\.(?:user\.)?mall|maps|(?:\w\w|espanol|sgws2)\.maps|messenger|(?:\w\w|malaysia)\.messenger|\w\w\.meteo|mlogin|mobile|(?:\w\w
|espanol|malaysia)\.mobile|tw\.(?:campaign\.)?money|tw\.movie|movies|(?:au|ca|nz|au\.rss|nz\.rss|tw|uk)\.movies|[\w.-]+\.msg|(?:\w\w|es-us)\.mujer|music|ca\.music|[\w-]+\.musica|my|us\.my|de\.nachrichten|ucs\.netsvs|news|(?:au|ca|fr|gr|hk|in|nz|ph|nz\.rss|sg|tw|uk)\.news|cookiex\.ngd|(?:\w\w|es-us)\.noticias|omg|(?:\w\w|es-us)\.omg|au\.oztips|rtb\.pclick|pilotx1|pipes|play|playerio|privacy|profile|tw\.promo|(?:au|hk|nz)\.promotions|publishing|(?:analytics|mailapps|media|ucs|us-locdrop|video)\.query|hk\.rd|(?:\w\w\.|fr-ca\.)?safely|screen|(?:\w\w|es-us)\.screen|scribe|search|(?:\w\w|w\w\.blog|\w\w\.dictionary|finance|\w\w\.finance|images|\w\w\.images|\w\w\.knowledge|\w\w\.lifestyle|\w\w\.local|malaysia|movies|\w\w\.movies|news|\w\w\.news|malaysia\.news|r|recipes|\w\w\.recipes|shine|shopping|\w\w\.shopping|sports|\w\w\.sports|tools|au\.tv|video|\w\w\.video|malaysia\.video)\.search|sec|rtb\.pclick\.secure|security|tw\.security|\w\w\.seguranca|\w\w\.seguridad|es-us\.seguridad|\w\w\.seguro|tw\.serviceplus|settings|shine|ca\.shine|shopping|ca\.shopping|\w+\.sitios|dashboard\.slingstone|(?:au\.|order\.)?smallbusiness|smarttv|rd\.software|de\.spiele|sports|(?:au|ca|fr|hk|nz|ph|profiles|au\.rss|nz\.rss|tw)\.sports|tw\.stock|au\.thehype|\w\w\.tiempo|es\.todo|toolbar|(?:\w\w|data|malaysia)\.toolbar|(?:au|nz)\.totaltravel|transparency|travel|tw\.travel||tv|(?:ar|au|de|fr|es|es-us|it|mx|nz|au\.rss|uk)\.tv|tw\.uwant|(?:mh|nz|qos|yep)\.video|weather|(?:au|ca|hk|in|nz|sg|ph|uk|us)\.weather|de\.wetter|www|au\.yel|video\.media\.yql|dmros\.ysm)\.)?yahoo\.com/" + to="https://$1yahoo.com/" /> + + <rule from="^http://([\w-]+)\.yahoofs\.com/" + to="https://$1.yahoofs.com/" /> + + <rule from="^http://yhoo\.it/" + to="https://bit.ly/" /> + + <rule from="^http://(\w+)\.zenfs\.com/" + to="https://$1.zenfs.com/" /> + +</ruleset> diff --git a/searx/https_rules/YouTube.xml b/searx/https_rules/YouTube.xml @@ -0,0 +1,46 @@ +<ruleset name="YouTube (partial)"> + + <target host="youtube.com" /> + 
<target host="*.youtube.com" /> + <exclusion pattern="^http://(?:www\.)?youtube\.com/crossdomain\.xml"/> + <exclusion pattern="^http://(?:www\.)?youtube\.com/(?:apiplayer|api_video_info)"/> + <exclusion pattern="^http://(?:[^/@:\.]+\.)?ytimg\.com/.*apiplayer[0-9]*\.swf"/> + <target host="*.ytimg.com" /> + <target host="youtu.be" /> + <target host="youtube-nocookie.com"/> + <target host="www.youtube-nocookie.com"/> + <target host="*.googlevideo.com"/> + <exclusion pattern="^http://([^/@:\.]+)\.googlevideo\.com/crossdomain\.xml"/> + + + <!-- Not secured by server: + --> + <!--securecookie host="^\.youtube\.com$" name="^(GEUP|PREF|VISITOR_INFO1_LIVE|YSC)$" /--> + + <!-- observed ^. cookies: + - use_hitbox + - VISITOR_INFO1_LIVE + - recently_watched_video_id_list + - .youtube.com --> + <securecookie host="^\.youtube\.com" name=".*"/> + + + <rule from="^http://(www\.)?youtube\.com/" + to="https://$1youtube.com/"/> + + <rule from="^http://(br|de|es|fr|il|img|insight|jp|m|nl|uk)\.youtube\.com/" + to="https://$1.youtube.com/"/> + + <rule from="^http://([^/@:\.]+)\.ytimg\.com/" + to="https://$1.ytimg.com/"/> + + <rule from="^http://youtu\.be/" + to="https://youtu.be/"/> + + <rule from="^http://(?:www\.)?youtube-nocookie\.com/" + to="https://www.youtube-nocookie.com/"/> + + <rule from="^http://([^/@:\.]+)\.googlevideo\.com/" + to="https://$1.googlevideo.com/"/> + +</ruleset> diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml @@ -4,6 +4,9 @@ server: debug : False request_timeout : 3.0 # seconds base_url: False + themes_path : "" + default_theme : default + https_rewrite : True engines: - name : general_dummy diff --git a/searx/webapp.py b/searx/webapp.py @@ -50,6 +50,9 @@ from searx.search import Search from searx.query import Query from searx.autocomplete import backends as autocomplete_backends +from urlparse import urlparse +import re + static_path, templates_path, themes =\ get_themes(settings['themes_path'] @@ -206,16 +209,60 @@ def index(): if not 
search.paging and engines[result['engine']].paging: search.paging = True + # check if HTTPS rewrite is required if settings['server']['https_rewrite']\ and result['parsed_url'].scheme == 'http': - for http_regex, https_url in https_rules: - if http_regex.match(result['url']): - result['url'] = http_regex.sub(https_url, result['url']) - # TODO result['parsed_url'].scheme + skip_https_rewrite = False + + # check if HTTPS rewrite is possible + for target, rules, exclusions in https_rules: + + # check if target regex match with url + if target.match(result['url']): + # process exclusions + for exclusion in exclusions: + # check if exclusion match with url + if exclusion.match(result['url']): + skip_https_rewrite = True + break + + # skip https rewrite if required + if skip_https_rewrite: + break + + # process rules + for rule in rules: + try: + # TODO, precompile rule + p = re.compile(rule[0]) + + # rewrite url if possible + new_result_url = p.sub(rule[1], result['url']) + except: + break + + # parse new url + new_parsed_url = urlparse(new_result_url) + + # continue if nothing was rewritten + if result['url'] == new_result_url: + continue + + # get domainname from result + # TODO, only works correctly with TLDs like asdf.com, not for asdf.com.de + # TODO, using publicsuffix instead of this rewrite rule + old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:]) + new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:]) + + # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules + if old_result_domainname == new_result_domainname: + # set new url + result['url'] = new_result_url + + # target has matched, do not search over the other rules break - # HTTPS rewrite if search.request_data.get('format', 'html') == 'html': if 'content' in result: result['content'] = highlight_content(result['content'],