[fix] www. domain duplications - searx - My custom branch(es) on searx, a meta-search engine

commit: b226e6462b1a8fa18ee670f3f2738145426b6f41
parent: 78d42f094cd17c4152518a592cbc28c432a1ef22
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Tue, 20 May 2014 01:16:49 +0200

[fix] www. domain duplications

Diffstat:
M searx/engines/__init__.py 10 +++++++++-

1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
@@ -154,16 +154,24 @@ def score_results(results):
     # deduplication + scoring
     for i, res in enumerate(flat_res):
         res['parsed_url'] = urlparse(res['url'])
+        res['host'] = res['parsed_url'].netloc
+
+        if res['host'].startswith('www.'):
+            res['host'] = res['host'].replace('www.', '', 1)
+
         res['engines'] = [res['engine']]
         weight = 1.0
+
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
+
         score = int((flat_len - i) / engines_len) * weight + 1
         duplicated = False
+
         for new_res in results:
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
-            if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
+            if res['host'] == new_res['host'] and\
                p1 == p2 and\
                res['parsed_url'].query == new_res['parsed_url'].query and\
                res.get('template') == new_res.get('template'):