Merge branch 'master' of https://github.com/asciimoo/searx - searx - My custom branch(es) on searx, a meta-search engine

commit: 51278ee0be0af0d037d467dc0e22cb844a79e5f8
parent c2e034f52a31d4eb84b01cafb3af70ed55dad792
Author: a01200356 <a01200356@itesm.mx>
Date:   Mon, 18 Jan 2016 11:29:45 -0600

Merge branch 'master' of https://github.com/asciimoo/searx

Diffstat:
M manage.sh 5 ++++-
M requirements-dev.txt 2 +-
M searx/autocomplete.py 6 ++----
M searx/engines/blekko_images.py 2 +-
M searx/engines/btdigg.py 2 +-
M searx/engines/deviantart.py 2 +-
M searx/engines/digg.py 2 +-
M searx/engines/faroo.py 2 +-
M searx/engines/google.py 46 +++++++++++++++++++++++-----------------------
M searx/engines/searchcode_code.py 4 ++--
M searx/engines/searchcode_doc.py 4 ++--
M searx/engines/stackoverflow.py 2 +-
M searx/engines/startpage.py 8 ++++----
M searx/engines/wikidata.py 4 ++--
M searx/engines/www1x.py 2 +-
M searx/engines/xpath.py 4 ++--
M searx/engines/yandex.py 2 +-
M searx/plugins/https_rewrite.py 4 ++--
M searx/poolrequests.py 2 +-

19 files changed, 53 insertions(+), 52 deletions(-)
diff --git a/manage.sh b/manage.sh
@@ -16,7 +16,10 @@ update_dev_packages() {
 
 pep8_check() {
     echo '[!] Running pep8 check'
-    pep8 --max-line-length=120 "$SEARX_DIR" "$BASE_DIR/tests"
+    # ignored rules:
+    #  E402 module level import not at top of file
+    #  W503 line break before binary operator
+    pep8 --max-line-length=120 --ignore "E402,W503" "$SEARX_DIR" "$BASE_DIR/tests"
 }
 
 unit_tests() {
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,7 +1,7 @@
 babel==2.2.0
-flake8==2.5.1
 mock==1.0.1
 nose2[coverage-plugin]
+pep8==1.7.0
 plone.testing==4.0.15
 robotframework-selenium2library==1.7.4
 robotsuite==1.7.0
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
@@ -114,8 +114,7 @@ def dbpedia(query):
     # dbpedia autocompleter, no HTTPS
     autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
 
-    response = get(autocomplete_url
-                   + urlencode(dict(QueryString=query)))
+    response = get(autocomplete_url + urlencode(dict(QueryString=query)))
 
     results = []
 
@@ -141,8 +140,7 @@ def google(query):
     # google autocompleter
     autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
 
-    response = get(autocomplete_url
-                   + urlencode(dict(q=query)))
+    response = get(autocomplete_url + urlencode(dict(q=query)))
 
     results = []
 
diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py
@@ -37,7 +37,7 @@ def request(query, params):
                           c=c)
 
     if params['pageno'] != 1:
-        params['url'] += '&page={pageno}'.format(pageno=(params['pageno']-1))
+        params['url'] += '&page={pageno}'.format(pageno=(params['pageno'] - 1))
 
     # let Blekko know we wan't have profiling
     params['cookies']['tag_lesslogging'] = '1'
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
@@ -29,7 +29,7 @@ search_url = url + '/search?q={search_term}&p={pageno}'
 # do search-request
 def request(query, params):
     params['url'] = search_url.format(search_term=quote(query),
-                                      pageno=params['pageno']-1)
+                                      pageno=params['pageno'] - 1)
 
     return params
 
diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
@@ -24,7 +24,7 @@ paging = True
 
 # search-url
 base_url = 'https://www.deviantart.com/'
-search_url = base_url+'browse/all/?offset={offset}&{query}'
+search_url = base_url + 'browse/all/?offset={offset}&{query}'
 
 
 # do search-request
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
@@ -22,7 +22,7 @@ paging = True
 
 # search-url
 base_url = 'https://digg.com/'
-search_url = base_url+'api/search/{query}.json?position={position}&format=html'
+search_url = base_url + 'api/search/{query}.json?position={position}&format=html'
 
 # specific xpath variables
 results_xpath = '//article'
diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py
@@ -88,7 +88,7 @@ def response(resp):
     for result in search_res['results']:
         if result['news']:
             # timestamp (milliseconds since 1970)
-            publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0)  # noqa
+            publishedDate = datetime.datetime.fromtimestamp(result['date'] / 1000.0)  # noqa
 
             # append news result
             results.append({'url': result['url'],
diff --git a/searx/engines/google.py b/searx/engines/google.py
@@ -209,29 +209,29 @@ def response(resp):
             parsed_url = urlparse(url, google_hostname)
 
             # map result
-            if ((parsed_url.netloc == google_hostname and parsed_url.path.startswith(maps_path))
-               or (parsed_url.netloc.startswith(map_hostname_start))):
-                x = result.xpath(map_near)
-                if len(x) > 0:
-                    # map : near the location
-                    results = results + parse_map_near(parsed_url, x, google_hostname)
-                else:
-                    # map : detail about a location
-                    results = results + parse_map_detail(parsed_url, result, google_hostname)
-
-            # google news
-            elif (parsed_url.netloc == google_hostname
-                  and parsed_url.path == search_path):
-                # skipping news results
-                pass
-
-            # images result
-            elif (parsed_url.netloc == google_hostname
-                  and parsed_url.path == images_path):
-                # only thumbnail image provided,
-                # so skipping image results
-                # results = results + parse_images(result, google_hostname)
-                pass
+            if parsed_url.netloc == google_hostname:
+                # TODO fix inside links
+                continue
+                # if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start):
+                #     print "yooooo"*30
+                #     x = result.xpath(map_near)
+                #     if len(x) > 0:
+                #         # map : near the location
+                #         results = results + parse_map_near(parsed_url, x, google_hostname)
+                #     else:
+                #         # map : detail about a location
+                #         results = results + parse_map_detail(parsed_url, result, google_hostname)
+                # # google news
+                # elif parsed_url.path == search_path:
+                #     # skipping news results
+                #     pass
+
+                # # images result
+                # elif parsed_url.path == images_path:
+                #     # only thumbnail image provided,
+                #     # so skipping image results
+                #     # results = results + parse_images(result, google_hostname)
+                #     pass
 
             else:
                 # normal result
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
@@ -20,7 +20,7 @@ paging = True
 
 # search-url
 url = 'https://searchcode.com/'
-search_url = url+'api/codesearch_I/?{query}&p={pageno}'
+search_url = url + 'api/codesearch_I/?{query}&p={pageno}'
 
 # special code-endings which are not recognised by the file ending
 code_endings = {'cs': 'c#',
@@ -32,7 +32,7 @@ code_endings = {'cs': 'c#',
 # do search-request
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno']-1)
+                                      pageno=params['pageno'] - 1)
 
     # Disable SSL verification
     # error: (60) SSL certificate problem: unable to get local issuer
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
@@ -19,13 +19,13 @@ paging = True
 
 # search-url
 url = 'https://searchcode.com/'
-search_url = url+'api/search_IV/?{query}&p={pageno}'
+search_url = url + 'api/search_IV/?{query}&p={pageno}'
 
 
 # do search-request
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno']-1)
+                                      pageno=params['pageno'] - 1)
 
     # Disable SSL verification
     # error: (60) SSL certificate problem: unable to get local issuer
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
@@ -22,7 +22,7 @@ paging = True
 
 # search-url
 url = 'https://stackoverflow.com/'
-search_url = url+'search?{query}&page={pageno}'
+search_url = url + 'search?{query}&page={pageno}'
 
 # specific xpath variables
 results_xpath = '//div[contains(@class,"question-summary")]'
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
@@ -90,8 +90,8 @@ def response(resp):
 
         # check if search result starts with something like: "2 Sep 2014 ... "
         if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content):
-            date_pos = content.find('...')+4
-            date_string = content[0:date_pos-5]
+            date_pos = content.find('...') + 4
+            date_string = content[0:date_pos - 5]
             published_date = parser.parse(date_string, dayfirst=True)
 
             # fix content string
@@ -99,8 +99,8 @@ def response(resp):
 
         # check if search result starts with something like: "5 days ago ... "
         elif re.match("^[0-9]+ days? ago \.\.\. ", content):
-            date_pos = content.find('...')+4
-            date_string = content[0:date_pos-5]
+            date_pos = content.find('...') + 4
+            date_string = content[0:date_pos - 5]
 
             # calculate datetime
             published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
@@ -295,7 +295,7 @@ def get_geolink(claims, propertyName, defaultValue=''):
     if precision < 0.0003:
         zoom = 19
     else:
-        zoom = int(15 - precision*8.8322 + precision*precision*0.625447)
+        zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447)
 
     url = url_map\
         .replace('{latitude}', str(value.get('latitude', 0)))\
@@ -318,6 +318,6 @@ def get_wikilink(result, wikiid):
 
 def get_wiki_firstlanguage(result, wikipatternid):
     for k in result.get('sitelinks', {}).keys():
-        if k.endswith(wikipatternid) and len(k) == (2+len(wikipatternid)):
+        if k.endswith(wikipatternid) and len(k) == (2 + len(wikipatternid)):
             return k[0:2]
     return None
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
@@ -22,7 +22,7 @@ paging = False
 
 # search-url
 base_url = 'https://1x.com'
-search_url = base_url+'/backend/search.php?{query}'
+search_url = base_url + '/backend/search.php?{query}'
 
 
 # do search-request
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
@@ -43,7 +43,7 @@ def extract_url(xpath_results, search_url):
     if url.startswith('//'):
         # add http or https to this kind of url //example.com/
         parsed_search_url = urlparse(search_url)
-        url = parsed_search_url.scheme+url
+        url = parsed_search_url.scheme + url
     elif url.startswith('/'):
         # fix relative url to the search engine
         url = urljoin(search_url, url)
@@ -69,7 +69,7 @@ def normalize_url(url):
         p = parsed_url.path
         mark = p.find('/**')
         if mark != -1:
-            return unquote(p[mark+3:]).decode('utf-8')
+            return unquote(p[mark + 3:]).decode('utf-8')
 
     return url
 
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
@@ -38,7 +38,7 @@ content_xpath = './/div[@class="serp-item__text"]//text()'
 def request(query, params):
     lang = params['language'].split('_')[0]
     host = base_url.format(tld=language_map.get(lang) or default_tld)
-    params['url'] = host + search_url.format(page=params['pageno']-1,
+    params['url'] = host + search_url.format(page=params['pageno'] - 1,
                                              query=urlencode({'text': query}))
     return params
 
diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py
@@ -103,10 +103,10 @@ def load_single_https_ruleset(rules_path):
             # into a valid python regex group
             rule_from = ruleset.attrib['from'].replace('$', '\\')
             if rule_from.endswith('\\'):
-                rule_from = rule_from[:-1]+'$'
+                rule_from = rule_from[:-1] + '$'
             rule_to = ruleset.attrib['to'].replace('$', '\\')
             if rule_to.endswith('\\'):
-                rule_to = rule_to[:-1]+'$'
+                rule_to = rule_to[:-1] + '$'
 
             # TODO, not working yet because of the hack above,
             # currently doing that in webapp.py
diff --git a/searx/poolrequests.py b/searx/poolrequests.py
@@ -92,7 +92,7 @@ def head(url, **kwargs):
     return request('head', url, **kwargs)
 
 
-def post(url, data=None,  **kwargs):
+def post(url, data=None, **kwargs):
     return request('post', url, data=data, **kwargs)

M	manage.sh	5	++++-
M	requirements-dev.txt	2	+-
M	searx/autocomplete.py	6	++----
M	searx/engines/blekko_images.py	2	+-
M	searx/engines/btdigg.py	2	+-
M	searx/engines/deviantart.py	2	+-
M	searx/engines/digg.py	2	+-
M	searx/engines/faroo.py	2	+-
M	searx/engines/google.py	46	+++++++++++++++++++++++-----------------------
M	searx/engines/searchcode_code.py	4	++--
M	searx/engines/searchcode_doc.py	4	++--
M	searx/engines/stackoverflow.py	2	+-
M	searx/engines/startpage.py	8	++++----
M	searx/engines/wikidata.py	4	++--
M	searx/engines/www1x.py	2	+-
M	searx/engines/xpath.py	4	++--
M	searx/engines/yandex.py	2	+-
M	searx/plugins/https_rewrite.py	4	++--
M	searx/poolrequests.py	2	+-