commit: 5740cfbf1cb468af74d0e7e1c9358ce702eb4f59
parent: b0fd71b7b3463864e6c60156610d383af2ac9709
Author: Adam Tauber <asciimoo@gmail.com>
Date: Sun, 19 Oct 2014 12:41:04 +0200
[fix] pep8 part II.
Diffstat:
6 files changed, 121 insertions(+), 80 deletions(-)
diff --git a/searx/__init__.py b/searx/__init__.py
@@ -28,7 +28,8 @@ except:
searx_dir = abspath(dirname(__file__))
engine_dir = dirname(realpath(__file__))
-# if possible set path to settings using the enviroment variable SEARX_SETTINGS_PATH
+# if possible set path to settings using the
+# environment variable SEARX_SETTINGS_PATH
if 'SEARX_SETTINGS_PATH' in environ:
settings_path = environ['SEARX_SETTINGS_PATH']
# otherwise using default path
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
@@ -41,7 +41,7 @@ def load_module(filename):
module.name = modname
return module
-if not 'engines' in settings or not settings['engines']:
+if 'engines' not in settings or not settings['engines']:
print '[E] Error no engines found. Edit your settings.yml'
exit(2)
@@ -68,15 +68,15 @@ for engine_data in settings['engines']:
engine.categories = ['general']
if not hasattr(engine, 'language_support'):
- #engine.language_support = False
+ # engine.language_support = False
engine.language_support = True
if not hasattr(engine, 'timeout'):
- #engine.language_support = False
+ # engine.language_support = False
engine.timeout = settings['server']['request_timeout']
if not hasattr(engine, 'shortcut'):
- #engine.shortcut = '''
+ # engine.shortcut = '''
engine.shortcut = ''
# checking required variables
@@ -161,7 +161,8 @@ def get_engines_stats():
for engine in scores_per_result:
if max_score_per_result:
- engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
+ engine['percentage'] = int(engine['avg']
+ / max_score_per_result * 100)
else:
engine['percentage'] = 0
diff --git a/searx/query.py b/searx/query.py
@@ -31,30 +31,31 @@ class Query(object):
def __init__(self, query, blocked_engines):
self.query = query
self.blocked_engines = []
-
+
if blocked_engines:
self.blocked_engines = blocked_engines
-
+
self.query_parts = []
self.engines = []
self.languages = []
-
- # parse query, if tags are set, which change the serch engine or search-language
+
+ # parse query, if tags are set, which
+    # change the search engine or search-language
def parse_query(self):
self.query_parts = []
-
+
# split query, including whitespaces
raw_query_parts = re.split(r'(\s+)', self.query)
-
+
parse_next = True
-
+
for query_part in raw_query_parts:
if not parse_next:
self.query_parts[-1] += query_part
continue
-
+
parse_next = False
-
+
# part does only contain spaces, skip
if query_part.isspace()\
or query_part == '':
@@ -62,15 +63,17 @@ class Query(object):
self.query_parts.append(query_part)
continue
- # this force a language
+            # this forces a language
if query_part[0] == ':':
lang = query_part[1:].lower()
- # check if any language-code is equal with declared language-codes
+ # check if any language-code is equal with
+ # declared language-codes
for lc in language_codes:
lang_id, lang_name, country = map(str.lower, lc)
- # if correct language-code is found, set it as new search-language
+ # if correct language-code is found
+ # set it as new search-language
if lang == lang_id\
or lang_id.startswith(lang)\
or lang == lang_name\
@@ -89,23 +92,24 @@ class Query(object):
parse_next = True
self.engines.append({'category': 'none',
'name': engine_shortcuts[prefix]})
-
+
# check if prefix is equal with engine name
elif prefix in engines\
- and not prefix in self.blocked_engines:
+ and prefix not in self.blocked_engines:
parse_next = True
self.engines.append({'category': 'none',
'name': prefix})
# check if prefix is equal with categorie name
elif prefix in categories:
- # using all engines for that search, which are declared under that categorie name
+ # using all engines for that search, which
+                # are declared under that category name
parse_next = True
self.engines.extend({'category': prefix,
'name': engine.name}
for engine in categories[prefix]
- if not engine in self.blocked_engines)
-
+ if engine not in self.blocked_engines)
+
# append query part to query_part list
self.query_parts.append(query_part)
@@ -114,14 +118,13 @@ class Query(object):
self.query_parts[-1] = search_query
else:
self.query_parts.append(search_query)
-
+
def getSearchQuery(self):
if len(self.query_parts):
return self.query_parts[-1]
else:
return ''
-
+
def getFullQuery(self):
# get full querry including whitespaces
return string.join(self.query_parts, '')
-
diff --git a/searx/search.py b/searx/search.py
@@ -22,7 +22,7 @@ from datetime import datetime
from operator import itemgetter
from urlparse import urlparse, unquote
from searx.engines import (
- categories, engines, engine_shortcuts
+ categories, engines
)
from searx.languages import language_codes
from searx.utils import gen_useragent
@@ -39,7 +39,13 @@ def default_request_params():
# create a callback wrapper for the search engine results
-def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
+def make_callback(engine_name,
+ results,
+ suggestions,
+ answers,
+ infoboxes,
+ callback,
+ params):
# creating a callback wrapper for the search engine results
def process_callback(response, **kwargs):
@@ -95,7 +101,7 @@ def make_callback(engine_name, results, suggestions, answers, infoboxes, callbac
def content_result_len(content):
if isinstance(content, basestring):
content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
- return len(content)
+ return len(content)
else:
return 0
@@ -126,7 +132,8 @@ def score_results(results):
# strip multiple spaces and cariage returns from content
if 'content' in res:
- res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
+ res['content'] = re.sub(' +', ' ',
+ res['content'].strip().replace('\n', ''))
# get weight of this engine if possible
if hasattr(engines[res['engine']], 'weight'):
@@ -139,8 +146,12 @@ def score_results(results):
duplicated = False
for new_res in results:
# remove / from the end of the url if required
- p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
- p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
+ p1 = res['parsed_url'].path[:-1]\
+ if res['parsed_url'].path.endswith('/')\
+ else res['parsed_url'].path
+ p2 = new_res['parsed_url'].path[:-1]\
+ if new_res['parsed_url'].path.endswith('/')\
+ else new_res['parsed_url'].path
# check if that result is a duplicate
if res['host'] == new_res['host'] and\
@@ -153,7 +164,8 @@ def score_results(results):
# merge duplicates together
if duplicated:
# using content with more text
- if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
+ if content_result_len(res.get('content', '')) >\
+ content_result_len(duplicated.get('content', '')):
duplicated['content'] = res['content']
# increase result-score
@@ -182,17 +194,25 @@ def score_results(results):
for i, res in enumerate(results):
# FIXME : handle more than one category per engine
- category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template']
-
- current = None if category not in categoryPositions else categoryPositions[category]
-
- # group with previous results using the same category if the group can accept more result and is not too far from the current position
- if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
- # group with the previous results using the same category with this one
+ category = engines[res['engine']].categories[0] + ':' + ''\
+ if 'template' not in res\
+ else res['template']
+
+ current = None if category not in categoryPositions\
+ else categoryPositions[category]
+
+ # group with previous results using the same category
+        # if the group can accept more results and is not too far
+ # from the current position
+ if current is not None and (current['count'] > 0)\
+ and (len(gresults) - current['index'] < 20):
+ # group with the previous results using
+ # the same category with this one
index = current['index']
gresults.insert(index, res)
- # update every index after the current one (including the current one)
+ # update every index after the current one
+ # (including the current one)
for k in categoryPositions:
v = categoryPositions[k]['index']
if v >= index:
@@ -206,7 +226,7 @@ def score_results(results):
gresults.append(res)
# update categoryIndex
- categoryPositions[category] = { 'index' : len(gresults), 'count' : 8 }
+ categoryPositions[category] = {'index': len(gresults), 'count': 8}
# return gresults
return gresults
@@ -215,21 +235,21 @@ def score_results(results):
def merge_two_infoboxes(infobox1, infobox2):
if 'urls' in infobox2:
urls1 = infobox1.get('urls', None)
- if urls1 == None:
+ if urls1 is None:
urls1 = []
infobox1.set('urls', urls1)
urlSet = set()
for url in infobox1.get('urls', []):
urlSet.add(url.get('url', None))
-
+
for url in infobox2.get('urls', []):
if url.get('url', None) not in urlSet:
urls1.append(url)
if 'attributes' in infobox2:
attributes1 = infobox1.get('attributes', None)
- if attributes1 == None:
+ if attributes1 is None:
attributes1 = []
infobox1.set('attributes', attributes1)
@@ -237,14 +257,14 @@ def merge_two_infoboxes(infobox1, infobox2):
for attribute in infobox1.get('attributes', []):
if attribute.get('label', None) not in attributeSet:
attributeSet.add(attribute.get('label', None))
-
+
for attribute in infobox2.get('attributes', []):
attributes1.append(attribute)
if 'content' in infobox2:
content1 = infobox1.get('content', None)
content2 = infobox2.get('content', '')
- if content1 != None:
+ if content1 is not None:
if content_result_len(content2) > content_result_len(content1):
infobox1['content'] = content2
else:
@@ -257,12 +277,12 @@ def merge_infoboxes(infoboxes):
for infobox in infoboxes:
add_infobox = True
infobox_id = infobox.get('id', None)
- if infobox_id != None:
+ if infobox_id is not None:
existingIndex = infoboxes_id.get(infobox_id, None)
- if existingIndex != None:
+ if existingIndex is not None:
merge_two_infoboxes(results[existingIndex], infobox)
- add_infobox=False
-
+ add_infobox = False
+
if add_infobox:
results.append(infobox)
infoboxes_id[infobox_id] = len(results)-1
@@ -318,7 +338,8 @@ class Search(object):
self.pageno = int(pageno_param)
- # parse query, if tags are set, which change the serch engine or search-language
+ # parse query, if tags are set, which change
+        # the search engine or search-language
query_obj = Query(self.request_data['q'], self.blocked_engines)
query_obj.parse_query()
@@ -334,25 +355,29 @@ class Search(object):
self.categories = []
- # if engines are calculated from query, set categories by using that informations
+ # if engines are calculated from query,
+        # set categories by using that information
if self.engines:
self.categories = list(set(engine['category']
for engine in self.engines))
- # otherwise, using defined categories to calculate which engines should be used
+ # otherwise, using defined categories to
+ # calculate which engines should be used
else:
# set used categories
for pd_name, pd in self.request_data.items():
if pd_name.startswith('category_'):
category = pd_name[9:]
# if category is not found in list, skip
- if not category in categories:
+ if category not in categories:
continue
# add category to list
self.categories.append(category)
- # if no category is specified for this search, using user-defined default-configuration which (is stored in cookie)
+ # if no category is specified for this search,
+            # using user-defined default-configuration
+            # (which is stored in a cookie)
if not self.categories:
cookie_categories = request.cookies.get('categories', '')
cookie_categories = cookie_categories.split(',')
@@ -360,16 +385,18 @@ class Search(object):
if ccateg in categories:
self.categories.append(ccateg)
- # if still no category is specified, using general as default-category
+ # if still no category is specified, using general
+ # as default-category
if not self.categories:
self.categories = ['general']
- # using all engines for that search, which are declared under the specific categories
+ # using all engines for that search, which are
+ # declared under the specific categories
for categ in self.categories:
self.engines.extend({'category': categ,
'name': x.name}
for x in categories[categ]
- if not x.name in self.blocked_engines)
+ if x.name not in self.blocked_engines)
# do search-request
def search(self, request):
@@ -386,7 +413,7 @@ class Search(object):
number_of_searches += 1
# set default useragent
- #user_agent = request.headers.get('User-Agent', '')
+ # user_agent = request.headers.get('User-Agent', '')
user_agent = gen_useragent()
# start search-reqest for all selected engines
@@ -400,7 +427,8 @@ class Search(object):
if self.pageno > 1 and not engine.paging:
continue
- # if search-language is set and engine does not provide language-support, skip
+ # if search-language is set and engine does not
+ # provide language-support, skip
if self.lang != 'all' and not engine.language_support:
continue
@@ -412,7 +440,8 @@ class Search(object):
request_params['pageno'] = self.pageno
request_params['language'] = self.lang
- # update request parameters dependent on search-engine (contained in engines folder)
+ # update request parameters dependent on
+ # search-engine (contained in engines folder)
request_params = engine.request(self.query.encode('utf-8'),
request_params)
@@ -431,7 +460,8 @@ class Search(object):
request_params
)
- # create dictionary which contain all informations about the request
+            # create dictionary which contains all
+            # information about the request
request_args = dict(
headers=request_params['headers'],
hooks=dict(response=callback),
diff --git a/searx/utils.py b/searx/utils.py
@@ -1,4 +1,4 @@
-#import htmlentitydefs
+# import htmlentitydefs
from codecs import getincrementalencoder
from HTMLParser import HTMLParser
from random import choice
@@ -22,7 +22,8 @@ def gen_useragent():
def searx_useragent():
return 'searx'
-
+
+
def highlight_content(content, query):
if not content:
@@ -67,8 +68,8 @@ class HTMLTextExtractor(HTMLParser):
self.result.append(unichr(codepoint))
def handle_entityref(self, name):
- #codepoint = htmlentitydefs.name2codepoint[name]
- #self.result.append(unichr(codepoint))
+ # codepoint = htmlentitydefs.name2codepoint[name]
+ # self.result.append(unichr(codepoint))
self.result.append(name)
def get_text(self):
diff --git a/searx/webapp.py b/searx/webapp.py
@@ -71,7 +71,7 @@ app.secret_key = settings['server']['secret_key']
babel = Babel(app)
-#TODO configurable via settings.yml
+# TODO configurable via settings.yml
favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
'twitter', 'stackoverflow', 'github']
@@ -146,14 +146,14 @@ def render(template_name, override_theme=None, **kwargs):
nonblocked_categories = set(chain.from_iterable(nonblocked_categories))
- if not 'categories' in kwargs:
+ if 'categories' not in kwargs:
kwargs['categories'] = ['general']
kwargs['categories'].extend(x for x in
sorted(categories.keys())
if x != 'general'
and x in nonblocked_categories)
- if not 'selected_categories' in kwargs:
+ if 'selected_categories' not in kwargs:
kwargs['selected_categories'] = []
for arg in request.args:
if arg.startswith('category_'):
@@ -168,7 +168,7 @@ def render(template_name, override_theme=None, **kwargs):
if not kwargs['selected_categories']:
kwargs['selected_categories'] = ['general']
- if not 'autocomplete' in kwargs:
+ if 'autocomplete' not in kwargs:
kwargs['autocomplete'] = autocomplete
kwargs['method'] = request.cookies.get('method', 'POST')
@@ -202,14 +202,15 @@ def index():
'index.html',
)
- search.results, search.suggestions, search.answers, search.infoboxes = search.search(request)
+ search.results, search.suggestions,\
+ search.answers, search.infoboxes = search.search(request)
for result in search.results:
if not search.paging and engines[result['engine']].paging:
search.paging = True
- # check if HTTPS rewrite is required
+ # check if HTTPS rewrite is required
if settings['server']['https_rewrite']\
and result['parsed_url'].scheme == 'http':
@@ -236,7 +237,7 @@ def index():
try:
# TODO, precompile rule
p = re.compile(rule[0])
-
+
# rewrite url if possible
new_result_url = p.sub(rule[1], result['url'])
except:
@@ -250,17 +251,21 @@ def index():
continue
# get domainname from result
- # TODO, does only work correct with TLD's like asdf.com, not for asdf.com.de
+ # TODO, does only work correct with TLD's like
+ # asdf.com, not for asdf.com.de
# TODO, using publicsuffix instead of this rewrite rule
- old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:])
- new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:])
+ old_result_domainname = '.'.join(
+ result['parsed_url'].hostname.split('.')[-2:])
+ new_result_domainname = '.'.join(
+ new_parsed_url.hostname.split('.')[-2:])
- # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules
+ # check if rewritten hostname is the same,
+ # to protect against wrong or malicious rewrite rules
if old_result_domainname == new_result_domainname:
# set new url
result['url'] = new_result_url
- # target has matched, do not search over the other rules
+ # target has matched, do not search over the other rules
break
if search.request_data.get('format', 'html') == 'html':
@@ -429,7 +434,7 @@ def preferences():
for pd_name, pd in request.form.items():
if pd_name.startswith('category_'):
category = pd_name[9:]
- if not category in categories:
+ if category not in categories:
continue
selected_categories.append(category)
elif pd_name == 'locale' and pd in settings['locales']: