logo

searx

Unnamed repository; edit this file 'description' to name the repository.
commit: ffc93ba25626e7473e58a44f057245315c99d770
parent: b7fa79081f3c7c9ce2974c406e07b1e48cb9534a
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sun, 19 Jan 2014 16:29:56 -0800

Merge pull request #30 from matejc/smallissues

fix: robot fw, entry points, some flake8, package searx egg

Diffstat:

.gitignore | 18+++++++++++-------
Makefile | 12++++--------
buildout.cfg | 2--
minimal.cfg | 2--
production.cfg | 2--
searx/__init__.py | 6++----
searx/engines/__init__.py | 110++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
searx/engines/bing.py | 5+++--
searx/engines/currency_convert.py | 27+++++++++++++++------------
searx/engines/dailymotion.py | 5++++-
searx/settings.yml | 107+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/settings_robot.py | 16----------------
searx/settings_robot.yml | 107+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/testing.py | 19++++++++++++++++---
searx/utils.py | 6+++++-
searx/webapp.py | 71+++++++++++++++++++++++++++++++++++++++--------------------------------
settings.yml | 107-------------------------------------------------------------------------------
setup.py | 15+++++++++++++++
18 files changed, 400 insertions(+), 237 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,20 +1,24 @@ -env -engines.cfg -.installed.cfg .coverage -coverage/ +.installed.cfg +engines.cfg +env +robot_log.html +robot_output.xml +robot_report.html setup.cfg *.pyc */*.pyc bin/ -include/ -lib/ build/ +covearge/ develop-eggs/ +dist/ eggs/ +include/ +lib/ local/ -searx.egg-info/ parts/ +searx.egg-info/ var/ diff --git a/Makefile b/Makefile @@ -21,11 +21,7 @@ $(python): tests: .installed.cfg @bin/test -enginescfg: - @test -f ./engines.cfg || echo "Copying engines.cfg ..." - @cp --no-clobber engines.cfg_sample engines.cfg - -robot: .installed.cfg enginescfg +robot: .installed.cfg @bin/robot flake8: .installed.cfg @@ -37,18 +33,18 @@ coverage: .installed.cfg @bin/coverage report --show-missing @bin/coverage html --directory ./coverage -production: bin/buildout production.cfg setup.py enginescfg +production: bin/buildout production.cfg setup.py bin/buildout -c production.cfg $(options) @echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`" @echo "* Hint 1: on production, disable debug mode and change secret_key" @echo "* Hint 2: searx will be executed at server startup by crontab" @echo "* Hint 3: to run immediatley, execute 'bin/supervisord'" -minimal: bin/buildout minimal.cfg setup.py enginescfg +minimal: bin/buildout minimal.cfg setup.py bin/buildout -c minimal.cfg $(options) clean: @rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \ searx.egg-info lib include .coverage coverage -.PHONY: all tests enginescfg robot flake8 coverage production minimal clean +.PHONY: all tests robot flake8 coverage production minimal clean diff --git a/buildout.cfg b/buildout.cfg @@ -16,8 +16,6 @@ recipe = zc.recipe.egg:script eggs = ${buildout:eggs} interpreter = py dependent-scripts = true -entry-points = - searx-run=searx.webapp:run [robot] diff --git a/minimal.cfg b/minimal.cfg @@ -13,5 +13,3 @@ parts += recipe = zc.recipe.egg:script eggs = ${buildout:eggs} interpreter = py -entry-points = - searx-run=searx.webapp:run diff --git a/production.cfg b/production.cfg @@ -15,8 +15,6 @@ parts += recipe = zc.recipe.egg:script eggs = ${buildout:eggs} interpreter = py -entry-points = - searx-run=searx.webapp:run [supervisor] diff --git a/searx/__init__.py b/searx/__init__.py @@ -1,5 +1,5 @@ from os import environ -from os.path import realpath, dirname, join +from os.path import realpath, dirname, join, abspath try: from yaml import load except: @@ -7,8 +7,7 @@ except: stderr.write('[E] install pyyaml\n') exit(2) - -searx_dir = realpath(dirname(realpath(__file__))+'/../') +searx_dir = abspath(dirname(__file__)) engine_dir = dirname(realpath(__file__)) if 'SEARX_SETTINGS_PATH' in environ: @@ -19,4 +18,3 @@ else: with open(settings_path) as settings_yaml: settings = load(settings_yaml) - diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py @@ -35,6 +35,7 @@ engines = {} categories = {'general': []} + def load_module(filename): modname = splitext(filename)[0] if modname in sys.modules: @@ -50,7 +51,7 @@ if not 'engines' in settings or not settings['engines']: for engine_data in settings['engines']: engine_name = engine_data['engine'] - engine = load_module(engine_name+'.py') + engine = load_module(engine_name + '.py') for param_name in engine_data: if param_name == 'engine': continue @@ -58,38 +59,50 @@ for engine_data in settings['engines']: if engine_data['categories'] == 'none': engine.categories = [] else: - engine.categories = map(str.strip, engine_data['categories'].split(',')) + engine.categories = map( + str.strip, engine_data['categories'].split(',')) continue setattr(engine, param_name, engine_data[param_name]) for engine_attr in dir(engine): if engine_attr.startswith('_'): continue if getattr(engine, engine_attr) == None: - print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr) + print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr) # noqa sys.exit(1) engines[engine.name] = engine - engine.stats = {'result_count': 0, 'search_count': 0, 'page_load_time': 0, 'score_count': 0, 'errors': 0} + engine.stats = { + 'result_count': 0, + 'search_count': 0, + 'page_load_time': 0, + 'score_count': 0, + 'errors': 0 + } if hasattr(engine, 'categories'): for category_name in engine.categories: categories.setdefault(category_name, []).append(engine) else: categories['general'].append(engine) + def default_request_params(): - return {'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}} + return { + 'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}} + def make_callback(engine_name, results, suggestions, callback, params): # creating a callback wrapper for the search engine results def process_callback(response, **kwargs): cb_res = [] response.search_params = params - engines[engine_name].stats['page_load_time'] += (datetime.now() - params['started']).total_seconds() + engines[engine_name].stats['page_load_time'] += \ + (datetime.now() - params['started']).total_seconds() try: search_results = callback(response) except Exception, e: engines[engine_name].stats['errors'] += 1 results[engine_name] = cb_res - print '[E] Error with engine "{0}":\n\t{1}'.format(engine_name, str(e)) + print '[E] Error with engine "{0}":\n\t{1}'.format( + engine_name, str(e)) return for result in search_results: result['engine'] = engine_name @@ -101,23 +114,25 @@ def make_callback(engine_name, results, suggestions, callback, params): results[engine_name] = cb_res return process_callback + def score_results(results): - flat_res = filter(None, chain.from_iterable(izip_longest(*results.values()))) + flat_res = filter( + None, chain.from_iterable(izip_longest(*results.values()))) flat_len = len(flat_res) engines_len = len(results) results = [] # deduplication + scoring - for i,res in enumerate(flat_res): + for i, res in enumerate(flat_res): res['parsed_url'] = urlparse(res['url']) res['engines'] = [res['engine']] weight = 1.0 if hasattr(engines[res['engine']], 'weight'): weight = float(engines[res['engine']].weight) - score = int((flat_len - i)/engines_len)*weight+1 + score = int((flat_len - i) / engines_len) * weight + 1 duplicated = False for new_res in results: - p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path - p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path + p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa + p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\ p1 == p2 and\ res['parsed_url'].query == new_res['parsed_url'].query and\ @@ -125,7 +140,7 @@ def score_results(results): duplicated = new_res break if duplicated: - if len(res.get('content', '')) > len(duplicated.get('content', '')): + if len(res.get('content', '')) > len(duplicated.get('content', '')): # noqa duplicated['content'] = res['content'] duplicated['score'] += score duplicated['engines'].append(res['engine']) @@ -139,6 +154,7 @@ def score_results(results): results.append(res) return sorted(results, key=itemgetter('score'), reverse=True) + def search(query, request, selected_engines): global engines, categories, number_of_searches requests = [] @@ -160,13 +176,20 @@ def search(query, request, selected_engines): request_params['started'] = datetime.now() request_params = engine.request(query, request_params) - callback = make_callback(selected_engine['name'], results, suggestions, engine.response, request_params) - - request_args = dict(headers = request_params['headers'] - ,hooks = dict(response=callback) - ,cookies = request_params['cookies'] - ,timeout = settings['server']['request_timeout'] - ) + callback = make_callback( + selected_engine['name'], + results, + suggestions, + engine.response, + request_params + ) + + request_args = dict( + headers=request_params['headers'], + hooks=dict(response=callback), + cookies=request_params['cookies'], + timeout=settings['server']['request_timeout'] + ) if request_params['method'] == 'GET': req = grequests.get @@ -180,7 +203,7 @@ def search(query, request, selected_engines): requests.append(req(request_params['url'], **request_args)) grequests.map(requests) - for engine_name,engine_results in results.items(): + for engine_name, engine_results in results.items(): engines[engine_name].stats['search_count'] += 1 engines[engine_name].stats['result_count'] += len(engine_results) @@ -192,6 +215,7 @@ def search(query, request, selected_engines): return results, suggestions + def get_engines_stats(): # TODO refactor pageloads = [] @@ -200,14 +224,15 @@ def get_engines_stats(): errors = [] scores_per_result = [] - max_pageload = max_results = max_score = max_errors = max_score_per_result = 0 + max_pageload = max_results = max_score = max_errors = max_score_per_result = 0 # noqa for engine in engines.values(): if engine.stats['search_count'] == 0: continue - results_num = engine.stats['result_count']/float(engine.stats['search_count']) - load_times = engine.stats['page_load_time']/float(engine.stats['search_count']) + results_num = \ + engine.stats['result_count'] / float(engine.stats['search_count']) + load_times = engine.stats['page_load_time'] / float(engine.stats['search_count']) # noqa if results_num: - score = engine.stats['score_count'] / float(engine.stats['search_count']) + score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa score_per_result = score / results_num else: score = score_per_result = 0.0 @@ -220,30 +245,39 @@ def get_engines_stats(): results.append({'avg': results_num, 'name': engine.name}) scores.append({'avg': score, 'name': engine.name}) errors.append({'avg': engine.stats['errors'], 'name': engine.name}) - scores_per_result.append({'avg': score_per_result, 'name': engine.name}) + scores_per_result.append({ + 'avg': score_per_result, + 'name': engine.name + }) for engine in pageloads: - engine['percentage'] = int(engine['avg']/max_pageload*100) + engine['percentage'] = int(engine['avg'] / max_pageload * 100) for engine in results: - engine['percentage'] = int(engine['avg']/max_results*100) + engine['percentage'] = int(engine['avg'] / max_results * 100) for engine in scores: - engine['percentage'] = int(engine['avg']/max_score*100) + engine['percentage'] = int(engine['avg'] / max_score * 100) for engine in scores_per_result: - engine['percentage'] = int(engine['avg']/max_score_per_result*100) + engine['percentage'] = int(engine['avg'] / max_score_per_result * 100) for engine in errors: if max_errors: - engine['percentage'] = int(float(engine['avg'])/max_errors*100) + engine['percentage'] = int(float(engine['avg']) / max_errors * 100) else: engine['percentage'] = 0 - - return [('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))) - ,('Number of results', sorted(results, key=itemgetter('avg'), reverse=True)) - ,('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)) - ,('Scores per result', sorted(scores_per_result, key=itemgetter('avg'), reverse=True)) - ,('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)) - ] + return [ + ('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))), + ( + 'Number of results', + sorted(results, key=itemgetter('avg'), reverse=True) + ), + ('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)), + ( + 'Scores per result', + sorted(scores_per_result, key=itemgetter('avg'), reverse=True) + ), + ('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)), + ] diff --git a/searx/engines/bing.py b/searx/engines/bing.py @@ -4,11 +4,12 @@ from cgi import escape base_url = 'http://www.bing.com/' search_string = 'search?{query}' -locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx +locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx def request(query, params): - search_path = search_string.format(query=urlencode({'q': query, 'setmkt': locale})) + search_path = search_string.format( + query=urlencode({'q': query, 'setmkt': locale})) #if params['category'] == 'images': # params['url'] = base_url + 'images/' + search_path params['url'] = base_url + search_path diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py @@ -7,6 +7,7 @@ weight = 100 parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I) + def request(query, params): m = parser_re.match(query) if not m: @@ -19,7 +20,7 @@ def request(query, params): # wrong params return params - q = (from_currency+to_currency).upper() + q = (from_currency + to_currency).upper() params['url'] = url.format(query=q) params['ammount'] = ammount @@ -33,25 +34,27 @@ def response(resp): global base_url results = [] try: - _,conversion_rate,_ = resp.text.split(',', 2) + _, conversion_rate, _ = resp.text.split(',', 2) conversion_rate = float(conversion_rate) except: return results - title = '{0} {1} in {2} is {3}'.format(resp.search_params['ammount'] - ,resp.search_params['from'] - ,resp.search_params['to'] - ,resp.search_params['ammount']*conversion_rate - ) + title = '{0} {1} in {2} is {3}'.format( + resp.search_params['ammount'], + resp.search_params['from'], + resp.search_params['to'], + resp.search_params['ammount'] * conversion_rate + ) content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to']) now_date = datetime.now().strftime('%Y%m%d') url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' - url = url.format(now_date - ,resp.search_params['ammount'] - ,resp.search_params['from'].lower() - ,resp.search_params['to'].lower() - ) + url = url.format( + now_date, + resp.search_params['ammount'], + resp.search_params['from'].lower(), + resp.search_params['to'].lower() + ) results.append({'title': title, 'content': content, 'url': url}) return results diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py @@ -8,9 +8,11 @@ locale = 'en_US' # see http://www.dailymotion.com/doc/api/obj-video.html search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}' + def request(query, params): global search_url - params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale })) + params['url'] = search_url.format( + query=urlencode({'search': query, 'localization': locale})) return params @@ -32,6 +34,7 @@ def response(resp): results.append({'url': url, 'title': title, 'content': content}) return results + def text_content_from_html(html_string): desc_html = html.fragment_fromstring(html_string, create_parent=True) return desc_html.text_content() diff --git a/searx/settings.yml b/searx/settings.yml @@ -0,0 +1,107 @@ +server: + port : 8888 + secret_key : "ultrasecretkey" # change this! + debug : True + request_timeout : 3.0 # seconds + base_url: False + +engines: + - name : wikipedia + engine : mediawiki + url : https://en.wikipedia.org/ + number_of_results : 1 + + - name : bing + engine : bing + locale : en-US + + - name : currency + engine : currency_convert + categories : general + + - name : deviantart + engine : deviantart + categories : images + + - name : ddg definitions + engine : duckduckgo_definitions + + - name : duckduckgo + engine : duckduckgo + locale : en-us + + - name : filecrop + engine : filecrop + categories : files + + - name : flickr + engine : flickr + categories : images + + - name : github + engine : github + categories : it + + - name : google + engine : json_engine + search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} + categories : general + url_query : /responseData/results/unescapedUrl + content_query : /responseData/results/content + title_query : /responseData/results/titleNoFormatting + + - name : google images + engine : google_images + categories : images + + - name : piratebay + engine : piratebay + categories : videos, music, files + + - name : soundcloud + engine : soundcloud + categories : music + + - name : stackoverflow + engine : stackoverflow + categories : it + + - name : startpage + engine : startpage + + - name : twitter + engine : twitter + categories : social media + + - name : urbandictionary + engine : xpath + search_url : http://www.urbandictionary.com/define.php?term={query} + url_xpath : //div[@class="word"]//a/@href + title_xpath : //div[@class="word"]//a + content_xpath : //div[@class="definition"] + + - name : yahoo + engine : xpath + search_url : http://search.yahoo.com/search?p={query} + results_xpath : //div[@class="res"] + url_xpath : .//h3/a/@href + title_xpath : .//h3/a + content_xpath : .//div[@class="abstr"] + suggestion_xpath : //div[@id="satat"]//a + + - name : youtube + engine : youtube + categories : videos + + - name : dailymotion + engine : dailymotion + locale : en_US + categories : videos + + - name : vimeo + engine : vimeo + categories : videos + results_xpath : //div[@id="browse_content"]/ol/li + url_xpath : ./a/@href + title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() + content_xpath : ./a/img/@src diff --git a/searx/settings_robot.py b/searx/settings_robot.py @@ -1,16 +0,0 @@ - -port = 11111 - -secret_key = "ultrasecretkey" # change this! - -debug = False - -request_timeout = 5.0 # seconds - -weights = {} # 'search_engine_name': float(weight) | default is 1.0 - -blacklist = [] # search engine blacklist - -categories = {} # custom search engine categories - -base_url = None # "https://your.domain.tld/" or None (to use request parameters) diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml @@ -0,0 +1,107 @@ +server: + port : 11111 + secret_key : "ultrasecretkey" # change this! + debug : False + request_timeout : 3.0 # seconds + base_url: False + +engines: + - name : wikipedia + engine : mediawiki + url : https://en.wikipedia.org/ + number_of_results : 1 + + - name : bing + engine : bing + locale : en-US + + - name : currency + engine : currency_convert + categories : general + + - name : deviantart + engine : deviantart + categories : images + + - name : ddg definitions + engine : duckduckgo_definitions + + - name : duckduckgo + engine : duckduckgo + locale : en-us + + - name : filecrop + engine : filecrop + categories : files + + - name : flickr + engine : flickr + categories : images + + - name : github + engine : github + categories : it + + - name : google + engine : json_engine + search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} + categories : general + url_query : /responseData/results/unescapedUrl + content_query : /responseData/results/content + title_query : /responseData/results/titleNoFormatting + + - name : google images + engine : google_images + categories : images + + - name : piratebay + engine : piratebay + categories : videos, music, files + + - name : soundcloud + engine : soundcloud + categories : music + + - name : stackoverflow + engine : stackoverflow + categories : it + + - name : startpage + engine : startpage + + - name : twitter + engine : twitter + categories : social media + + - name : urbandictionary + engine : xpath + search_url : http://www.urbandictionary.com/define.php?term={query} + url_xpath : //div[@class="word"]//a/@href + title_xpath : //div[@class="word"]//a + content_xpath : //div[@class="definition"] + + - name : yahoo + engine : xpath + search_url : http://search.yahoo.com/search?p={query} + results_xpath : //div[@class="res"] + url_xpath : .//h3/a/@href + title_xpath : .//h3/a + content_xpath : .//div[@class="abstr"] + suggestion_xpath : //div[@id="satat"]//a + + - name : youtube + engine : youtube + categories : videos + + - name : dailymotion + engine : dailymotion + locale : en_US + categories : videos + + - name : vimeo + engine : vimeo + categories : videos + results_xpath : //div[@id="browse_content"]/ol/li + url_xpath : ./a/@href + title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() + content_xpath : ./a/img/@src diff --git a/searx/testing.py b/searx/testing.py @@ -7,10 +7,10 @@ from unittest2 import TestCase import os import subprocess -import sys class SearxTestLayer: + """Base layer for non-robot tests.""" __name__ = u'SearxTestLayer' @@ -36,24 +36,37 @@ class SearxRobotLayer(Layer): def setUp(self): os.setpgrp() # create new process group, become its leader + + # get program paths webapp = os.path.join( os.path.abspath(os.path.dirname(os.path.realpath(__file__))), 'webapp.py' ) exe = os.path.abspath(os.path.dirname(__file__) + '/../bin/py') + + # set robot settings path + os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath( + os.path.dirname(__file__) + '/settings_robot.yml') + + # run the server self.server = subprocess.Popen( - [exe, webapp, 'settings_robot'], + [exe, webapp], stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) def tearDown(self): - # TERM all processes in my group + # send TERM signal to all processes in my group, to stop subprocesses os.killpg(os.getpgid(self.server.pid), 15) + # remove previously set environment variable + del os.environ['SEARX_SETTINGS_PATH'] + SEARXROBOTLAYER = SearxRobotLayer() class SearxTestCase(TestCase): + """Base test case for non-robot tests.""" + layer = SearxTestLayer diff --git a/searx/utils.py b/searx/utils.py @@ -5,10 +5,12 @@ import codecs import cStringIO import re + def gen_useragent(): # TODO return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" + def highlight_content(content, query): if not content: @@ -34,10 +36,11 @@ def highlight_content(content, query): return content + class HTMLTextExtractor(HTMLParser): def __init__(self): HTMLParser.__init__(self) - self.result = [ ] + self.result = [] def handle_data(self, d): self.result.append(d) @@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser): def get_text(self): return u''.join(self.result) + def html_to_text(html): s = HTMLTextExtractor() s.feed(html) diff --git a/searx/webapp.py b/searx/webapp.py @@ -17,13 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, <asciimoo@gmail.com> ''' -import os -import sys -if __name__ == "__main__": - sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../')) - from searx import settings - from flask import Flask, request, render_template, url_for, Response, make_response, redirect from searx.engines import search, categories, engines, get_engines_stats import json @@ -33,10 +27,16 @@ from flask import send_from_directory from searx.utils import highlight_content, html_to_text +import os -app = Flask(__name__) -app.secret_key = settings['server']['secret_key'] +app = Flask( + __name__, + static_folder=os.path.join(os.path.dirname(__file__), 'static'), + template_folder=os.path.join(os.path.dirname(__file__), 'templates') +) + +app.secret_key = settings['server']['secret_key'] #TODO configurable via settings.yml favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud', @@ -81,6 +81,7 @@ def render(template_name, **kwargs): kwargs['selected_categories'] = ['general'] return render_template(template_name, **kwargs) + def parse_query(query): query_engines = [] query_parts = query.split() @@ -94,7 +95,7 @@ def parse_query(query): def index(): global categories - if request.method=='POST': + if request.method == 'POST': request_data = request.form else: request_data = request.args @@ -106,7 +107,7 @@ def index(): query, selected_engines = parse_query(request_data['q'].encode('utf-8')) if not len(selected_engines): - for pd_name,pd in request_data.items(): + for pd_name, pd in request_data.items(): if pd_name.startswith('category_'): category = pd_name[9:] if not category in categories: @@ -159,23 +160,24 @@ def index(): response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split()))) return response elif request_data.get('format') == 'rss': - response_rss = render('opensearch_response_rss.xml' - ,results=results - ,q=request_data['q'] - ,number_of_results=len(results) - ,base_url=get_base_url() - ) + response_rss = render( + 'opensearch_response_rss.xml', + results=results, + q=request_data['q'], + number_of_results=len(results), + base_url=get_base_url() + ) return Response(response_rss, mimetype='text/xml') - - return render('results.html' - ,results=results - ,q=request_data['q'] - ,selected_categories=selected_categories - ,number_of_results=len(results)+len(featured_results) - ,featured_results=featured_results - ,suggestions=suggestions - ) + return render( + 'results.html', + results=results, + q=request_data['q'], + selected_categories=selected_categories, + number_of_results=len(results) + len(featured_results), + featured_results=featured_results, + suggestions=suggestions + ) @app.route('/about', methods=['GET']) @@ -192,9 +194,9 @@ def list_engines(): @app.route('/preferences', methods=['GET', 'POST']) def preferences(): - if request.method=='POST': + if request.method == 'POST': selected_categories = [] - for pd_name,pd in request.form.items(): + for pd_name, pd in request.form.items(): if pd_name.startswith('category_'): category = pd_name[9:] if not category in categories: @@ -203,7 +205,10 @@ def preferences(): if selected_categories: resp = make_response(redirect('/')) # cookie max age: 4 weeks - resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4) + resp.set_cookie( + 'categories', ','.join(selected_categories), + max_age=60 * 60 * 24 * 7 * 4 + ) return resp return render('preferences.html') @@ -238,6 +243,7 @@ def opensearch(): mimetype="application/xml") return resp + @app.route('/favicon.ico') def favicon(): return send_from_directory(os.path.join(app.root_path, 'static/img'), @@ -248,10 +254,11 @@ def run(): from gevent import monkey monkey.patch_all() - app.run(debug = settings['server']['debug'] - ,use_debugger = settings['server']['debug'] - ,port = settings['server']['port'] - ) + app.run( + debug=settings['server']['debug'], + use_debugger=settings['server']['debug'], + port=settings['server']['port'] + ) if __name__ == "__main__": diff --git a/settings.yml b/settings.yml @@ -1,107 +0,0 @@ -server: - port : 8888 - secret_key : "ultrasecretkey" # change this! - debug : True - request_timeout : 3.0 # seconds - base_url: False - -engines: - - name : wikipedia - engine : mediawiki - url : https://en.wikipedia.org/ - number_of_results : 1 - - - name : bing - engine : bing - locale : en-US - - - name : currency - engine : currency_convert - categories : general - - - name : deviantart - engine : deviantart - categories : images - - - name : ddg definitions - engine : duckduckgo_definitions - - - name : duckduckgo - engine : duckduckgo - locale : en-us - - - name : filecrop - engine : filecrop - categories : files - - - name : flickr - engine : flickr - categories : images - - - name : github - engine : github - categories : it - - - name : google - engine : json_engine - search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} - categories : general - url_query : /responseData/results/unescapedUrl - content_query : /responseData/results/content - title_query : /responseData/results/titleNoFormatting - - - name : google images - engine : google_images - categories : images - - - name : piratebay - engine : piratebay - categories : videos, music, files - - - name : soundcloud - engine : soundcloud - categories : music - - - name : stackoverflow - engine : stackoverflow - categories : it - - - name : startpage - engine : startpage - - - name : twitter - engine : twitter - categories : social media - - - name : urbandictionary - engine : xpath - search_url : http://www.urbandictionary.com/define.php?term={query} - url_xpath : //div[@class="word"]//a/@href - title_xpath : //div[@class="word"]//a - content_xpath : //div[@class="definition"] - - - name : yahoo - engine : xpath - search_url : http://search.yahoo.com/search?p={query} - results_xpath : //div[@class="res"] - url_xpath : .//h3/a/@href - title_xpath : .//h3/a - content_xpath : .//div[@class="abstr"] - suggestion_xpath : //div[@id="satat"]//a - - - name : youtube - engine : youtube - categories : videos - - - name : dailymotion - engine : dailymotion - locale : en_US - categories : videos - - - name : vimeo - engine : vimeo - categories : videos - results_xpath : //div[@id="browse_content"]/ol/li - url_xpath : ./a/@href - title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() - content_xpath : ./a/img/@src diff --git a/setup.py b/setup.py @@ -49,4 +49,19 @@ setup( 'zope.testrunner', ] }, + entry_points={ + 'console_scripts': [ + 'searx-run = searx.webapp:run' + ] + }, + package_data={ + 'searx': [ + 'settings.yml', + '../README.md', + 'static/*/*', + 'templates/*.html', + 'templates/result_templates/*.html', + ], + }, + )