logo

searx

My custom branch(es) of searx, a meta-search engine
commit: d793c2733c7aac3aacf40f3f5cf9fc0919305e76
parent: c3b7ed868783691d5678977779e91568cc2f2fec
Author: asciimoo <asciimoo@gmail.com>
Date:   Tue, 15 Oct 2013 19:11:43 +0200

[enh] engine types

Diffstat:

Mexamples/basic_engine.py2+-
Msearx/__init__.py7-------
Msearx/engines/__init__.py14++++++++------
Msearx/engines/duckduckgo.py19++++++++++++-------
Msearx/engines/duckduckgo_definitions.py12++++++------
Msearx/static/css/style.css4+++-
Msearx/templates/results.html5++++-
7 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/examples/basic_engine.py b/examples/basic_engine.py @@ -17,5 +17,5 @@ def response(resp): '''post-response callback resp: requests response object ''' - return [resp.text] + return [{'url': '', 'title': '', 'content': ''}] diff --git a/searx/__init__.py b/searx/__init__.py @@ -1,7 +0,0 @@ - -base_result_template = """ -<div class="result"> - <h3 class="result_title"><a href="{url}">{title}</a></h3> - <p class="content">{content}<br />{url}</p> -</div> -""" diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py @@ -6,7 +6,7 @@ import grequests engine_dir = dirname(realpath(__file__)) -engines = [] +engines = {} for filename in listdir(engine_dir): modname = splitext(filename)[0] @@ -16,14 +16,16 @@ for filename in listdir(engine_dir): engine = load_source(modname, filepath) if not hasattr(engine, 'request') or not hasattr(engine, 'response'): continue - engines.append(engine) + engines[modname] = engine def default_request_params(): return {'method': 'GET', 'headers': {}, 'data': {}, 'url': ''} -def make_callback(results, callback): +def make_callback(engine_name, results, callback): def process_callback(response, **kwargs): - results.extend(callback(response)) + for result in callback(response): + result['engine'] = engine_name + results.append(result) return process_callback def search(query, request): @@ -31,11 +33,11 @@ def search(query, request): requests = [] results = [] user_agent = request.headers.get('User-Agent', '') - for engine in engines: + for ename, engine in engines.items(): headers = default_request_params() headers['User-Agent'] = user_agent request_params = engine.request(query, headers) - callback = make_callback(results, engine.response) + callback = make_callback(ename, results, engine.response) if request_params['method'] == 'GET': req = grequests.get(request_params['url'] ,headers=headers diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py @@ -1,14 +1,19 @@ -from lxml import html +from json import 
loads def request(query, params): - params['method'] = 'POST' - params['url'] = 'https://duckduckgo.com/html' - params['data']['q'] = query + params['url'] = 'https://duckduckgo.com/d.js?q=%s&l=us-en&p=1&s=0' % query return params def response(resp): - dom = html.fromstring(resp.text) - results = dom.xpath('//div[@class="results_links results_links_deep web-result"]') - return [html.tostring(x) for x in results] + results = [] + search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1] + for r in search_res: + if not r.get('t'): + continue + results.append({'title': r['t'] + ,'content': r['a'] + ,'url': r['u'] + }) + return results diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py @@ -1,5 +1,4 @@ import json -from searx import base_result_template def request(query, params): params['url'] = 'http://api.duckduckgo.com/?q=%s&format=json&pretty=0' % query @@ -10,10 +9,11 @@ def response(resp): search_res = json.loads(resp.text) results = [] if 'Definition' in search_res: - res = {'title' : search_res.get('Heading', '') - ,'content' : search_res.get('Definition', '') - ,'url' : search_res.get('AbstractURL', '') - } - results.append(base_result_template.format(**res)) + if search_res.get('AbstractURL'): + res = {'title' : search_res.get('Heading', '') + ,'content' : search_res.get('Definition', '') + ,'url' : search_res.get('AbstractURL', '') + } + results.append(res) return results diff --git a/searx/static/css/style.css b/searx/static/css/style.css @@ -8,7 +8,9 @@ html { h1 { font-size: 5em; } -input { border: 2px solid #8888FF; padding: 8px; background-color: #FFFFFF; font-size: 1.6em; } +input { border: 2px solid #8888FF; padding: 8px; background-color: #FFFFFF; font-size: 1.3em; } + +a { text-decoration: none; } .result_title { margin-bottom: 0; } diff --git a/searx/templates/results.html b/searx/templates/results.html @@ -5,6 +5,9 @@ <input type="submit" value="search" /> </form> {% for result in results %} - <p>{{ 
result|safe }}</p> + <div class="result"> + <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> + <p class="content"><span class="engine">{{ result.engine }}</span><br />{% if result.content %}{{ result.content|safe }}<br />{% endif %}<span class="url">{{ result.url }}</span></p> + </div> {% endfor %} {% endblock %}