logo

searx

My custom branch(es) of searx, a meta-search engine
commit: a192438e9a59b14290b8e719fe4d5679a6371a25
parent: 1297d4109d7a1fd15c9c41f192d74ae6b0b9b179
Author: asciimoo <asciimoo@gmail.com>
Date:   Fri, 15 Nov 2013 18:55:18 +0100

[enh] csv output support

Diffstat:

M searx/utils.py  | 33 +++++++++++++++++++++++++++++++++
M searx/webapp.py | 13 +++++++++++++
2 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/searx/utils.py b/searx/utils.py
@@ -1,5 +1,8 @@
 from HTMLParser import HTMLParser
 import htmlentitydefs
+import csv
+import codecs
+import cStringIO
 
 class HTMLTextExtractor(HTMLParser):
     def __init__(self):
@@ -24,3 +27,33 @@ def html_to_text(html):
     s = HTMLTextExtractor()
     s.feed(html)
     return s.get_text()
+
+
+class UnicodeWriter:
+    """
+    A CSV writer which will write rows to CSV file "f",
+    which is encoded in the given encoding.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        # Redirect output to a queue
+        self.queue = cStringIO.StringIO()
+        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
+        self.stream = f
+        self.encoder = codecs.getincrementalencoder(encoding)()
+
+    def writerow(self, row):
+        self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row])
+        # Fetch UTF-8 output from the queue ...
+        data = self.queue.getvalue()
+        data = data.decode("utf-8")
+        # ... and reencode it into the target encoding
+        data = self.encoder.encode(data)
+        # write to the target stream
+        self.stream.write(data)
+        # empty queue
+        self.queue.truncate(0)
+
+    def writerows(self, rows):
+        for row in rows:
+            self.writerow(row)
diff --git a/searx/webapp.py b/searx/webapp.py
@@ -26,6 +26,8 @@ from flask import Flask, request, render_template, url_for, Response, make_respo
 from searx.engines import search, categories, engines, get_engines_stats
 from searx import settings
 import json
+import cStringIO
+from searx.utils import UnicodeWriter
 
 
 app = Flask(__name__)
@@ -104,6 +106,17 @@ def index():
             result['pretty_url'] = result['url']
     if request_data.get('format') == 'json':
         return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json')
+    elif request_data.get('format') == 'csv':
+        csv = UnicodeWriter(cStringIO.StringIO())
+        if len(results):
+            keys = results[0].keys()
+            csv.writerow(keys)
+            for row in results:
+                csv.writerow([row[key] for key in keys])
+        csv.stream.seek(0)
+        response = Response(csv.stream.read(), mimetype='application/csv', )
+        response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format(query))
+        return response
     template = render('results.html'
                         ,results=results
                         ,q=request_data['q']