logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: bb628469d31d9ce61b2188aae3f570441eec8803
parent 8eb064dea1f312865dc5d5588d8a317a80efbb49
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Tue,  2 Sep 2014 21:01:24 +0200

fix wikipedia engine and add comments

* add paging support
* make number_of_results changeable
* make result calculation more clear
* add comments

Diffstat:

M searx/engines/wikipedia.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++----------
M searx/settings.yml | 3 +--
2 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py @@ -1,30 +1,67 @@ +## Wikipedia (Web) +# +# @website http://www.wikipedia.org +# @provide-api yes (http://www.mediawiki.org/wiki/API:Search) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title +# +# @todo content + from json import loads from urllib import urlencode, quote -url = 'https://{language}.wikipedia.org/' - -search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json&sroffset={offset}' # noqa - -number_of_results = 10 - +# engine dependent config +categories = ['general'] language_support = True +paging = True +number_of_results = 1 + +# search-url +url = 'https://{language}.wikipedia.org/' +search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json&sroffset={offset}&srlimit={limit}' # noqa +# do search-request def request(query, params): - offset = (params['pageno'] - 1) * 10 + offset = (params['pageno'] - 1) * number_of_results + if params['language'] == 'all': language = 'en' else: language = params['language'].split('_')[0] + + # write search-language back to params, required in response params['language'] = language + params['url'] = search_url.format(query=urlencode({'srsearch': query}), offset=offset, + limit=number_of_results, language=language) + return params +# get response from search-request def response(resp): + results = [] + search_results = loads(resp.text) - res = search_results.get('query', {}).get('search', []) - return [{'url': url.format(language=resp.search_params['language']) + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')), # noqa - 'title': result['title']} for result in res[:int(number_of_results)]] + + # return empty array if there are no results + if not search_results.get('query', {}).get('search'): + return [] + + # parse results + for result in search_results['query']['search']: + res_url = url.format(language=resp.search_params['language']) + 'wiki/' 
+ quote(result['title'].replace(' ', '_').encode('utf-8')) + + # append result + results.append({'url': res_url, + 'title': result['title'], + 'content': ''}) + + # return results + return results diff --git a/searx/settings.yml b/searx/settings.yml @@ -11,9 +11,8 @@ server: engines: - name : wikipedia engine : wikipedia - number_of_results : 1 - paging : False shortcut : wp +# number_of_results : 1 # default is 1 - name : bing engine : bing