logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 28fed5d9ad4227ca772f258d5e9bd3d48452b845
parent: 20400c40c34b6122621476c46460c5a3a8624c89
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sun, 19 Oct 2014 12:07:02 +0200

Merge pull request #115 from pointhi/engine_faroo

add faroo engine

Diffstat:

A searx/engines/faroo.py | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/settings.yml | 6 ++++++
M searx/utils.py | 3 +++
3 files changed, 117 insertions(+), 0 deletions(-)

diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py
@@ -0,0 +1,108 @@
+## Faroo (Web, News)
+#
+# @website http://www.faroo.com
+# @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key
+#
+# @using-api yes
+# @results JSON
+# @stable yes
+# @parse url, title, content, publishedDate, img_src
+
+from urllib import urlencode
+from json import loads
+import datetime
+from searx.utils import searx_useragent
+
+# engine dependent config
+categories = ['general', 'news']
+paging = True
+language_support = True
+number_of_results = 10
+api_key = None
+
+# search-url
+url = 'http://www.faroo.com/'
+search_url = url + 'api?{query}&start={offset}&length={number_of_results}&l={language}&src={categorie}&i=false&f=json&key={api_key}'
+
+search_category = {'general': 'web',
+                   'news': 'news'}
+
+# do search-request
+def request(query, params):
+    offset = (params['pageno']-1) * number_of_results + 1
+    categorie = search_category.get(params['category'], 'web')
+
+    if params['language'] == 'all':
+        language = 'en'
+    else:
+        language = params['language'].split('_')[0]
+
+    # skip, if language is not supported
+    if language != 'en' and\
+       language != 'de' and\
+       language != 'zh':
+        return params
+
+    params['url'] = search_url.format(offset=offset,
+                                      number_of_results=number_of_results,
+                                      query=urlencode({'q': query}),
+                                      language=language,
+                                      categorie=categorie,
+                                      api_key=api_key )
+
+    # using searx User-Agent
+    params['headers']['User-Agent'] = searx_useragent()
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    # HTTP-Code 401: api-key is not valide
+    if resp.status_code == 401:
+        raise Exception("API key is not valide")
+        return []
+
+    # HTTP-Code 429: rate limit exceeded
+    if resp.status_code == 429:
+        raise Exception("rate limit has been exceeded!")
+        return []
+
+    results = []
+
+    search_res = loads(resp.text)
+
+    # return empty array if there are no results
+    if not search_res.get('results', {}):
+        return []
+
+    # parse results
+    for result in search_res['results']:
+        if result['news']:
+            # timestamp (how many milliseconds have passed between now and the beginning of 1970)
+            publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0)
+
+            # append news result
+            results.append({'url': result['url'],
+                            'title': result['title'],
+                            'publishedDate': publishedDate,
+                            'content': result['kwic']})
+
+        else:
+            # append general result
+            # TODO, publishedDate correct?
+            results.append({'url': result['url'],
+                            'title': result['title'],
+                            'content': result['kwic']})
+
+        # append image result if image url is set
+        # TODO, show results with an image like in faroo
+        if result['iurl']:
+            results.append({'template': 'images.html',
+                            'url': result['url'],
+                            'title': result['title'],
+                            'content': result['kwic'],
+                            'img_src': result['iurl']})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -52,6 +52,12 @@ engines:
     engine : duckduckgo
     shortcut : ddg
 
+# api-key required: http://www.faroo.com/hp/api/api.html#key
+#  - name : faroo
+#    engine : faroo
+#    shortcut : fa
+#    api_key : 'apikey' # required!
+
 # down - website is under criminal investigation by the UK
 #  - name : filecrop
 #    engine : filecrop
diff --git a/searx/utils.py b/searx/utils.py
@@ -20,6 +20,9 @@ def gen_useragent():
     return ua.format(os=choice(ua_os), version=choice(ua_versions))
 
 
+def searx_useragent():
+    return 'searx'
+
 def highlight_content(content, query):
 
     if not content: