logo

searx

My custom branch(es) of searx, a meta-search engine.
git clone https://hacktivis.me/git/searx.git
commit: 4578575c284584a58cce0acd85f86bef2f49d77f
parent 47b4a85fe33cfbb1c83556bdf3c3a6cdfdb178ab
Author: a01200356 <a01200356@itesm.mx>
Date:   Wed, 23 Dec 2015 00:01:00 -0600

Wolfie kinda works using API

Diffstat:

A searx/engines/wolframalpha.py     | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A searx/engines/wolframalpha_api.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/search.py                   |  2 +-
M searx/settings.yml                |  6 ++++++
4 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/searx/engines/wolframalpha.py b/searx/engines/wolframalpha.py
@@ -0,0 +1,60 @@
+"""
+ WolframAlpha
+
+ @website http://www.wolframalpha.com/
+
+ @using-api yes
+ @results no c
+ @stable i guess so
+ @parse result
+"""
+
+import wolframalpha
+
+# engine dependent config
+paging = False
+
+# search-url
+# url = 'http://www.wolframalpha.com/'
+# search_url = url+'input/?{query}'
+
+client_id = '5952JX-X52L3VKWT8'
+'''
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'i': query}))
+    print params
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    print resp
+
+    dom = html.fromstring(resp.text)
+    #resshit = dom.find_class('output pnt')
+    #for shit in resshit:
+        #print shit.text_content()
+    results = []
+    #results.append({'url': 'https://wikipedia.org', 'title': 'Wolfie, lol', 'content': 'es kwatro'})
+    #print results
+    #return results
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        print result
+
+        link = result.xpath(link_xpath)[0]
+        href = urljoin(url, link.attrib.get('href'))
+        title = escape(extract_text(link))
+        content = escape(extract_text(result.xpath(content_xpath)))
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content})
+
+    print results
+    return results
+'''
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
@@ -0,0 +1,70 @@
+# Wolfram Alpha (Maths)
+#
+# @website http://www.wolframalpha.com
+# @provide-api yes (http://api.wolframalpha.com/v2/)
+#
+# @using-api yes
+# @results XML
+# @stable yes
+# @parse result
+
+from urllib import urlencode
+from lxml import etree
+from searx.engines.xpath import extract_text
+from searx.utils import html_to_text
+
+# search-url
+base_url = 'http://api.wolframalpha.com/v2/query'
+search_url = base_url + '?appid={api_key}&{query}&format=plaintext'
+site_url = 'http://wolframalpha.com/input/?{query}'
+
+#embedded_url = '<iframe width="540" height="304" ' +\
+# 'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
+# 'frameborder="0" allowfullscreen></iframe>'
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'input': query}),
+                                      api_key=api_key)
+
+    # need this for url in response
+    global my_query
+    my_query = query
+
+    return params
+
+# replace private user area characters to make text legible
+def replace_pua_chars(text):
+    pua_chars = { u'\uf74c': 'd',
+                  u'\uf74d': u'\u212f',
+                  u'\uf74e': 'i',
+                  u'\uf7d9': '=' }
+
+    for k, v in pua_chars.iteritems():
+        text = text.replace(k, v)
+
+    return text
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_results = etree.XML(resp.content)
+
+    # return empty array if there are no results
+    if search_results.xpath('/queryresult[attribute::success="false"]'):
+        return []
+
+    # parse result
+    result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text
+    result = replace_pua_chars(result)
+
+    # bind url from site
+    result_url = site_url.format(query=urlencode({'i': my_query}))
+
+    # append result
+    results.append({'url': result_url,
+                    'title': result})
+
+    # return results
+    return results
diff --git a/searx/search.py b/searx/search.py
@@ -98,7 +98,7 @@ def make_callback(engine_name, callback, params, result_container):
         with threading.RLock():
             engines[engine_name].stats['page_load_time'] += search_duration
 
-        timeout_overhead = 0.2 # seconds
+        timeout_overhead = 0.5 # seconds
         timeout_limit = engines[engine_name].timeout + timeout_overhead
 
         if search_duration > timeout_limit:
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -300,6 +300,12 @@ engines:
     engine : vimeo
     shortcut : vm
 
+  - name : wolframalpha
+    shortcut : wa
+    engine : wolframalpha_api
+    api_key: '5952JX-X52L3VKWT8'
+    timeout: 6.0
+
 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/
 # - name : blekko images
 # engine : blekko_images