commit: b5a3dfca60f23bac10ade068c40729f030bbad63
parent: 09b7673fbd271349b6878959bd2e1ae846981e13
Author: Adam Tauber <asciimoo@gmail.com>
Date: Tue, 19 Jan 2016 17:02:14 +0100
Merge pull request #486 from a01200356/master
[enh] WolframAlpha no API engine (and tests for both)
Diffstat:
5 files changed, 621 insertions(+), 16 deletions(-)
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
@@ -10,11 +10,18 @@
from urllib import urlencode
from lxml import etree
+from re import search
# search-url
base_url = 'http://api.wolframalpha.com/v2/query'
search_url = base_url + '?appid={api_key}&{query}&format=plaintext'
-api_key = ''
+site_url = 'http://www.wolframalpha.com/input/?{query}'
+api_key = '' # defined in settings.yml
+
+# xpath variables
+failure_xpath = '/queryresult[attribute::success="false"]'
+answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
+input_xpath = '//pod[starts-with(attribute::title, "Input")]/subpod/plaintext'
# do search-request
@@ -45,16 +52,26 @@ def response(resp):
search_results = etree.XML(resp.content)
# return empty array if there are no results
- if search_results.xpath('/queryresult[attribute::success="false"]'):
+ if search_results.xpath(failure_xpath):
return []
- # parse result
- result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text
- result = replace_pua_chars(result)
+ # parse answers
+ answers = search_results.xpath(answer_xpath)
+ if answers:
+ for answer in answers:
+ answer = replace_pua_chars(answer.text)
+
+ results.append({'answer': answer})
+
+ # if there's no input section in search_results, check if answer has the input embedded (before their "=" sign)
+ try:
+ query_input = search_results.xpath(input_xpath)[0].text
+ except IndexError:
+ query_input = search(u'([^\uf7d9]+)', answers[0].text).group(1)
- # append result
- # TODO: shouldn't it bind the source too?
- results.append({'answer': result})
+ # append link to site
+ result_url = site_url.format(query=urlencode({'i': query_input.encode('utf-8')}))
+ results.append({'url': result_url,
+ 'title': query_input + " - Wolfram|Alpha"})
- # return results
return results
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
@@ -0,0 +1,86 @@
+# WolframAlpha (Maths)
+#
+# @website http://www.wolframalpha.com/
+# @provide-api yes (http://api.wolframalpha.com/v2/)
+#
+# @using-api no
+# @results HTML
+# @stable no
+# @parse answer
+
+from re import search, sub
+from json import loads
+from urllib import urlencode
+from lxml import html
+import HTMLParser
+
+# search-url
+url = 'http://www.wolframalpha.com/'
+search_url = url + 'input/?{query}'
+
+# xpath variables
+scripts_xpath = '//script'
+title_xpath = '//title'
+failure_xpath = '//p[attribute::class="pfail"]'
+
+
+# do search-request
+def request(query, params):
+    # Build the search URL: the query is URL-encoded as the ``i`` parameter
+    # of the Wolfram|Alpha input page and stored under params['url'].
+    encoded_query = urlencode({'i': query})
+    params['url'] = search_url.format(query=encoded_query)
+    # hand back the same params mapping that was passed in
+    return params
+
+
+# get response from search-request
+def response(resp):
+ # Parse the Wolfram|Alpha result page held in resp.text into a list of result dicts.
+ # The list is returned empty when the page carries the failure marker; otherwise it
+ # gains an {'answer': ...} dict when an answer line is found in the page scripts, and
+ # a {'url': ..., 'title': ...} dict built from the page title.
+ results = []
+ line = None
+
+ dom = html.fromstring(resp.text)
+ scripts = dom.xpath(scripts_xpath)
+
+ # the answer is inside a js function call of the form pod_XXXX.push(...)
+ # answer can be located in different 'pods', although by default it should be in pod_0200
+ # (patterns are tried in list order; each captures the rest of the line after "push(")
+ possible_locations = ['pod_0200\.push\((.*)',
+ 'pod_0100\.push\((.*)']
+
+ # failed result: the page matches failure_xpath, so return the still-empty list
+ if dom.xpath(failure_xpath):
+ return results
+
+ # get line that matches the pattern
+ for pattern in possible_locations:
+ for script in scripts:
+ try:
+ line = search(pattern, script.text_content()).group(1)
+ break
+ # search() returned None (no match in this script), so .group raised
+ except AttributeError:
+ continue
+ if line:
+ break
+
+ if line:
+ # extract answer from json: keep the text from the first '{' through the last '}'
+ answer = line[line.find('{'):line.rfind('}') + 1]
+ try:
+ answer = loads(answer)
+ # NOTE(review): broad catch; any failure triggers a second attempt on the
+ # unicode-escape encoded text -- confirm which errors are actually expected here
+ except Exception:
+ answer = loads(answer.encode('unicode-escape'))
+ answer = answer['stringified']
+
+ # clean plaintext answer: decode unicode escapes, unescape HTML entities,
+ # then strip every remaining backslash
+ h = HTMLParser.HTMLParser()
+ answer = h.unescape(answer.decode('unicode-escape'))
+ answer = sub(r'\\', '', answer)
+
+ results.append({'answer': answer})
+
+ # user input is in first part of title
+ # NOTE(review): [0] raises IndexError when the page has no <title> element
+ title = dom.xpath(title_xpath)[0].text.encode('utf-8')
+ # drop the last 16 bytes of the title (presumably the " - Wolfram|Alpha" suffix)
+ # and reuse request() to build the link URL from what remains
+ result_url = request(title[:-16], {})['url']
+
+ # append result
+ results.append({'url': result_url,
+ 'title': title.decode('utf-8')})
+
+ return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -300,13 +300,15 @@ engines:
engine : vimeo
shortcut : vm
-# You can use the engine using the official stable API, but you need an API key
-# See : http://products.wolframalpha.com/api/
-# - name : wolframalpha
-# shortcut : wa
-# engine : wolframalpha_api
-# api_key: 'apikey' # required!
-# timeout: 6.0
+ - name : wolframalpha
+ shortcut : wa
+ # You can use the engine using the official stable API, but you need an API key
+ # See : http://products.wolframalpha.com/api/
+ # engine : wolframalpha_api
+ # api_key: 'apikey' # required!
+ engine : wolframalpha_noapi
+ timeout: 6.0
+ disabled : True
#The blekko technology and team have joined IBM Watson! -> https://blekko.com/
# - name : blekko images
diff --git a/tests/unit/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py
@@ -0,0 +1,307 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import wolframalpha_api
+from searx.testing import SearxTestCase
+
+
+class TestWolframAlphaAPIEngine(SearxTestCase):
+
+    def test_request(self):
+        # the returned params must carry a URL containing the query and the
+        # Wolfram|Alpha host, and must still hold the api_key that was passed in
+        search_terms = 'test_query'
+        expected_key = 'XXXXXX-XXXXXXXXXX'
+        engine_params = defaultdict(dict)
+        engine_params['api_key'] = expected_key
+        returned = wolframalpha_api.request(search_terms, engine_params)
+
+        self.assertIn('url', returned)
+        built_url = returned['url']
+        self.assertIn(search_terms, built_url)
+        self.assertIn('wolframalpha.com', built_url)
+
+        self.assertIn('api_key', returned)
+        self.assertIn(expected_key, returned['api_key'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, wolframalpha_api.response, None)
+ self.assertRaises(AttributeError, wolframalpha_api.response, [])
+ self.assertRaises(AttributeError, wolframalpha_api.response, '')
+ self.assertRaises(AttributeError, wolframalpha_api.response, '[]')
+
+ xml = '''<?xml version='1.0' encoding='UTF-8'?>
+ <queryresult success='false' error='false' />
+ '''
+ # test failure
+ response = mock.Mock(content=xml)
+ self.assertEqual(wolframalpha_api.response(response), [])
+
+ xml = """<?xml version='1.0' encoding='UTF-8'?>
+ <queryresult success='true'
+ error='false'
+ numpods='6'
+ datatypes=''
+ timedout=''
+ timedoutpods=''
+ timing='0.684'
+ parsetiming='0.138'
+ parsetimedout='false'
+ recalculate=''
+ id='MSPa416020a7966dachc463600000f9c66cc21444cfg'
+ host='http://www3.wolframalpha.com'
+ server='6'
+ related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?...'
+ version='2.6'>
+ <pod title='Input'
+ scanner='Identity'
+ id='Input'
+ position='100'
+ error='false'
+ numsubpods='1'>
+ <subpod title=''>
+ <plaintext>sqrt(-1)</plaintext>
+ </subpod>
+ </pod>
+ <pod title='Result'
+ scanner='Simplification'
+ id='Result'
+ position='200'
+ error='false'
+ numsubpods='1'
+ primary='true'>
+ <subpod title=''>
+ <plaintext></plaintext>
+ </subpod>
+ <states count='1'>
+ <state name='Step-by-step solution'
+ input='Result__Step-by-step solution' />
+ </states>
+ </pod>
+ <pod title='Polar coordinates'
+ scanner='Numeric'
+ id='PolarCoordinates'
+ position='300'
+ error='false'
+ numsubpods='1'>
+ <subpod title=''>
+ <plaintext>r1 (radius), θ90° (angle)</plaintext>
+ </subpod>
+ </pod>
+ <pod title='Position in the complex plane'
+ scanner='Numeric'
+ id='PositionInTheComplexPlane'
+ position='400'
+ error='false'
+ numsubpods='1'>
+ <subpod title=''>
+ <plaintext></plaintext>
+ </subpod>
+ </pod>
+ <pod title='All 2nd roots of -1'
+ scanner='RootsOfUnity'
+ id=''
+ position='500'
+ error='false'
+ numsubpods='2'>
+ <subpod title=''>
+ <plaintext> (principal root)</plaintext>
+ </subpod>
+ <subpod title=''>
+ <plaintext>-</plaintext>
+ </subpod>
+ </pod>
+ <pod title='Plot of all roots in the complex plane'
+ scanner='RootsOfUnity'
+ id='PlotOfAllRootsInTheComplexPlane'
+ position='600'
+ error='false'
+ numsubpods='1'>
+ <subpod title=''>
+ <plaintext></plaintext>
+ </subpod>
+ </pod>
+ </queryresult>
+ """
+ # test private use area char in response
+ response = mock.Mock(content=xml)
+ results = wolframalpha_api.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 2)
+ self.assertIn('i', results[0]['answer'])
+ self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title'])
+ self.assertEquals('http://www.wolframalpha.com/input/?i=sqrt%28-1%29', results[1]['url'])
+
+ xml = """<?xml version='1.0' encoding='UTF-8'?>
+ <queryresult success='true'
+ error='false'
+ numpods='2'
+ datatypes=''
+ timedout=''
+ timedoutpods=''
+ timing='1.286'
+ parsetiming='0.255'
+ parsetimedout='false'
+ recalculate=''
+ id='MSPa195222ad740ede5214h30000480ca61h003d3gd6'
+ host='http://www3.wolframalpha.com'
+ server='20'
+ related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?id=...'
+ version='2.6'>
+ <pod title='Indefinite integral'
+ scanner='Integral'
+ id='IndefiniteIntegral'
+ position='100'
+ error='false'
+ numsubpods='1'
+ primary='true'>
+ <subpod title=''>
+ <plaintext>∫1/xxlog(x)+constant</plaintext>
+ </subpod>
+ <states count='1'>
+ <state name='Step-by-step solution'
+ input='IndefiniteIntegral__Step-by-step solution' />
+ </states>
+ <infos count='1'>
+ <info text='log(x) is the natural logarithm'>
+ <link url='http://reference.wolfram.com/mathematica/ref/Log.html'
+ text='Documentation'
+ title='Mathematica' />
+ <link url='http://functions.wolfram.com/ElementaryFunctions/Log'
+ text='Properties'
+ title='Wolfram Functions Site' />
+ <link url='http://mathworld.wolfram.com/NaturalLogarithm.html'
+ text='Definition'
+ title='MathWorld' />
+ </info>
+ </infos>
+ </pod>
+ <pod title='Plots of the integral'
+ scanner='Integral'
+ id='Plot'
+ position='200'
+ error='false'
+ numsubpods='2'>
+ <subpod title=''>
+ <plaintext></plaintext>
+ <states count='1'>
+ <statelist count='2'
+ value='Complex-valued plot'
+ delimiters=''>
+ <state name='Complex-valued plot'
+ input='Plot__1_Complex-valued plot' />
+ <state name='Real-valued plot'
+ input='Plot__1_Real-valued plot' />
+ </statelist>
+ </states>
+ </subpod>
+ <subpod title=''>
+ <plaintext></plaintext>
+ <states count='1'>
+ <statelist count='2'
+ value='Complex-valued plot'
+ delimiters=''>
+ <state name='Complex-valued plot'
+ input='Plot__2_Complex-valued plot' />
+ <state name='Real-valued plot'
+ input='Plot__2_Real-valued plot' />
+ </statelist>
+ </states>
+ </subpod>
+ </pod>
+ <assumptions count='1'>
+ <assumption type='Clash'
+ word='integral'
+ template='Assuming "${word}" is ${desc1}. Use as ${desc2} instead'
+ count='2'>
+ <value name='IntegralsWord'
+ desc='an integral'
+ input='*C.integral-_*IntegralsWord-' />
+ <value name='MathematicalFunctionIdentityPropertyClass'
+ desc='a function property'
+ input='*C.integral-_*MathematicalFunctionIdentityPropertyClass-' />
+ </assumption>
+ </assumptions>
+ </queryresult>
+ """
+ # test integral
+ response = mock.Mock(content=xml)
+ results = wolframalpha_api.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 2)
+ self.assertIn('log(x)+c', results[0]['answer'])
+ self.assertIn('∫1/xx - Wolfram|Alpha'.decode('utf-8'), results[1]['title'])
+ self.assertEquals('http://www.wolframalpha.com/input/?i=%E2%88%AB1%2Fx%EF%9D%8Cx', results[1]['url'])
+
+ xml = """<?xml version='1.0' encoding='UTF-8'?>
+ <queryresult success='true'
+ error='false'
+ numpods='4'
+ datatypes='Solve'
+ timedout=''
+ timedoutpods=''
+ timing='0.79'
+ parsetiming='0.338'
+ parsetimedout='false'
+ recalculate=''
+ id='MSPa7481f7i06d25h3deh2900004810i3a78d9b4fdc'
+ host='http://www5b.wolframalpha.com'
+ server='23'
+ related='http://www5b.wolframalpha.com/api/v2/relatedQueries.jsp?id=...'
+ version='2.6'>
+ <pod title='Input interpretation'
+ scanner='Identity'
+ id='Input'
+ position='100'
+ error='false'
+ numsubpods='1'>
+ <subpod title=''>
+ <plaintext>solve x^2+x0</plaintext>
+ </subpod>
+ </pod>
+ <pod title='Results'
+ scanner='Solve'
+ id='Result'
+ position='200'
+ error='false'
+ numsubpods='2'
+ primary='true'>
+ <subpod title=''>
+ <plaintext>x-1</plaintext>
+ </subpod>
+ <subpod title=''>
+ <plaintext>x0</plaintext>
+ </subpod>
+ <states count='1'>
+ <state name='Step-by-step solution'
+ input='Result__Step-by-step solution' />
+ </states>
+ </pod>
+ <pod title='Root plot'
+ scanner='Solve'
+ id='RootPlot'
+ position='300'
+ error='false'
+ numsubpods='1'>
+ <subpod title=''>
+ <plaintext></plaintext>
+ </subpod>
+ </pod>
+ <pod title='Number line'
+ scanner='Solve'
+ id='NumberLine'
+ position='400'
+ error='false'
+ numsubpods='1'>
+ <subpod title=''>
+ <plaintext></plaintext>
+ </subpod>
+ </pod>
+ </queryresult>
+ """
+ # test equation with multiple answers
+ response = mock.Mock(content=xml)
+ results = wolframalpha_api.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 3)
+ self.assertIn('x=-1', results[0]['answer'])
+ self.assertIn('x=0', results[1]['answer'])
+ self.assertIn('solve x^2+x0 - Wolfram|Alpha'.decode('utf-8'), results[2]['title'])
+ self.assertEquals('http://www.wolframalpha.com/input/?i=solve+x%5E2%2Bx%EF%9F%990', results[2]['url'])
diff --git a/tests/unit/engines/test_wolframalpha_noapi.py b/tests/unit/engines/test_wolframalpha_noapi.py
@@ -0,0 +1,193 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import wolframalpha_noapi
+from searx.testing import SearxTestCase
+
+
+class TestWolframAlphaNoAPIEngine(SearxTestCase):
+
+    def test_request(self):
+        # the returned params must carry a URL that contains both the query
+        # and the Wolfram|Alpha host
+        search_terms = 'test_query'
+        engine_params = defaultdict(dict)
+        engine_params['pageno'] = 1
+        returned = wolframalpha_noapi.request(search_terms, engine_params)
+
+        self.assertIn('url', returned)
+        built_url = returned['url']
+        for expected_part in (search_terms, 'wolframalpha.com'):
+            self.assertIn(expected_part, built_url)
+
+ def test_response(self):
+ self.assertRaises(AttributeError, wolframalpha_noapi.response, None)
+ self.assertRaises(AttributeError, wolframalpha_noapi.response, [])
+ self.assertRaises(AttributeError, wolframalpha_noapi.response, '')
+ self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]')
+
+ html = """
+ <!DOCTYPE html>
+ <title> Parangaricutirimícuaro - Wolfram|Alpha</title>
+ <meta charset="utf-8" />
+ <body>
+ <div id="closest">
+ <p class="pfail">Wolfram|Alpha doesn't know how to interpret your input.</p>
+ <div id="dtips">
+ <div class="tip">
+ <span class="tip-title">Tip: </span>
+ Check your spelling, and use English
+ <span class="tip-extra"></span>
+ </div>
+ </div>
+ </div>
+ </body>
+ </html>
+ """
+ # test failed query
+ response = mock.Mock(text=html)
+ self.assertEqual(wolframalpha_noapi.response(response), [])
+
+ html = """
+ <!DOCTYPE html>
+ <title> sqrt(-1) - Wolfram|Alpha</title>
+ <meta charset="utf-8" />
+ <body>
+ <script type="text/javascript">
+ try {
+ if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+ context.jsonArray.popups.pod_0100 = [];
+ }
+ context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": ""});
+ } catch(e) { }
+
+ try {
+ if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
+ context.jsonArray.popups.pod_0200 = [];
+ }
+ context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": ""});
+ } catch(e) { }
+ </script>
+ </body>
+ </html>
+ """
+ # test plaintext
+ response = mock.Mock(text=html)
+ results = wolframalpha_noapi.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 2)
+ self.assertEquals('i', results[0]['answer'])
+ self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title'])
+ self.assertEquals('http://www.wolframalpha.com/input/?i=+sqrt%28-1%29', results[1]['url'])
+
+ html = """
+ <!DOCTYPE html>
+ <title> integral 1/x - Wolfram|Alpha</title>
+ <meta charset="utf-8" />
+ <body>
+ <script type="text/javascript">
+ try {
+ if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+ context.jsonArray.popups.pod_0100 = [];
+ }
+ context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"});
+ } catch(e) { }
+ </script>
+ </body>
+ </html>
+ """
+ # test integral
+ response = mock.Mock(text=html)
+ results = wolframalpha_noapi.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 2)
+ self.assertIn('log(x)+c', results[0]['answer'])
+ self.assertIn('integral 1/x - Wolfram|Alpha', results[1]['title'])
+ self.assertEquals('http://www.wolframalpha.com/input/?i=+integral+1%2Fx', results[1]['url'])
+
+ html = """
+ <!DOCTYPE html>
+ <title> ∫1/x x - Wolfram|Alpha</title>
+ <meta charset="utf-8" />
+ <body>
+ <script type="text/javascript">
+ try {
+ if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+ context.jsonArray.popups.pod_0100 = [];
+ }
+ context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"});
+ } catch(e) { }
+ </script>
+ </body>
+ </html>
+ """
+ # test input in mathematical notation
+ response = mock.Mock(text=html)
+ results = wolframalpha_noapi.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 2)
+ self.assertIn('log(x)+c', results[0]['answer'])
+ self.assertIn('∫1/x x - Wolfram|Alpha'.decode('utf-8'), results[1]['title'])
+ self.assertEquals('http://www.wolframalpha.com/input/?i=+%E2%88%AB1%2Fx+%EF%9D%8Cx', results[1]['url'])
+
+ html = """
+ <!DOCTYPE html>
+ <title> 1 euro to yen - Wolfram|Alpha</title>
+ <meta charset="utf-8" />
+ <body>
+ <script type="text/javascript">
+ try {
+ if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+ context.jsonArray.popups.pod_0100 = [];
+ }
+ context.jsonArray.popups.pod_0100.push( {"stringified": "convert euro1 (euro) to Japanese yen"});
+ } catch(e) { }
+
+ try {
+ if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
+ context.jsonArray.popups.pod_0200 = [];
+ }
+ context.jsonArray.popups.pod_0200.push( {"stringified": "¥130.5 (Japanese yen)"});
+ } catch(e) { }
+ </script>
+ </body>
+ </html>
+ """
+ # test output with htmlentity
+ response = mock.Mock(text=html)
+ results = wolframalpha_noapi.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 2)
+ self.assertIn('¥'.decode('utf-8'), results[0]['answer'])
+ self.assertIn('1 euro to yen - Wolfram|Alpha', results[1]['title'])
+ self.assertEquals('http://www.wolframalpha.com/input/?i=+1+euro+to+yen', results[1]['url'])
+
+ html = """
+ <!DOCTYPE html>
+ <title> distance from nairobi to kyoto in inches - Wolfram|Alpha</title>
+ <meta charset="utf-8" />
+ <body>
+ <script type="text/javascript">
+ try {
+ if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+ context.jsonArray.popups.pod_0100 = [];
+ }
+[...].pod_0100.push( {"stringified": "convert distance | from | Nairobi, Kenya\nto | Kyoto, Japan to inches"});
+ } catch(e) { }
+
+ try {
+ if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
+ context.jsonArray.popups.pod_0200 = [];
+ }
+pod_0200.push({"stringified": "4.295×10^8 inches","mOutput": "Quantity[4.295×10^8,&quot;Inches&quot;]"});
+
+ } catch(e) { }
+ </script>
+ </body>
+ </html>
+ """
+ # test output with utf-8 character
+ response = mock.Mock(text=html)
+ results = wolframalpha_noapi.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 2)
+ self.assertIn('4.295×10^8 inches'.decode('utf-8'), results[0]['answer'])
+ self.assertIn('distance from nairobi to kyoto in inches - Wolfram|Alpha', results[1]['title'])
+ self.assertEquals('http://www.wolframalpha.com/input/?i=+distance+from+nairobi+to+kyoto+in+inches',
+ results[1]['url'])