commit: d827fc49a11b6f84bba3d006b54a70a6a05757fd
parent b51ba32f619e6b7a927444475b0ee986d4d13a60
Author: a01200356 <a01200356@itesm.mx>
Date: Tue, 29 Dec 2015 21:11:49 -0600
Remove unnecessary code in wolframalpha_noapi engine
The answer is scraped from a js function, so parsing the html tree
doesn't achieve anything here.
Diffstat:
1 file changed, 18 insertions(+), 31 deletions(-)
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
@@ -10,8 +10,6 @@
import re
import json
from urllib import urlencode
-from lxml import html
-from searx.engines.xpath import extract_text
# search-url
url = 'http://www.wolframalpha.com/'
@@ -25,42 +23,31 @@ def request(query, params):
return params
-# tries to find answer under the pattern given
-def extract_answer(script_list, pattern):
- answer = None
+# get response from search-request
+def response(resp):
+ results = []
+
+ # the answer is inside a js function
+ # answer can be located in different 'pods', although by default it should be in pod_0200
+ possible_locations = ['pod_0200\.push(.*)\n',
+ 'pod_0100\.push(.*)\n']
# get line that matches the pattern
- for script in script_list:
+ for pattern in possible_locations:
try:
- line = re.search(pattern, script.text_content()).group(1)
+ line = re.search(pattern, resp.text).group(1)
+ break
except AttributeError:
continue
- # extract answer from json
- answer = line[line.find('{') : line.rfind('}')+1]
- answer = json.loads(answer.encode('unicode-escape'))
- answer = answer['stringified'].decode('unicode-escape')
-
- return answer
-
+ if not line:
+ return results
-# get response from search-request
-def response(resp):
-
- dom = html.fromstring(resp.text)
-
- # the answer is inside a js script
- scripts = dom.xpath('//script')
+ # extract answer from json
+ answer = line[line.find('{') : line.rfind('}')+1]
+ answer = json.loads(answer.encode('unicode-escape'))
+ answer = answer['stringified'].decode('unicode-escape')
- results = []
-
- # answer can be located in different 'pods', although by default it should be in pod_0200
- answer = extract_answer(scripts, 'pod_0200\.push(.*)\n')
- if not answer:
- answer = extract_answer(scripts, 'pod_0100\.push(.*)\n')
- if answer:
- results.append({'answer': answer})
- else:
- results.append({'answer': answer})
+ results.append({'answer': answer})
return results