commit: 8f48c518aa049a37e6e7721cb17ceef92c519ca8
parent: 390ad59bfcd256e9145d7ef539acaf3a83a73c8b
Author: Adam Tauber <asciimoo@gmail.com>
Date: Sun, 11 Sep 2016 00:29:05 +0200
Merge pull request #609 from LuccoJ/betterwolfram
Improving Wolfram Alpha search hit content
Diffstat:
4 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
@@ -18,10 +18,10 @@ api_key = '' # defined in settings.yml
# xpath variables
failure_xpath = '/queryresult[attribute::success="false"]'
-answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
pods_xpath = '//pod'
subpods_xpath = './subpod'
+pod_primary_xpath = './@primary'
pod_id_xpath = './@id'
pod_title_xpath = './@title'
plaintext_xpath = './plaintext'
@@ -75,13 +75,15 @@ def response(resp):
try:
infobox_title = search_results.xpath(input_xpath)[0].text
except:
- infobox_title = None
+ infobox_title = ""
pods = search_results.xpath(pods_xpath)
result_chunks = []
+ result_content = ""
for pod in pods:
pod_id = pod.xpath(pod_id_xpath)[0]
pod_title = pod.xpath(pod_title_xpath)[0]
+ pod_is_result = pod.xpath(pod_primary_xpath)
subpods = pod.xpath(subpods_xpath)
if not subpods:
@@ -94,6 +96,10 @@ def response(resp):
if content and pod_id not in image_pods:
+ if pod_is_result or not result_content:
+ if pod_id != "Input":
+ result_content = "%s: %s" % (pod_title, content)
+
# if no input pod was found, title is first plaintext pod
if not infobox_title:
infobox_title = content
@@ -109,6 +115,8 @@ def response(resp):
if not result_chunks:
return []
+ title = "Wolfram|Alpha (%s)" % infobox_title
+
# append infobox
results.append({'infobox': infobox_title,
'attributes': result_chunks,
@@ -116,7 +124,7 @@ def response(resp):
# append link to site
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
- 'title': 'Wolfram|Alpha',
- 'content': infobox_title})
+ 'title': title,
+ 'content': result_content})
return results
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
@@ -8,9 +8,11 @@
# @stable no
# @parse url, infobox
+from cgi import escape
from json import loads
from time import time
from urllib import urlencode
+from lxml.etree import XML
from searx.poolrequests import get as http_get
@@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\
referer_url = url + 'input/?{query}'
token = {'value': '',
- 'last_updated': 0}
+ 'last_updated': None}
# pods to display as image in infobox
# these pods do return plaintext, but they look better and are more useful as images
@@ -80,10 +82,12 @@ def response(resp):
# TODO handle resp_json['queryresult']['assumptions']
result_chunks = []
- infobox_title = None
+ infobox_title = ""
+ result_content = ""
for pod in resp_json['queryresult']['pods']:
pod_id = pod.get('id', '')
pod_title = pod.get('title', '')
+ pod_is_result = pod.get('primary', None)
if 'subpods' not in pod:
continue
@@ -97,6 +101,10 @@ def response(resp):
if subpod['plaintext'] != '(requires interactivity)':
result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
+ if pod_is_result or not result_content:
+ if pod_id != "Input":
+ result_content = pod_title + ': ' + subpod['plaintext']
+
elif 'img' in subpod:
result_chunks.append({'label': pod_title, 'image': subpod['img']})
@@ -108,7 +116,7 @@ def response(resp):
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
- 'title': 'Wolfram|Alpha',
- 'content': infobox_title})
+ 'title': 'Wolfram|Alpha (' + infobox_title + ')',
+ 'content': result_content})
return results
diff --git a/tests/unit/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py
@@ -103,7 +103,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
self.assertEqual(referer_url, results[1]['url'])
- self.assertEqual('Wolfram|Alpha', results[1]['title'])
+ self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
+ self.assertIn('result_plaintext', results[1]['content'])
# test calc
xml = """<?xml version='1.0' encoding='UTF-8'?>
@@ -161,4 +162,5 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
self.assertEqual(referer_url, results[1]['url'])
- self.assertEqual('Wolfram|Alpha', results[1]['title'])
+ self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
+ self.assertIn('integral_plaintext', results[1]['content'])
diff --git a/tests/unit/engines/test_wolframalpha_noapi.py b/tests/unit/engines/test_wolframalpha_noapi.py
@@ -140,7 +140,8 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
self.assertEqual(referer_url, results[1]['url'])
- self.assertEqual('Wolfram|Alpha', results[1]['title'])
+ self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
+ self.assertIn('result_plaintext', results[1]['content'])
# test calc
json = r"""
@@ -219,4 +220,5 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
self.assertEqual(referer_url, results[1]['url'])
- self.assertEqual('Wolfram|Alpha', results[1]['title'])
+ self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
+ self.assertIn('integral_plaintext', results[1]['content'])