logo

searx

My custom branch(es) on searx, a meta-search engine
commit: 8f48c518aa049a37e6e7721cb17ceef92c519ca8
parent: 390ad59bfcd256e9145d7ef539acaf3a83a73c8b
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sun, 11 Sep 2016 00:29:05 +0200

Merge pull request #609 from LuccoJ/betterwolfram

Improving Wolfram Alpha search hit content

Diffstat:

M searx/engines/wolframalpha_api.py | 16 ++++++++++++----
M searx/engines/wolframalpha_noapi.py | 16 ++++++++++++----
M tests/unit/engines/test_wolframalpha_api.py | 6 ++++--
M tests/unit/engines/test_wolframalpha_noapi.py | 6 ++++--
4 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py @@ -18,10 +18,10 @@ api_key = '' # defined in settings.yml # xpath variables failure_xpath = '/queryresult[attribute::success="false"]' -answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext' pods_xpath = '//pod' subpods_xpath = './subpod' +pod_primary_xpath = './@primary' pod_id_xpath = './@id' pod_title_xpath = './@title' plaintext_xpath = './plaintext' @@ -75,13 +75,15 @@ def response(resp): try: infobox_title = search_results.xpath(input_xpath)[0].text except: - infobox_title = None + infobox_title = "" pods = search_results.xpath(pods_xpath) result_chunks = [] + result_content = "" for pod in pods: pod_id = pod.xpath(pod_id_xpath)[0] pod_title = pod.xpath(pod_title_xpath)[0] + pod_is_result = pod.xpath(pod_primary_xpath) subpods = pod.xpath(subpods_xpath) if not subpods: @@ -94,6 +96,10 @@ def response(resp): if content and pod_id not in image_pods: + if pod_is_result or not result_content: + if pod_id != "Input": + result_content = "%s: %s" % (pod_title, content) + # if no input pod was found, title is first plaintext pod if not infobox_title: infobox_title = content @@ -109,6 +115,8 @@ def response(resp): if not result_chunks: return [] + title = "Wolfram|Alpha (%s)" % infobox_title + # append infobox results.append({'infobox': infobox_title, 'attributes': result_chunks, @@ -116,7 +124,7 @@ def response(resp): # append link to site results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': 'Wolfram|Alpha', - 'content': infobox_title}) + 'title': title, + 'content': result_content}) return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py @@ -8,9 +8,11 @@ # @stable no # @parse url, infobox +from cgi import escape from json import loads from time import time from urllib import urlencode +from lxml.etree import XML from 
searx.poolrequests import get as http_get @@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\ referer_url = url + 'input/?{query}' token = {'value': '', - 'last_updated': 0} + 'last_updated': None} # pods to display as image in infobox # this pods do return a plaintext, but they look better and are more useful as images @@ -80,10 +82,12 @@ def response(resp): # TODO handle resp_json['queryresult']['assumptions'] result_chunks = [] - infobox_title = None + infobox_title = "" + result_content = "" for pod in resp_json['queryresult']['pods']: pod_id = pod.get('id', '') pod_title = pod.get('title', '') + pod_is_result = pod.get('primary', None) if 'subpods' not in pod: continue @@ -97,6 +101,10 @@ def response(resp): if subpod['plaintext'] != '(requires interactivity)': result_chunks.append({'label': pod_title, 'value': subpod['plaintext']}) + if pod_is_result or not result_content: + if pod_id != "Input": + result_content = pod_title + ': ' + subpod['plaintext'] + elif 'img' in subpod: result_chunks.append({'label': pod_title, 'image': subpod['img']}) @@ -108,7 +116,7 @@ def response(resp): 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': 'Wolfram|Alpha', - 'content': infobox_title}) + 'title': 'Wolfram|Alpha (' + infobox_title + ')', + 'content': result_content}) return results diff --git a/tests/unit/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py @@ -103,7 +103,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('Wolfram|Alpha', results[1]['title']) + self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title']) + self.assertIn('result_plaintext', results[1]['content']) # test calc xml = """<?xml 
version='1.0' encoding='UTF-8'?> @@ -161,4 +162,5 @@ class TestWolframAlphaAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('Wolfram|Alpha', results[1]['title']) + self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title']) + self.assertIn('integral_plaintext', results[1]['content']) diff --git a/tests/unit/engines/test_wolframalpha_noapi.py b/tests/unit/engines/test_wolframalpha_noapi.py @@ -140,7 +140,8 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('Wolfram|Alpha', results[1]['title']) + self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title']) + self.assertIn('result_plaintext', results[1]['content']) # test calc json = r""" @@ -219,4 +220,5 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(referer_url, results[0]['urls'][0]['url']) self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title']) self.assertEqual(referer_url, results[1]['url']) - self.assertEqual('Wolfram|Alpha', results[1]['title']) + self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title']) + self.assertIn('integral_plaintext', results[1]['content'])