logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git

wolframalpha_noapi.py (3581B)


  1. # Wolfram|Alpha (Science)
  2. #
  3. # @website https://www.wolframalpha.com/
  4. # @provide-api yes (https://api.wolframalpha.com/v2/)
  5. #
  6. # @using-api no
  7. # @results JSON
  8. # @stable no
  9. # @parse url, infobox
  10. from json import loads
  11. from time import time
  12. from searx.poolrequests import get as http_get
  13. from searx.url_utils import urlencode
  14. # search-url
  15. url = 'https://www.wolframalpha.com/'
  16. search_url = url + 'input/json.jsp'\
  17. '?async=false'\
  18. '&banners=raw'\
  19. '&debuggingdata=false'\
  20. '&format=image,plaintext,imagemap,minput,moutput'\
  21. '&formattimeout=2'\
  22. '&{query}'\
  23. '&output=JSON'\
  24. '&parsetimeout=2'\
  25. '&proxycode={token}'\
  26. '&scantimeout=0.5'\
  27. '&sponsorcategories=true'\
  28. '&statemethod=deploybutton'
  29. referer_url = url + 'input/?{query}'
  30. token = {'value': '',
  31. 'last_updated': None}
  32. # pods to display as image in infobox
  33. # this pods do return a plaintext, but they look better and are more useful as images
  34. image_pods = {'VisualRepresentation',
  35. 'Illustration',
  36. 'Symbol'}
  37. # seems, wolframalpha resets its token in every hour
  38. def obtain_token():
  39. update_time = time() - (time() % 3600)
  40. try:
  41. token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
  42. token['value'] = loads(token_response.text)['code']
  43. token['last_updated'] = update_time
  44. except:
  45. pass
  46. return token
  47. def init():
  48. obtain_token()
  49. # do search-request
  50. def request(query, params):
  51. # obtain token if last update was more than an hour
  52. if time() - (token['last_updated'] or 0) > 3600:
  53. obtain_token()
  54. params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
  55. params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
  56. return params
  57. # get response from search-request
  58. def response(resp):
  59. results = []
  60. resp_json = loads(resp.text)
  61. if not resp_json['queryresult']['success']:
  62. return []
  63. # TODO handle resp_json['queryresult']['assumptions']
  64. result_chunks = []
  65. infobox_title = ""
  66. result_content = ""
  67. for pod in resp_json['queryresult']['pods']:
  68. pod_id = pod.get('id', '')
  69. pod_title = pod.get('title', '')
  70. pod_is_result = pod.get('primary', None)
  71. if 'subpods' not in pod:
  72. continue
  73. if pod_id == 'Input' or not infobox_title:
  74. infobox_title = pod['subpods'][0]['plaintext']
  75. for subpod in pod['subpods']:
  76. if subpod['plaintext'] != '' and pod_id not in image_pods:
  77. # append unless it's not an actual answer
  78. if subpod['plaintext'] != '(requires interactivity)':
  79. result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
  80. if pod_is_result or not result_content:
  81. if pod_id != "Input":
  82. result_content = pod_title + ': ' + subpod['plaintext']
  83. elif 'img' in subpod:
  84. result_chunks.append({'label': pod_title, 'image': subpod['img']})
  85. if not result_chunks:
  86. return []
  87. results.append({'infobox': infobox_title,
  88. 'attributes': result_chunks,
  89. 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
  90. results.append({'url': resp.request.headers['Referer'],
  91. 'title': 'Wolfram|Alpha (' + infobox_title + ')',
  92. 'content': result_content})
  93. return results