logo

searx

My custom branch(es) on searx, a meta-search engine — git clone https://hacktivis.me/git/searx.git

microsoft_academic.py (1825B)


  1. """
  2. Microsoft Academic (Science)
  3. @website https://academic.microsoft.com
  4. @provide-api yes
  5. @using-api no
  6. @results JSON
  7. @stable no
  8. @parse url, title, content
  9. """
  10. from datetime import datetime
  11. from json import loads
  12. from uuid import uuid4
  13. from searx.url_utils import urlencode
  14. from searx.utils import html_to_text
  15. categories = ['images']
  16. paging = True
  17. result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
  18. def request(query, params):
  19. correlation_id = uuid4()
  20. msacademic = uuid4()
  21. time_now = datetime.now()
  22. params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
  23. params['cookies']['msacademic'] = str(msacademic)
  24. params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
  25. params['method'] = 'POST'
  26. params['data'] = {
  27. 'Query': '@{query}@'.format(query=query),
  28. 'Limit': 10,
  29. 'Offset': params['pageno'] - 1,
  30. 'Filters': '',
  31. 'OrderBy': '',
  32. 'SortAscending': False,
  33. }
  34. return params
  35. def response(resp):
  36. results = []
  37. response_data = loads(resp.text)
  38. for result in response_data['results']:
  39. url = _get_url(result)
  40. title = result['e']['dn']
  41. content = _get_content(result)
  42. results.append({
  43. 'url': url,
  44. 'title': html_to_text(title),
  45. 'content': html_to_text(content),
  46. })
  47. return results
  48. def _get_url(result):
  49. if 's' in result['e']:
  50. return result['e']['s'][0]['u']
  51. return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])
  52. def _get_content(result):
  53. if 'd' in result['e']:
  54. content = result['e']['d']
  55. if len(content) > 300:
  56. return content[:300] + '...'
  57. return content
  58. return ''