microsoft_academic.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. """
  2. Microsoft Academic (Science)
  3. @website https://academic.microsoft.com
  4. @provide-api yes
  5. @using-api no
  6. @results JSON
  7. @stable no
  8. @parse url, title, content
  9. """
  10. from datetime import datetime
  11. from json import loads
  12. from uuid import uuid4
  13. from searx.url_utils import urlencode
  14. from searx.utils import html_to_text
  15. categories = ['images']
  16. paging = True
  17. result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
  18. def request(query, params):
  19. correlation_id = uuid4()
  20. msacademic = uuid4()
  21. time_now = datetime.now()
  22. params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
  23. params['cookies']['msacademic'] = str(msacademic)
  24. params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
  25. params['method'] = 'POST'
  26. params['data'] = {
  27. 'Query': '@{query}@'.format(query=query),
  28. 'Limit': 10,
  29. 'Offset': params['pageno'] - 1,
  30. 'Filters': '',
  31. 'OrderBy': '',
  32. 'SortAscending': False,
  33. }
  34. return params
  35. def response(resp):
  36. results = []
  37. response_data = loads(resp.text)
  38. if not response_data:
  39. return results
  40. for result in response_data['results']:
  41. url = _get_url(result)
  42. title = result['e']['dn']
  43. content = _get_content(result)
  44. results.append({
  45. 'url': url,
  46. 'title': html_to_text(title),
  47. 'content': html_to_text(content),
  48. })
  49. return results
  50. def _get_url(result):
  51. if 's' in result['e']:
  52. return result['e']['s'][0]['u']
  53. return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])
  54. def _get_content(result):
  55. if 'd' in result['e']:
  56. content = result['e']['d']
  57. if len(content) > 300:
  58. return content[:300] + '...'
  59. return content
  60. return ''