wolframalpha_noapi.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. # Wolfram|Alpha (Science)
  2. #
  3. # @website https://www.wolframalpha.com/
  4. # @provide-api yes (https://api.wolframalpha.com/v2/)
  5. #
  6. # @using-api no
  7. # @results JSON
  8. # @stable no
  9. # @parse url, infobox
  10. from cgi import escape
  11. from json import loads
  12. from time import time
  13. from urllib import urlencode
  14. from lxml.etree import XML
  15. from searx.poolrequests import get as http_get
  16. # search-url
  17. url = 'https://www.wolframalpha.com/'
  18. search_url = url + 'input/json.jsp'\
  19. '?async=false'\
  20. '&banners=raw'\
  21. '&debuggingdata=false'\
  22. '&format=image,plaintext,imagemap,minput,moutput'\
  23. '&formattimeout=2'\
  24. '&{query}'\
  25. '&output=JSON'\
  26. '&parsetimeout=2'\
  27. '&proxycode={token}'\
  28. '&scantimeout=0.5'\
  29. '&sponsorcategories=true'\
  30. '&statemethod=deploybutton'
  31. referer_url = url + 'input/?{query}'
  32. token = {'value': '',
  33. 'last_updated': None}
  34. # pods to display as image in infobox
  35. # this pods do return a plaintext, but they look better and are more useful as images
  36. image_pods = {'VisualRepresentation',
  37. 'Illustration',
  38. 'Symbol'}
  39. # seems, wolframalpha resets its token in every hour
  40. def obtain_token():
  41. update_time = time() - (time() % 3600)
  42. try:
  43. token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
  44. token['value'] = loads(token_response.text)['code']
  45. token['last_updated'] = update_time
  46. except:
  47. pass
  48. return token
  49. obtain_token()
  50. # do search-request
  51. def request(query, params):
  52. # obtain token if last update was more than an hour
  53. if time() - token['last_updated'] > 3600:
  54. obtain_token()
  55. params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
  56. params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
  57. return params
  58. # get response from search-request
  59. def response(resp):
  60. results = []
  61. resp_json = loads(resp.text)
  62. if not resp_json['queryresult']['success']:
  63. return []
  64. # TODO handle resp_json['queryresult']['assumptions']
  65. result_chunks = []
  66. infobox_title = ""
  67. result_content = ""
  68. for pod in resp_json['queryresult']['pods']:
  69. pod_id = pod.get('id', '')
  70. pod_title = pod.get('title', '')
  71. pod_is_result = pod.get('primary', None)
  72. if 'subpods' not in pod:
  73. continue
  74. if pod_id == 'Input' or not infobox_title:
  75. infobox_title = pod['subpods'][0]['plaintext']
  76. for subpod in pod['subpods']:
  77. if subpod['plaintext'] != '' and pod_id not in image_pods:
  78. # append unless it's not an actual answer
  79. if subpod['plaintext'] != '(requires interactivity)':
  80. result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
  81. if pod_is_result or not result_content:
  82. if pod_id != "Input":
  83. result_content = pod_title + ': ' + subpod['plaintext']
  84. elif 'img' in subpod:
  85. result_chunks.append({'label': pod_title, 'image': subpod['img']})
  86. if not result_chunks:
  87. return []
  88. results.append({'infobox': infobox_title,
  89. 'attributes': result_chunks,
  90. 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
  91. results.append({'url': resp.request.headers['Referer'].decode('utf8'),
  92. 'title': 'Wolfram|Alpha (' + infobox_title + ')',
  93. 'content': result_content})
  94. return results