wolframalpha_noapi.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
# WolframAlpha (Maths)
#
# @website     http://www.wolframalpha.com/
# @provide-api yes (http://api.wolframalpha.com/v2/)
#
# @using-api   no
# @results     HTML
# @stable      no
# @parse       answer
  10. from cgi import escape
  11. from json import loads
  12. from time import time
  13. from urllib import urlencode
  14. from searx.poolrequests import get as http_get
  15. # search-url
  16. url = 'https://www.wolframalpha.com/'
  17. search_url = url + 'input/?{query}'
  18. search_url = url + 'input/json.jsp'\
  19. '?async=true'\
  20. '&banners=raw'\
  21. '&debuggingdata=false'\
  22. '&format=image,plaintext,imagemap,minput,moutput'\
  23. '&formattimeout=2'\
  24. '&{query}'\
  25. '&output=JSON'\
  26. '&parsetimeout=2'\
  27. '&proxycode={token}'\
  28. '&scantimeout=0.5'\
  29. '&sponsorcategories=true'\
  30. '&statemethod=deploybutton'
  31. # xpath variables
  32. scripts_xpath = '//script'
  33. title_xpath = '//title'
  34. failure_xpath = '//p[attribute::class="pfail"]'
  35. token = {'value': '',
  36. 'last_updated': None}
  37. # seems, wolframalpha resets its token in every hour
  38. def obtain_token():
  39. update_time = time() - (time() % 3600)
  40. try:
  41. token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
  42. token['value'] = loads(token_response.text)['code']
  43. token['last_updated'] = update_time
  44. except:
  45. pass
  46. return token
  47. obtain_token()
  48. # do search-request
  49. def request(query, params):
  50. # obtain token if last update was more than an hour
  51. if time() - token['last_updated'] > 3600:
  52. obtain_token()
  53. params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
  54. params['headers']['Referer'] = 'https://www.wolframalpha.com/input/?i=' + query
  55. return params
  56. # get response from search-request
  57. def response(resp):
  58. resp_json = loads(resp.text)
  59. if not resp_json['queryresult']['success']:
  60. return []
  61. # TODO handle resp_json['queryresult']['assumptions']
  62. result_chunks = []
  63. for pod in resp_json['queryresult']['pods']:
  64. pod_title = pod.get('title', '')
  65. if 'subpods' not in pod:
  66. continue
  67. for subpod in pod['subpods']:
  68. if 'img' in subpod:
  69. result_chunks.append(u'<p>{0}<br /><img src="{1}" alt="{2}" /></p>'
  70. .format(escape(pod_title or subpod['img']['alt']),
  71. escape(subpod['img']['src']),
  72. escape(subpod['img']['alt'])))
  73. if not result_chunks:
  74. return []
  75. return [{'url': resp.request.headers['Referer'],
  76. 'title': 'Wolframalpha',
  77. 'content': ''.join(result_chunks)}]