wolframalpha_noapi.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. # WolframAlpha (Maths)
  2. #
  3. # @website http://www.wolframalpha.com/
  4. # @provide-api yes (http://api.wolframalpha.com/v2/)
  5. #
  6. # @using-api no
  7. # @results HTML
  8. # @stable no
  9. # @parse answer
  10. from cgi import escape
  11. from json import loads
  12. from time import time
  13. from urllib import urlencode
  14. from searx.poolrequests import get as http_get
  15. # search-url
  16. url = 'https://www.wolframalpha.com/'
  17. search_url = url + 'input/?{query}'
  18. search_url = url + 'input/json.jsp'\
  19. '?async=true'\
  20. '&banners=raw'\
  21. '&debuggingdata=false'\
  22. '&format=image,plaintext,imagemap,minput,moutput'\
  23. '&formattimeout=2'\
  24. '&{query}'\
  25. '&output=JSON'\
  26. '&parsetimeout=2'\
  27. '&proxycode={token}'\
  28. '&scantimeout=0.5'\
  29. '&sponsorcategories=true'\
  30. '&statemethod=deploybutton'
  31. # xpath variables
  32. scripts_xpath = '//script'
  33. title_xpath = '//title'
  34. failure_xpath = '//p[attribute::class="pfail"]'
  35. token = {'value': '',
  36. 'last_updated': None}
  37. # seems, wolframalpha resets its token in every hour
  38. def obtain_token():
  39. update_time = time() - (time() % 3600)
  40. token_response = http_get('https://www.wolframalpha.com/input/api/v1/code?ts=9999999999999999999', timeout=2.0)
  41. token['value'] = loads(token_response.text)['code']
  42. token['last_updated'] = update_time
  43. return token
  44. obtain_token()
  45. # do search-request
  46. def request(query, params):
  47. # obtain token if last update was more than an hour
  48. if time() - token['last_updated'] > 3600:
  49. obtain_token()
  50. params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
  51. params['headers']['Referer'] = 'https://www.wolframalpha.com/input/?i=' + query
  52. return params
  53. # get response from search-request
  54. def response(resp):
  55. resp_json = loads(resp.text)
  56. if not resp_json['queryresult']['success']:
  57. return []
  58. # TODO handle resp_json['queryresult']['assumptions']
  59. result_chunks = []
  60. for pod in resp_json['queryresult']['pods']:
  61. pod_title = pod.get('title', '')
  62. if 'subpods' not in pod:
  63. continue
  64. for subpod in pod['subpods']:
  65. if 'img' in subpod:
  66. result_chunks.append(u'<p>{0}<br /><img src="{1}" alt="{2}" /></p>'
  67. .format(escape(pod_title or subpod['img']['alt']),
  68. escape(subpod['img']['src']),
  69. escape(subpod['img']['alt'])))
  70. if not result_chunks:
  71. return []
  72. return [{'url': resp.request.headers['Referer'],
  73. 'title': 'Wolframalpha',
  74. 'content': ''.join(result_chunks)}]