wolframalpha_noapi.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. # WolframAlpha (Maths)
  2. #
  3. # @website http://www.wolframalpha.com/
  4. # @provide-api yes (http://api.wolframalpha.com/v2/)
  5. #
  6. # @using-api no
  7. # @results HTML
  8. # @stable no
  9. # @parse answer
  10. from re import search, sub
  11. from json import loads
  12. from urllib import urlencode
  13. from lxml import html
  14. # search-url
  15. url = 'http://www.wolframalpha.com/'
  16. search_url = url+'input/?{query}'
  17. # xpath variables
  18. scripts_xpath = '//script'
  19. title_xpath = '//title'
  20. failure_xpath = '//p[attribute::class="pfail"]'
  21. # do search-request
  22. def request(query, params):
  23. params['url'] = search_url.format(query=urlencode({'i': query}))
  24. return params
  25. # get response from search-request
  26. def response(resp):
  27. results = []
  28. line = None
  29. dom = html.fromstring(resp.text)
  30. scripts = dom.xpath(scripts_xpath)
  31. # the answer is inside a js function
  32. # answer can be located in different 'pods', although by default it should be in pod_0200
  33. possible_locations = ['pod_0200\.push(.*)\n',
  34. 'pod_0100\.push(.*)\n']
  35. # failed result
  36. if dom.xpath(failure_xpath):
  37. return results
  38. # get line that matches the pattern
  39. for pattern in possible_locations:
  40. for script in scripts:
  41. try:
  42. line = search(pattern, script.text_content()).group(1)
  43. break
  44. except AttributeError:
  45. continue
  46. if line:
  47. break
  48. if line:
  49. # extract answer from json
  50. answer = line[line.find('{'):line.rfind('}')+1]
  51. answer = loads(answer.encode('unicode-escape'))
  52. answer = answer['stringified'].decode('unicode-escape')
  53. answer = sub(r'\\', '', answer)
  54. results.append({'answer': answer})
  55. # user input is in first part of title
  56. title = dom.xpath(title_xpath)[0].text
  57. result_url = request(title[:-16], {})['url']
  58. # append result
  59. results.append({'url': result_url,
  60. 'title': title})
  61. return results