wolframalpha_noapi.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. # WolframAlpha (Maths)
  2. #
  3. # @website http://www.wolframalpha.com/
  4. #
  5. # @using-api no
  6. # @results HTML, JS
  7. # @stable no
  8. # @parse answer
  9. import re
  10. import json
  11. from urllib import urlencode
  12. from lxml import html
  13. from searx.engines.xpath import extract_text
  14. # search-url
  15. url = 'http://www.wolframalpha.com/'
  16. search_url = url+'input/?{query}'
  17. # do search-request
  18. def request(query, params):
  19. params['url'] = search_url.format(query=urlencode({'i': query}))
  20. return params
  21. # tries to find answer under the pattern given
  22. def extract_answer(script_list, pattern):
  23. answer = None
  24. # get line that matches the pattern
  25. for script in script_list:
  26. try:
  27. line = re.search(pattern, script.text_content()).group(1)
  28. except AttributeError:
  29. continue
  30. # extract answer from json
  31. answer = line[line.find('{') : line.rfind('}')+1]
  32. answer = json.loads(answer.encode('unicode-escape'))
  33. answer = answer['stringified'].decode('unicode-escape')
  34. return answer
  35. # get response from search-request
  36. def response(resp):
  37. dom = html.fromstring(resp.text)
  38. # the answer is inside a js script
  39. scripts = dom.xpath('//script')
  40. results = []
  41. # answer can be located in different 'pods', although by default it should be in pod_0200
  42. answer = extract_answer(scripts, 'pod_0200\.push(.*)\n')
  43. if not answer:
  44. answer = extract_answer(scripts, 'pod_0100\.push(.*)\n')
  45. if answer:
  46. results.append({'answer': answer})
  47. else:
  48. results.append({'answer': answer})
  49. return results