Browse Source

Merge pull request #486 from a01200356/master

[enh] WolframAlpha no API engine (and tests for both)
Adam Tauber 9 years ago
parent
commit
b5a3dfca60

+ 26 - 9
searx/engines/wolframalpha_api.py

@@ -10,11 +10,18 @@
 
 from urllib import urlencode
 from lxml import etree
+from re import search
 
 # search-url
 base_url = 'http://api.wolframalpha.com/v2/query'
 search_url = base_url + '?appid={api_key}&{query}&format=plaintext'
-api_key = ''
+site_url = 'http://www.wolframalpha.com/input/?{query}'
+api_key = ''  # defined in settings.yml
+
+# xpath variables
+failure_xpath = '/queryresult[attribute::success="false"]'
+answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
+input_xpath = '//pod[starts-with(attribute::title, "Input")]/subpod/plaintext'
 
 
 # do search-request
@@ -45,16 +52,26 @@ def response(resp):
     search_results = etree.XML(resp.content)
 
     # return empty array if there are no results
-    if search_results.xpath('/queryresult[attribute::success="false"]'):
+    if search_results.xpath(failure_xpath):
         return []
 
-    # parse result
-    result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text
-    result = replace_pua_chars(result)
+    # parse answers
+    answers = search_results.xpath(answer_xpath)
+    if answers:
+        for answer in answers:
+            answer = replace_pua_chars(answer.text)
+
+            results.append({'answer': answer})
+
+    # if there's no input section in search_results, check if answer has the input embedded (before their "=" sign)
+    try:
+        query_input = search_results.xpath(input_xpath)[0].text
+    except IndexError:
+        query_input = search(u'([^\uf7d9]+)', answers[0].text).group(1)
 
-    # append result
-    # TODO: shouldn't it bind the source too?
-    results.append({'answer': result})
+    # append link to site
+    result_url = site_url.format(query=urlencode({'i': query_input.encode('utf-8')}))
+    results.append({'url': result_url,
+                    'title': query_input + " - Wolfram|Alpha"})
 
-    # return results
     return results

+ 86 - 0
searx/engines/wolframalpha_noapi.py

@@ -0,0 +1,86 @@
+# WolframAlpha (Maths)
+#
+# @website     http://www.wolframalpha.com/
+# @provide-api yes (http://api.wolframalpha.com/v2/)
+#
+# @using-api   no
+# @results     HTML
+# @stable      no
+# @parse       answer
+
+from re import search, sub
+from json import loads
+from urllib import urlencode
+from lxml import html
+import HTMLParser
+
+# search-url
+url = 'http://www.wolframalpha.com/'
+search_url = url + 'input/?{query}'
+
+# xpath variables
+scripts_xpath = '//script'
+title_xpath = '//title'
+failure_xpath = '//p[attribute::class="pfail"]'
+
+
+# do search-request
+def request(query, params):
+    """Store the Wolfram|Alpha page URL for ``query`` in ``params['url']``.
+
+    The query is URL-encoded as the ``i`` parameter and substituted into
+    ``search_url``. The same ``params`` mapping is returned.
+    """
+    encoded_query = urlencode({'i': query})
+    params['url'] = search_url.format(query=encoded_query)
+    return params
+
+
+# get response from search-request
+def response(resp):
+    """Turn a Wolfram|Alpha HTML result page into a list of result dicts.
+
+    ``resp.text`` is parsed as HTML. If the page contains an element matching
+    ``failure_xpath`` an empty list is returned. Otherwise the function looks
+    through the page's <script> elements for a ``pod_0200.push(...)`` call
+    (falling back to ``pod_0100.push(...)``), reads the ``stringified`` field
+    of the JSON object passed to it and appends ``{'answer': ...}``. Finally a
+    ``{'url': ..., 'title': ...}`` entry built from the page <title> is
+    appended.
+
+    Raises AttributeError when ``resp`` has no ``text`` attribute.
+    NOTE(review): ``dom.xpath(title_xpath)[0]`` raises IndexError when the
+    page has no <title> element; nothing here guards against that.
+    """
+    results = []
+    line = None
+
+    dom = html.fromstring(resp.text)
+    scripts = dom.xpath(scripts_xpath)
+
+    # the answer is inside a js function
+    # answer can be located in different 'pods', although by default it should be in pod_0200
+    # each pattern captures the rest of the line that follows "push("
+    possible_locations = ['pod_0200\.push\((.*)',
+                          'pod_0100\.push\((.*)']
+
+    # failed result
+    if dom.xpath(failure_xpath):
+        return results
+
+    # get line that matches the pattern
+    # patterns form the outer loop, so pod_0200 is tried against every script
+    # before pod_0100 is considered at all
+    for pattern in possible_locations:
+        for script in scripts:
+            try:
+                # search() returns None on no match, so .group raises AttributeError
+                line = search(pattern, script.text_content()).group(1)
+                break
+            except AttributeError:
+                continue
+        if line:
+            break
+
+    if line:
+        # extract answer from json
+        # keep only the text between the first '{' and the last '}' of the captured line
+        answer = line[line.find('{'):line.rfind('}') + 1]
+        try:
+            answer = loads(answer)
+        except Exception:
+            # retry with the text escaped via 'unicode-escape'; presumably this
+            # rescues JSON containing raw control or non-ASCII characters -- TODO confirm
+            # NOTE(review): the broad except also hides unrelated errors
+            answer = loads(answer.encode('unicode-escape'))
+        answer = answer['stringified']
+
+        # clean plaintext answer
+        # resolve backslash escapes, convert HTML entities (e.g. &yen;) to
+        # characters, then drop every remaining backslash
+        h = HTMLParser.HTMLParser()
+        answer = h.unescape(answer.decode('unicode-escape'))
+        answer = sub(r'\\', '', answer)
+
+        results.append({'answer': answer})
+
+    # user input is in first part of title
+    title = dom.xpath(title_xpath)[0].text.encode('utf-8')
+    # title[:-16] drops the trailing 16 characters -- presumably the
+    # " - Wolfram|Alpha" suffix seen in the page titles; request() is reused
+    # only to build the link URL for the remaining text
+    result_url = request(title[:-16], {})['url']
+
+    # append result
+    results.append({'url': result_url,
+                    'title': title.decode('utf-8')})
+
+    return results

+ 9 - 7
searx/settings.yml

@@ -300,13 +300,15 @@ engines:
     engine : vimeo
     shortcut : vm
 
-# You can use the engine using the official stable API, but you need an API key
-# See : http://products.wolframalpha.com/api/
-#  - name : wolframalpha
-#    shortcut : wa
-#    engine : wolframalpha_api
-#    api_key: 'apikey' # required!
-#    timeout: 6.0
+  - name : wolframalpha
+    shortcut : wa
+    # You can use the engine using the official stable API, but you need an API key
+    # See : http://products.wolframalpha.com/api/
+    #    engine : wolframalpha_api
+    #    api_key: 'apikey' # required!
+    engine : wolframalpha_noapi
+    timeout: 6.0
+    disabled : True
 
 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/
 #  - name : blekko images

+ 307 - 0
tests/unit/engines/test_wolframalpha_api.py

@@ -0,0 +1,307 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import wolframalpha_api
+from searx.testing import SearxTestCase
+
+
+class TestWolframAlphaAPIEngine(SearxTestCase):
+
+    def test_request(self):
+        # request() must return params holding a wolframalpha.com URL that
+        # contains the query, with the api_key entry still present
+        search_terms = 'test_query'
+        key = 'XXXXXX-XXXXXXXXXX'
+        request_params = defaultdict(dict, api_key=key)
+
+        built = wolframalpha_api.request(search_terms, request_params)
+
+        self.assertIn('url', built)
+        for expected_fragment in (search_terms, 'wolframalpha.com'):
+            self.assertIn(expected_fragment, built['url'])
+
+        self.assertIn('api_key', built)
+        self.assertIn(key, built['api_key'])
+
+    def test_response(self):
+        self.assertRaises(AttributeError, wolframalpha_api.response, None)
+        self.assertRaises(AttributeError, wolframalpha_api.response, [])
+        self.assertRaises(AttributeError, wolframalpha_api.response, '')
+        self.assertRaises(AttributeError, wolframalpha_api.response, '[]')
+
+        xml = '''<?xml version='1.0' encoding='UTF-8'?>
+        <queryresult success='false' error='false' />
+        '''
+        # test failure
+        response = mock.Mock(content=xml)
+        self.assertEqual(wolframalpha_api.response(response), [])
+
+        xml = """<?xml version='1.0' encoding='UTF-8'?>
+        <queryresult success='true'
+            error='false'
+            numpods='6'
+            datatypes=''
+            timedout=''
+            timedoutpods=''
+            timing='0.684'
+            parsetiming='0.138'
+            parsetimedout='false'
+            recalculate=''
+            id='MSPa416020a7966dachc463600000f9c66cc21444cfg'
+            host='http://www3.wolframalpha.com'
+            server='6'
+            related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?...'
+            version='2.6'>
+         <pod title='Input'
+             scanner='Identity'
+             id='Input'
+             position='100'
+             error='false'
+             numsubpods='1'>
+          <subpod title=''>
+           <plaintext>sqrt(-1)</plaintext>
+          </subpod>
+         </pod>
+         <pod title='Result'
+             scanner='Simplification'
+             id='Result'
+             position='200'
+             error='false'
+             numsubpods='1'
+             primary='true'>
+          <subpod title=''>
+           <plaintext></plaintext>
+          </subpod>
+          <states count='1'>
+           <state name='Step-by-step solution'
+               input='Result__Step-by-step solution' />
+          </states>
+         </pod>
+         <pod title='Polar coordinates'
+             scanner='Numeric'
+             id='PolarCoordinates'
+             position='300'
+             error='false'
+             numsubpods='1'>
+          <subpod title=''>
+           <plaintext>r1 (radius), θ90° (angle)</plaintext>
+          </subpod>
+         </pod>
+         <pod title='Position in the complex plane'
+             scanner='Numeric'
+             id='PositionInTheComplexPlane'
+             position='400'
+             error='false'
+             numsubpods='1'>
+          <subpod title=''>
+           <plaintext></plaintext>
+          </subpod>
+         </pod>
+         <pod title='All 2nd roots of -1'
+             scanner='RootsOfUnity'
+             id=''
+             position='500'
+             error='false'
+             numsubpods='2'>
+          <subpod title=''>
+           <plaintext>  (principal root)</plaintext>
+          </subpod>
+          <subpod title=''>
+           <plaintext>-</plaintext>
+          </subpod>
+         </pod>
+         <pod title='Plot of all roots in the complex plane'
+             scanner='RootsOfUnity'
+             id='PlotOfAllRootsInTheComplexPlane'
+             position='600'
+             error='false'
+             numsubpods='1'>
+          <subpod title=''>
+           <plaintext></plaintext>
+          </subpod>
+         </pod>
+        </queryresult>
+        """
+        # test private use area (PUA) char in response
+        response = mock.Mock(content=xml)
+        results = wolframalpha_api.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertIn('i', results[0]['answer'])
+        self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title'])
+        self.assertEquals('http://www.wolframalpha.com/input/?i=sqrt%28-1%29', results[1]['url'])
+
+        xml = """<?xml version='1.0' encoding='UTF-8'?>
+            <queryresult success='true'
+                error='false'
+                numpods='2'
+                datatypes=''
+                timedout=''
+                timedoutpods=''
+                timing='1.286'
+                parsetiming='0.255'
+                parsetimedout='false'
+                recalculate=''
+                id='MSPa195222ad740ede5214h30000480ca61h003d3gd6'
+                host='http://www3.wolframalpha.com'
+                server='20'
+                related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?id=...'
+                version='2.6'>
+             <pod title='Indefinite integral'
+                 scanner='Integral'
+                 id='IndefiniteIntegral'
+                 position='100'
+                 error='false'
+                 numsubpods='1'
+                 primary='true'>
+              <subpod title=''>
+               <plaintext>∫1/xxlog(x)+constant</plaintext>
+              </subpod>
+              <states count='1'>
+               <state name='Step-by-step solution'
+                   input='IndefiniteIntegral__Step-by-step solution' />
+              </states>
+              <infos count='1'>
+               <info text='log(x) is the natural logarithm'>
+                <link url='http://reference.wolfram.com/mathematica/ref/Log.html'
+                    text='Documentation'
+                    title='Mathematica' />
+                <link url='http://functions.wolfram.com/ElementaryFunctions/Log'
+                    text='Properties'
+                    title='Wolfram Functions Site' />
+                <link url='http://mathworld.wolfram.com/NaturalLogarithm.html'
+                    text='Definition'
+                    title='MathWorld' />
+               </info>
+              </infos>
+             </pod>
+             <pod title='Plots of the integral'
+                 scanner='Integral'
+                 id='Plot'
+                 position='200'
+                 error='false'
+                 numsubpods='2'>
+              <subpod title=''>
+               <plaintext></plaintext>
+               <states count='1'>
+                <statelist count='2'
+                    value='Complex-valued plot'
+                    delimiters=''>
+                 <state name='Complex-valued plot'
+                     input='Plot__1_Complex-valued plot' />
+                 <state name='Real-valued plot'
+                     input='Plot__1_Real-valued plot' />
+                </statelist>
+               </states>
+              </subpod>
+              <subpod title=''>
+               <plaintext></plaintext>
+               <states count='1'>
+                <statelist count='2'
+                    value='Complex-valued plot'
+                    delimiters=''>
+                 <state name='Complex-valued plot'
+                     input='Plot__2_Complex-valued plot' />
+                 <state name='Real-valued plot'
+                     input='Plot__2_Real-valued plot' />
+                </statelist>
+               </states>
+              </subpod>
+             </pod>
+             <assumptions count='1'>
+              <assumption type='Clash'
+                  word='integral'
+                  template='Assuming &quot;${word}&quot; is ${desc1}. Use as ${desc2} instead'
+                  count='2'>
+               <value name='IntegralsWord'
+                   desc='an integral'
+                   input='*C.integral-_*IntegralsWord-' />
+               <value name='MathematicalFunctionIdentityPropertyClass'
+                   desc='a function property'
+                   input='*C.integral-_*MathematicalFunctionIdentityPropertyClass-' />
+              </assumption>
+             </assumptions>
+            </queryresult>
+        """
+        # test integral
+        response = mock.Mock(content=xml)
+        results = wolframalpha_api.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertIn('log(x)+c', results[0]['answer'])
+        self.assertIn('∫1/xx - Wolfram|Alpha'.decode('utf-8'), results[1]['title'])
+        self.assertEquals('http://www.wolframalpha.com/input/?i=%E2%88%AB1%2Fx%EF%9D%8Cx', results[1]['url'])
+
+        xml = """<?xml version='1.0' encoding='UTF-8'?>
+        <queryresult success='true'
+            error='false'
+            numpods='4'
+            datatypes='Solve'
+            timedout=''
+            timedoutpods=''
+            timing='0.79'
+            parsetiming='0.338'
+            parsetimedout='false'
+            recalculate=''
+            id='MSPa7481f7i06d25h3deh2900004810i3a78d9b4fdc'
+            host='http://www5b.wolframalpha.com'
+            server='23'
+            related='http://www5b.wolframalpha.com/api/v2/relatedQueries.jsp?id=...'
+            version='2.6'>
+         <pod title='Input interpretation'
+             scanner='Identity'
+             id='Input'
+             position='100'
+             error='false'
+             numsubpods='1'>
+          <subpod title=''>
+           <plaintext>solve x^2+x0</plaintext>
+          </subpod>
+         </pod>
+         <pod title='Results'
+             scanner='Solve'
+             id='Result'
+             position='200'
+             error='false'
+             numsubpods='2'
+             primary='true'>
+          <subpod title=''>
+           <plaintext>x-1</plaintext>
+          </subpod>
+          <subpod title=''>
+           <plaintext>x0</plaintext>
+          </subpod>
+          <states count='1'>
+           <state name='Step-by-step solution'
+               input='Result__Step-by-step solution' />
+          </states>
+         </pod>
+         <pod title='Root plot'
+             scanner='Solve'
+             id='RootPlot'
+             position='300'
+             error='false'
+             numsubpods='1'>
+          <subpod title=''>
+           <plaintext></plaintext>
+          </subpod>
+         </pod>
+         <pod title='Number line'
+             scanner='Solve'
+             id='NumberLine'
+             position='400'
+             error='false'
+             numsubpods='1'>
+          <subpod title=''>
+           <plaintext></plaintext>
+          </subpod>
+         </pod>
+        </queryresult>
+        """
+        # test equation with multiple answers
+        response = mock.Mock(content=xml)
+        results = wolframalpha_api.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 3)
+        self.assertIn('x=-1', results[0]['answer'])
+        self.assertIn('x=0', results[1]['answer'])
+        self.assertIn('solve x^2+x0 - Wolfram|Alpha'.decode('utf-8'), results[2]['title'])
+        self.assertEquals('http://www.wolframalpha.com/input/?i=solve+x%5E2%2Bx%EF%9F%990', results[2]['url'])

+ 193 - 0
tests/unit/engines/test_wolframalpha_noapi.py

@@ -0,0 +1,193 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import wolframalpha_noapi
+from searx.testing import SearxTestCase
+
+
+class TestWolframAlphaNoAPIEngine(SearxTestCase):
+
+    def test_request(self):
+        # request() must return params holding a wolframalpha.com URL that
+        # contains the query
+        search_terms = 'test_query'
+        request_params = defaultdict(dict, pageno=1)
+
+        built = wolframalpha_noapi.request(search_terms, request_params)
+
+        self.assertIn('url', built)
+        for expected_fragment in (search_terms, 'wolframalpha.com'):
+            self.assertIn(expected_fragment, built['url'])
+
+    def test_response(self):
+        self.assertRaises(AttributeError, wolframalpha_noapi.response, None)
+        self.assertRaises(AttributeError, wolframalpha_noapi.response, [])
+        self.assertRaises(AttributeError, wolframalpha_noapi.response, '')
+        self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]')
+
+        html = """
+        <!DOCTYPE html>
+            <title> Parangaricutirimícuaro - Wolfram|Alpha</title>
+            <meta charset="utf-8" />
+            <body>
+                <div id="closest">
+                    <p class="pfail">Wolfram|Alpha doesn't know how to interpret your input.</p>
+                    <div id="dtips">
+                        <div class="tip">
+                            <span class="tip-title">Tip:&nbsp;</span>
+                                Check your spelling, and use English
+                            <span class="tip-extra"></span>
+                        </div>
+                    </div>
+                </div>
+            </body>
+        </html>
+        """
+        # test failed query
+        response = mock.Mock(text=html)
+        self.assertEqual(wolframalpha_noapi.response(response), [])
+
+        html = """
+        <!DOCTYPE html>
+            <title> sqrt(-1) - Wolfram|Alpha</title>
+            <meta charset="utf-8" />
+            <body>
+                <script type="text/javascript">
+                  try {
+                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+                      context.jsonArray.popups.pod_0100 = [];
+                    }
+                    context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": ""});
+                  } catch(e) { }
+
+                  try {
+                    if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
+                      context.jsonArray.popups.pod_0200 = [];
+                    }
+                    context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": ""});
+                  } catch(e) { }
+                </script>
+            </body>
+        </html>
+        """
+        # test plaintext
+        response = mock.Mock(text=html)
+        results = wolframalpha_noapi.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertEquals('i', results[0]['answer'])
+        self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title'])
+        self.assertEquals('http://www.wolframalpha.com/input/?i=+sqrt%28-1%29', results[1]['url'])
+
+        html = """
+        <!DOCTYPE html>
+            <title> integral 1/x - Wolfram|Alpha</title>
+            <meta charset="utf-8" />
+            <body>
+                <script type="text/javascript">
+                  try {
+                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+                      context.jsonArray.popups.pod_0100 = [];
+                    }
+                    context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"});
+                  } catch(e) { }
+                </script>
+            </body>
+        </html>
+        """
+        # test integral
+        response = mock.Mock(text=html)
+        results = wolframalpha_noapi.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertIn('log(x)+c', results[0]['answer'])
+        self.assertIn('integral 1/x - Wolfram|Alpha', results[1]['title'])
+        self.assertEquals('http://www.wolframalpha.com/input/?i=+integral+1%2Fx', results[1]['url'])
+
+        html = """
+        <!DOCTYPE html>
+            <title> &int;1&#x2f;x &#xf74c;x - Wolfram|Alpha</title>
+            <meta charset="utf-8" />
+            <body>
+                <script type="text/javascript">
+                  try {
+                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+                      context.jsonArray.popups.pod_0100 = [];
+                    }
+                    context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"});
+                  } catch(e) { }
+                </script>
+            </body>
+        </html>
+        """
+        # test input in mathematical notation
+        response = mock.Mock(text=html)
+        results = wolframalpha_noapi.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertIn('log(x)+c', results[0]['answer'])
+        self.assertIn('∫1/x x - Wolfram|Alpha'.decode('utf-8'), results[1]['title'])
+        self.assertEquals('http://www.wolframalpha.com/input/?i=+%E2%88%AB1%2Fx+%EF%9D%8Cx', results[1]['url'])
+
+        html = """
+        <!DOCTYPE html>
+            <title> 1 euro to yen - Wolfram|Alpha</title>
+            <meta charset="utf-8" />
+            <body>
+                <script type="text/javascript">
+                  try {
+                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+                      context.jsonArray.popups.pod_0100 = [];
+                    }
+                  context.jsonArray.popups.pod_0100.push( {"stringified": "convert euro1  (euro) to Japanese yen"});
+                  } catch(e) { }
+
+                  try {
+                    if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
+                      context.jsonArray.popups.pod_0200 = [];
+                    }
+                    context.jsonArray.popups.pod_0200.push( {"stringified": "&yen;130.5  (Japanese yen)"});
+                  } catch(e) { }
+                </script>
+            </body>
+        </html>
+        """
+        # test output with htmlentity
+        response = mock.Mock(text=html)
+        results = wolframalpha_noapi.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertIn('¥'.decode('utf-8'), results[0]['answer'])
+        self.assertIn('1 euro to yen - Wolfram|Alpha', results[1]['title'])
+        self.assertEquals('http://www.wolframalpha.com/input/?i=+1+euro+to+yen', results[1]['url'])
+
+        html = """
+        <!DOCTYPE html>
+            <title> distance from nairobi to kyoto in inches - Wolfram|Alpha</title>
+            <meta charset="utf-8" />
+            <body>
+                <script type="text/javascript">
+                  try {
+                    if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
+                      context.jsonArray.popups.pod_0100 = [];
+                    }
+[...].pod_0100.push( {"stringified": "convert distance | from | Nairobi, Kenya\nto | Kyoto, Japan to inches"});
+                  } catch(e) { }
+
+                  try {
+                    if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
+                      context.jsonArray.popups.pod_0200 = [];
+                    }
+pod_0200.push({"stringified": "4.295&times;10^8 inches","mOutput": "Quantity[4.295×10^8,&amp;quot;Inches&amp;quot;]"});
+
+                  } catch(e) { }
+                </script>
+            </body>
+        </html>
+        """
+        # test output with utf-8 character
+        response = mock.Mock(text=html)
+        results = wolframalpha_noapi.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 2)
+        self.assertIn('4.295×10^8 inches'.decode('utf-8'), results[0]['answer'])
+        self.assertIn('distance from nairobi to kyoto in inches - Wolfram|Alpha', results[1]['title'])
+        self.assertEquals('http://www.wolframalpha.com/input/?i=+distance+from+nairobi+to+kyoto+in+inches',
+                          results[1]['url'])