Browse Source

Merge pull request #523 from a01200356/master

[fix] duckduckgo's xpaths changed
Adam Tauber 9 years ago
parent
commit
24ea39d046
3 changed files with 27 additions and 47 deletions
  1. .travis.yml (+1 -1)
  2. searx/engines/duckduckgo.py (+4 -4)
  3. tests/unit/engines/test_duckduckgo.py (+22 -42)

+ 1 - 1
.travis.yml

@@ -10,7 +10,7 @@ python:
 before_install:
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
-  - npm install -g less grunt-cli
+  - npm install less grunt-cli
   - ( cd searx/static/themes/oscar;npm install; cd - )
 install:
   - ./manage.sh update_dev_packages

+ 4 - 4
searx/engines/duckduckgo.py

@@ -28,10 +28,10 @@ language_support = True
 url = 'https://duckduckgo.com/html?{query}&s={offset}'
 
 # specific xpath variables
-result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
-url_xpath = './/a[@class="large"]/@href'
-title_xpath = './/a[@class="large"]'
-content_xpath = './/div[@class="snippet"]'
+result_xpath = '//div[@class="result results_links results_links_deep web-result "]'  # noqa
+url_xpath = './/a[@class="result__a"]/@href'
+title_xpath = './/a[@class="result__a"]'
+content_xpath = './/a[@class="result__snippet"]'
 
 
 # do search-request

+ 22 - 42
tests/unit/engines/test_duckduckgo.py

@@ -32,55 +32,32 @@ class TestDuckduckgoEngine(SearxTestCase):
         self.assertEqual(duckduckgo.response(response), [])
 
         html = u"""
-        <div class="results_links results_links_deep web-result">
-            <div class="icon_fav" style="display: block;">
-                <a rel="nofollow" href="https://www.test.com/">
-                    <img width="16" height="16" alt=""
-                    src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
-                </a>
-            </div>
-            <div class="links_main links_deep"> <!-- This is the visible part -->
-                <a rel="nofollow" class="large" href="http://this.should.be.the.link/ű">
-                    This <b>is</b> <b>the</b> title
-                </a>
-                <div class="snippet"><b>This</b> should be the content.</div>
-                <div class="url">
-                    http://this.should.be.the.link/
+        <div class="result results_links results_links_deep web-result result--no-result">
+            <div class="links_main links_deep result__body">
+                <h2 class="result__title">
+                </h2>
+                <div class="no-results">No results</div>
+                <div class="result__extras">
                 </div>
             </div>
         </div>
         """
         response = mock.Mock(text=html)
         results = duckduckgo.response(response)
-        self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['title'], 'This is the title')
-        self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
-        self.assertEqual(results[0]['content'], 'This should be the content.')
+        self.assertEqual(duckduckgo.response(response), [])
 
-        html = """
-        <div class="results_links results_links_deep web-result">
-            <div class="icon_fav" style="display: block;">
-            </div>
-            <div class="links_main links_deep"> <!-- This is the visible part -->
-                <div class="snippet"><b>This</b> should be the content.</div>
-                <div class="url">
-                    http://this.should.be.the.link/
-                </div>
-            </div>
-        </div>
-        <div class="results_links results_links_deep web-result">
-            <div class="icon_fav" style="display: block;">
-                <img width="16" height="16" alt=""
-                src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
-            </div>
-            <div class="links_main links_deep"> <!-- This is the visible part -->
-                <a rel="nofollow" class="large" href="">
-                    This <b>is</b> <b>the</b> title
+        html = u"""
+        <div class="result results_links results_links_deep web-result ">
+            <div class="links_main links_deep result__body">
+                <h2 class="result__title">
+                    <a rel="nofollow" class="result__a" href="http://this.should.be.the.link/ű">
+                        This <b>is</b> <b>the</b> title
+                    </a>
+                </h2>
+                <a class="result__snippet" href="http://this.should.be.the.link/ű">
+                    <b>This</b> should be the content.
                 </a>
-                <div class="snippet"><b>This</b> should be the content.</div>
-                <div class="url">
-                    http://this.should.be.the.link/
+                <div class="result__extras">
                 </div>
             </div>
         </div>
@@ -88,4 +65,7 @@ class TestDuckduckgoEngine(SearxTestCase):
         response = mock.Mock(text=html)
         results = duckduckgo.response(response)
         self.assertEqual(type(results), list)
-        self.assertEqual(len(results), 0)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'This is the title')
+        self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
+        self.assertEqual(results[0]['content'], 'This should be the content.')