Browse Source

Remove content field from ArchWiki results; reformat code in archlinux.py

The content field in Arch Wiki search results is of no real use: more often
than not it contains no usable information and includes too many markup
tags, which make the text unreadable. It is safe to remove it.
Kirill Isakov 9 years ago
parent
commit
8b7dc2acb9
2 changed files with 13 additions and 21 deletions
  1. 7 10
      searx/engines/archlinux.py
  2. 6 11
      tests/unit/engines/test_archlinux.py

+ 7 - 10
searx/engines/archlinux.py

@@ -3,12 +3,12 @@
 """
 """
  Arch Linux Wiki
  Arch Linux Wiki
 
 
- @website	https://wiki.archlinux.org
- @provide-api	no (Mediawiki provides API, but Arch Wiki blocks access to it
- @using-api	no
- @results	HTML
- @stable	no (HTML can change)
- @parse		url, title, content
+ @website      https://wiki.archlinux.org
+ @provide-api  no (Mediawiki provides API, but Arch Wiki blocks access to it
+ @using-api    no
+ @results      HTML
+ @stable       no (HTML can change)
+ @parse        url, title
 """
 """
 
 
 from urlparse import urljoin
 from urlparse import urljoin
@@ -26,7 +26,6 @@ base_url = 'https://wiki.archlinux.org'
 # xpath queries
 # xpath queries
 xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
-xpath_content = './/div[@class="searchresult"]'
 
 
 
 
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
 # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
@@ -135,10 +134,8 @@ def response(resp):
         link = result.xpath(xpath_link)[0]
         link = result.xpath(xpath_link)[0]
         href = urljoin(base_url, link.attrib.get('href'))
         href = urljoin(base_url, link.attrib.get('href'))
         title = escape(extract_text(link))
         title = escape(extract_text(link))
-        content = escape(extract_text(result.xpath(xpath_content)))
 
 
         results.append({'url': href,
         results.append({'url': href,
-                        'title': title,
-                        'content': content})
+                        'title': title})
 
 
     return results
     return results

+ 6 - 11
tests/unit/engines/test_archlinux.py

@@ -18,7 +18,7 @@ class TestArchLinuxEngine(SearxTestCase):
     def test_request(self):
     def test_request(self):
         query = 'test_query'
         query = 'test_query'
         dic = defaultdict(dict)
         dic = defaultdict(dict)
-        dic['pageno'] = 0
+        dic['pageno'] = 1
         dic['language'] = 'en_US'
         dic['language'] = 'en_US'
         params = archlinux.request(query, dic)
         params = archlinux.request(query, dic)
         self.assertTrue('url' in params)
         self.assertTrue('url' in params)
@@ -31,10 +31,8 @@ class TestArchLinuxEngine(SearxTestCase):
             self.assertTrue(domain in params['url'])
             self.assertTrue(domain in params['url'])
 
 
     def test_response(self):
     def test_response(self):
-        response = mock.Mock(text='<html></html>')
-        response.search_params = {
-            'language': 'en_US'
-        }
+        response = mock.Mock(text='<html></html>',
+                             search_params={'language': 'en_US'})
         self.assertEqual(archlinux.response(response), [])
         self.assertEqual(archlinux.response(response), [])
 
 
         html = """
         html = """
@@ -79,18 +77,15 @@ class TestArchLinuxEngine(SearxTestCase):
         expected = [
         expected = [
             {
             {
                 'title': 'ATI',
                 'title': 'ATI',
-                'url': 'https://wiki.archlinux.org/index.php/ATI',
-                'content': 'Lorem ipsum dolor sit amet'
+                'url': 'https://wiki.archlinux.org/index.php/ATI'
             },
             },
             {
             {
                 'title': 'Frequently asked questions',
                 'title': 'Frequently asked questions',
-                'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions',
-                'content': 'CPUs with AMDs instruction set "AMD64"'
+                'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions'
             },
             },
             {
             {
                 'title': 'CPU frequency scaling',
                 'title': 'CPU frequency scaling',
-                'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling',
-                'content': 'ondemand for AMD and older Intel CPU'
+                'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling'
             }
             }
         ]
         ]