Browse Source

Merge pull request #1669 from dalf/engine-fixes

Engine fixes
Alexandre Flament 5 years ago
parent
commit
12f891da84

+ 1 - 1
searx/engines/arxiv.py

@@ -29,7 +29,7 @@ def request(query, params):
     # basic search
     offset = (params['pageno'] - 1) * number_of_results
 
-    string_args = dict(query=query,
+    string_args = dict(query=query.decode('utf-8'),
                        offset=offset,
                        number_of_results=number_of_results)
 

+ 0 - 2
searx/engines/bing.py

@@ -47,8 +47,6 @@ def request(query, params):
 
     params['url'] = base_url + search_path
 
-    params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64')
-
     return params
 
 

+ 1 - 1
searx/engines/dictzone.py

@@ -15,7 +15,7 @@ from searx.utils import is_valid_lang
 from searx.url_utils import urljoin
 
 categories = ['general']
-url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)

+ 14 - 15
searx/engines/fdroid.py

@@ -18,13 +18,13 @@ categories = ['files']
 paging = True
 
 # search-url
-base_url = 'https://f-droid.org/'
-search_url = base_url + 'repository/browse/?{query}'
+base_url = 'https://search.f-droid.org/'
+search_url = base_url + '?{query}'
 
 
 # do search-request
 def request(query, params):
-    query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
+    query = urlencode({'q': query, 'page': params['pageno'], 'lang': ''})
     params['url'] = search_url.format(query=query)
     return params
 
@@ -35,17 +35,16 @@ def response(resp):
 
     dom = html.fromstring(resp.text)
 
-    for app in dom.xpath('//div[@id="appheader"]'):
-        url = app.xpath('./ancestor::a/@href')[0]
-        title = app.xpath('./p/span/text()')[0]
-        img_src = app.xpath('.//img/@src')[0]
-
-        content = extract_text(app.xpath('./p')[0])
-        content = content.replace(title, '', 1).strip()
-
-        results.append({'url': url,
-                        'title': title,
-                        'content': content,
-                        'img_src': img_src})
+    for app in dom.xpath('//a[@class="package-header"]'):
+        app_url = app.xpath('./@href')[0]
+        app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()'))
+        app_content = extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() \
+            + ' - ' + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip()
+        app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0]
+
+        results.append({'url': app_url,
+                        'title': app_title,
+                        'content': app_content,
+                        'img_src': app_img_src})
 
     return results

+ 8 - 8
searx/settings.yml

@@ -204,11 +204,11 @@ engines:
   - name : etymonline
     engine : xpath
     paging : True
-    search_url : http://etymonline.com/?search={query}&p={pageno}
-    url_xpath : //a[contains(@class, "word--")]/@href
-    title_xpath : //p[contains(@class, "word__name--")]/text()
-    content_xpath : //section[contains(@class, "word__defination")]/object
-    first_page_num : 0
+    search_url : https://etymonline.com/search?page={pageno}&q={query}
+    url_xpath : //a[contains(@class, "word__name--")]/@href
+    title_xpath : //a[contains(@class, "word__name--")]
+    content_xpath : //section[contains(@class, "word__defination")]
+    first_page_num : 1
     shortcut : et
     disabled : True
 
@@ -703,9 +703,9 @@ engines:
     shortcut: vo
     categories: social media
     search_url : https://searchvoat.co/?t={query}
-    url_xpath : //div[@class="entry"]/p/a[@class="title"]/@href
-    title_xpath : //div[@class="entry"]/p/a[@class="title"]
-    content_xpath : //div[@class="entry"]/p/span[@class="domain"]
+    url_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]/@href
+    title_xpath : //div[@class="entry"]/p/a[contains(@class, "title")]
+    content_xpath : //div[@class="entry"]/p/span[@class="domain"]/a/text()
     timeout : 10.0
     disabled : True
 

+ 1 - 1
tests/unit/engines/test_arxiv.py

@@ -8,7 +8,7 @@ from searx.testing import SearxTestCase
 class TestBaseEngine(SearxTestCase):
 
     def test_request(self):
-        query = 'test_query'
+        query = 'test_query'.encode('utf-8')
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
         params = arxiv.request(query, dicto)

+ 33 - 22
tests/unit/engines/test_fdroid.py

@@ -13,29 +13,40 @@ class TestFdroidEngine(SearxTestCase):
         params = fdroid.request(query, dic)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
-        self.assertTrue('f-droid.org' in params['url'])
+        self.assertTrue('search.f-droid.org' in params['url'])
 
-    def test_response(self):
+    def test_response_empty(self):
         resp = mock.Mock(text='<html></html>')
         self.assertEqual(fdroid.response(resp), [])
 
+    def test_response_oneresult(self):
         html = """
-        <a href="https://google.com/qwerty">
-          <div id="appheader">
-            <div style="float:left;padding-right:10px;">
-              <img src="http://example.com/image.png"
-                   style="width:48px;border:none;">
-            </div>
-            <div style="float:right;">
-              <p>Details...</p>
-            </div>
-            <p style="color:#000000;">
-              <span style="font-size:20px;">Sample title</span>
-              <br>
-              Sample content
-            </p>
-          </div>
-        </a>
+<!DOCTYPE html>
+<html>
+<head>
+    <title>test</title>
+</head>
+<body>
+    <div class="site-wrapper">
+        <div class="main-content">
+            <a class="package-header" href="https://example.com/app.url">
+                <img class="package-icon" src="https://example.com/appexample.logo.png" />
+
+                <div class="package-info">
+                    <h4 class="package-name">
+                        App Example 1
+                    </h4>
+
+                    <div class="package-desc">
+                        <span class="package-summary">Description App Example 1</span>
+                        <span class="package-license">GPL-3.0-only</span>
+                    </div>
+                </div>
+            </a>
+        </div>
+    </div>
+</body>
+</html>
         """
 
         resp = mock.Mock(text=html)
@@ -43,7 +54,7 @@ class TestFdroidEngine(SearxTestCase):
 
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
-        self.assertEqual(results[0]['url'], 'https://google.com/qwerty')
-        self.assertEqual(results[0]['title'], 'Sample title')
-        self.assertEqual(results[0]['content'], 'Sample content')
-        self.assertEqual(results[0]['img_src'], 'http://example.com/image.png')
+        self.assertEqual(results[0]['url'], 'https://example.com/app.url')
+        self.assertEqual(results[0]['title'], 'App Example 1')
+        self.assertEqual(results[0]['content'], 'Description App Example 1 - GPL-3.0-only')
+        self.assertEqual(results[0]['img_src'], 'https://example.com/appexample.logo.png')