Browse Source

Merge pull request #542 from ukwt/fix538

[fix] incorrect URLs in Reddit image search results - closes #538
Adam Tauber 9 years ago
parent
commit
817c74e523
2 changed files with 11 additions and 5 deletions
  1. +5 -3
      searx/engines/reddit.py
  2. +6 -2
      tests/unit/engines/test_reddit.py

+ 5 - 3
searx/engines/reddit.py

@@ -13,7 +13,7 @@
 import json
 from cgi import escape
 from urllib import urlencode
-from urlparse import urlparse
+from urlparse import urlparse, urljoin
 from datetime import datetime
 
 # engine dependent config
@@ -21,7 +21,8 @@ categories = ['general', 'images', 'news', 'social media']
 page_size = 25
 
 # search-url
-search_url = 'https://www.reddit.com/search.json?{query}'
+base_url = 'https://www.reddit.com/'
+search_url = base_url + 'search.json?{query}'
 
 
 # do search-request
@@ -52,7 +53,7 @@ def response(resp):
 
         # extract post information
         params = {
-            'url': data['url'],
+            'url': urljoin(base_url, data['permalink']),
             'title': data['title']
         }
 
@@ -61,6 +62,7 @@ def response(resp):
         url_info = urlparse(thumbnail)
         # netloc & path
         if url_info[1] != '' and url_info[2] != '':
+            params['img_src'] = data['url']
             params['thumbnail_src'] = thumbnail
             params['template'] = 'images.html'
             img_results.append(params)

+ 6 - 2
tests/unit/engines/test_reddit.py

@@ -25,7 +25,8 @@ class TestRedditEngine(SearxTestCase):
             "data": {
                 "children": [{
                     "data": {
-                        "url": "http://google.com/",
+                        "url": "http://google2.com/",
+                        "permalink": "http://google.com/",
                         "title": "Title number one",
                         "selftext": "Sample",
                         "created_utc": 1401219957.0,
@@ -33,7 +34,8 @@ class TestRedditEngine(SearxTestCase):
                     }
                 }, {
                     "data": {
-                        "url": "https://reddit.com/",
+                        "url": "https://reddit2.com/",
+                        "permalink": "https://reddit.com/",
                         "title": "Title number two",
                         "selftext": "Dominus vobiscum",
                         "created_utc": 1438792533.0,
@@ -55,6 +57,7 @@ class TestRedditEngine(SearxTestCase):
         self.assertEqual(r['url'], 'http://google.com/')
         self.assertEqual(r['title'], 'Title number one')
         self.assertEqual(r['template'], 'images.html')
+        self.assertEqual(r['img_src'], 'http://google2.com/')
         self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg')
 
         # testing second result (self-post)
@@ -65,3 +68,4 @@ class TestRedditEngine(SearxTestCase):
         created = datetime.fromtimestamp(1438792533.0)
         self.assertEqual(r['publishedDate'], created)
         self.assertTrue('thumbnail_src' not in r)
+        self.assertTrue('img_src' not in r)