Browse Source

Removes what looks like tracking parameters

Denis Wernert 6 years ago
parent
commit
b9ada93b3a
2 changed files with 19 additions and 7 deletions
  1. 16 4
      searx/engines/unsplash.py
  2. 3 3
      tests/unit/engines/test_unsplash.py

+ 16 - 4
searx/engines/unsplash.py

@@ -10,7 +10,7 @@
  @parse       url, title, img_src, thumbnail_src
 """
 
-from searx.url_utils import urlencode
+from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
 from json import loads
 
 url = 'https://unsplash.com/'
@@ -20,6 +20,18 @@ page_size = 20
 paging = True
 
 
+def clean_url(url):  # return `url` rebuilt without its 'ixid' and 's' query parameters
+    parsed = urlparse(url)  # split into components; only the query part is rewritten below
+    query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']]  # NOTE(review): if this is the stdlib parse_qsl, blank-valued params are dropped too - confirm
+
+    return urlunparse((parsed.scheme,
+                       parsed.netloc,
+                       parsed.path,
+                       parsed.params,
+                       urlencode(query),  # re-encode the (key, value) pairs that were kept
+                       parsed.fragment))
+
+
 def request(query, params):  # set params['url'] to the search URL for this query and page
     params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size})  # search_url is defined outside this hunk
     return params  # the same dict that was passed in, now carrying 'url'
@@ -32,9 +44,9 @@ def response(resp):
     if 'results' in json_data:
         for result in json_data['results']:
             results.append({'template': 'images.html',
-                            'url': result['links']['html'],
-                            'thumbnail_src': result['urls']['thumb'],
-                            'img_src': result['urls']['raw'],
+                            'url': clean_url(result['links']['html']),
+                            'thumbnail_src': clean_url(result['urls']['thumb']),
+                            'img_src': clean_url(result['urls']['raw']),
                             'title': result['description'],
                             'content': ''})
     return results

+ 3 - 3
tests/unit/engines/test_unsplash.py

@@ -32,7 +32,7 @@ class TestUnsplashEngine(SearxTestCase):
         self.assertEqual(result[0]['title'], 'low angle photography of swimming penguin')
         self.assertEqual(result[0]['url'], 'https://unsplash.com/photos/FY8d721UO_4')
         self.assertEqual(result[0]['thumbnail_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5&q=80\
-&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9&s=a9b9e56e63efc6f4611a87ce7e9a48f8')
-        self.assertEqual(result[0]['img_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c?ixlib=rb-0.3.5\
-&ixid=eyJhcHBfaWQiOjEyMDd9&s=095c5fc319c5a77c705f49ad63e0f195')
+&fm=jpg&crop=entropy&cs=tinysrgb&w=200&fit=max')
+        self.assertEqual(result[0]['img_src'], 'https://images.unsplash.com/photo-1523557148507-1b77641c7e7c\
+?ixlib=rb-0.3.5')
         self.assertEqual(result[0]['content'], '')