Browse Source

Merge pull request #305 from dalf/https_again

[enh] reduce the number of outgoing plain-HTTP connections.
Adam Tauber 10 years ago
parent
commit
59a65d0bdc

+ 2 - 2
searx/autocomplete.py

@@ -111,7 +111,7 @@ def searx_bang(full_query):
 
 
 
 
 def dbpedia(query):
 def dbpedia(query):
-    # dbpedia autocompleter
+    # dbpedia autocompleter, no HTTPS
     autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'  # noqa
     autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'  # noqa
 
 
     response = get(autocomplete_url
     response = get(autocomplete_url
@@ -139,7 +139,7 @@ def duckduckgo(query):
 
 
 def google(query):
 def google(query):
     # google autocompleter
     # google autocompleter
-    autocomplete_url = 'http://suggestqueries.google.com/complete/search?client=toolbar&'  # noqa
+    autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'  # noqa
 
 
     response = get(autocomplete_url
     response = get(autocomplete_url
                    + urlencode(dict(q=query)))
                    + urlencode(dict(q=query)))

+ 3 - 0
searx/engines/dailymotion.py

@@ -60,6 +60,9 @@ def response(resp):
         publishedDate = datetime.fromtimestamp(res['created_time'], None)
         publishedDate = datetime.fromtimestamp(res['created_time'], None)
         embedded = embedded_url.format(videoid=res['id'])
         embedded = embedded_url.format(videoid=res['id'])
 
 
+        # http to https
+        thumbnail = thumbnail.replace("http://", "https://")
+
         results.append({'template': 'videos.html',
         results.append({'template': 'videos.html',
                         'url': url,
                         'url': url,
                         'title': title,
                         'title': title,

+ 7 - 1
searx/engines/deviantart.py

@@ -22,7 +22,7 @@ paging = True
 
 
 # search-url
 # search-url
 base_url = 'https://www.deviantart.com/'
 base_url = 'https://www.deviantart.com/'
-search_url = base_url+'search?offset={offset}&{query}'
+search_url = base_url+'browse/all/?offset={offset}&{query}'
 
 
 
 
 # do search-request
 # do search-request
@@ -56,6 +56,12 @@ def response(resp):
         thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
         thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
         img_src = regex.sub('/', thumbnail_src)
         img_src = regex.sub('/', thumbnail_src)
 
 
+        # http to https, remove domain sharding
+        thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src)
+        thumbnail_src = re.sub(r"http://", "https://", thumbnail_src)
+
+        url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url)
+
         # append result
         # append result
         results.append({'url': url,
         results.append({'url': url,
                         'title': title,
                         'title': title,

+ 3 - 0
searx/engines/digg.py

@@ -58,6 +58,9 @@ def response(resp):
         pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
         pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
         publishedDate = parser.parse(pubdate)
         publishedDate = parser.parse(pubdate)
 
 
+        # http to https
+        thumbnail = thumbnail.replace("http://static.digg.com", "https://static.digg.com")
+
         # append result
         # append result
         results.append({'url': url,
         results.append({'url': url,
                         'title': title,
                         'title': title,

+ 1 - 1
searx/engines/gigablast.py

@@ -17,7 +17,7 @@ categories = ['general']
 paging = True
 paging = True
 number_of_results = 5
 number_of_results = 5
 
 
-# search-url
+# search-url, invalid HTTPS certificate
 base_url = 'http://gigablast.com/'
 base_url = 'http://gigablast.com/'
 search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
 search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
 
 

+ 3 - 0
searx/engines/google_images.py

@@ -56,6 +56,9 @@ def response(resp):
             continue
             continue
         thumbnail_src = result['tbUrl']
         thumbnail_src = result['tbUrl']
 
 
+        # http to https
+        thumbnail_src = thumbnail_src.replace("http://", "https://")
+
         # append result
         # append result
         results.append({'url': href,
         results.append({'url': href,
                         'title': title,
                         'title': title,

+ 1 - 1
searx/engines/www1x.py

@@ -19,7 +19,7 @@ import re
 categories = ['images']
 categories = ['images']
 paging = False
 paging = False
 
 
-# search-url
+# search-url, no HTTPS (there is a valid certificate for https://api2.1x.com/ )
 base_url = 'http://1x.com'
 base_url = 'http://1x.com'
 search_url = base_url+'/backend/search.php?{query}'
 search_url = base_url+'/backend/search.php?{query}'
 
 

+ 1 - 1
searx/tests/engines/test_deviantart.py

@@ -75,7 +75,7 @@ class TestDeviantartEngine(SearxTestCase):
         self.assertEqual(results[0]['title'], 'Title of image')
         self.assertEqual(results[0]['title'], 'Title of image')
         self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url')
         self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url')
         self.assertNotIn('content', results[0])
         self.assertNotIn('content', results[0])
-        self.assertEqual(results[0]['thumbnail_src'], 'http://url.of.thumbnail')
+        self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')
 
 
         html = """
         html = """
         <span class="tt-fh-tc" style="width: 202px;">
         <span class="tt-fh-tc" style="width: 202px;">

+ 1 - 1
searx/tests/engines/test_google_images.py

@@ -65,7 +65,7 @@ class TestGoogleImagesEngine(SearxTestCase):
         self.assertEqual(len(results), 1)
         self.assertEqual(len(results), 1)
         self.assertEqual(results[0]['title'], 'This is the title')
         self.assertEqual(results[0]['title'], 'This is the title')
         self.assertEqual(results[0]['url'], 'http://this.is.the.url')
         self.assertEqual(results[0]['url'], 'http://this.is.the.url')
-        self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.url')
+        self.assertEqual(results[0]['thumbnail_src'], 'https://thumbnail.url')
         self.assertEqual(results[0]['img_src'], 'http://image.url.jpg')
         self.assertEqual(results[0]['img_src'], 'http://image.url.jpg')
         self.assertEqual(results[0]['content'], '<b>test</b>')
         self.assertEqual(results[0]['content'], '<b>test</b>')