6 years ago · 2179079a91
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -16,7 +16,8 @@ from json import loads
 
				 from time import time
			
 
				 import re
			
 
				 from searx.engines import logger
			
 
				-from searx.url_utils import urlencode, unquote
			
 
				+from searx.url_utils import urlencode
			
 
				+from searx.utils import ecma_unescape, html_to_text
			
 
				 
			
 
				 logger = logger.getChild('flickr-noapi')
			
 
				 
			
@@ -75,11 +76,10 @@ def response(resp):
 
				 
			
 
				     for index in legend:
			
 
				         photo = model_export['main'][index[0]][int(index[1])][index[2]][index[3]][int(index[4])]
			
 
				-        author = unquote(photo.get('realname', ''))
			
 
				-        source = unquote(photo.get('username', '')) + ' @ Flickr'
			
 
				-        title = unquote(photo.get('title', ''))
			
 
				-        content = unquote(photo.get('description', ''))
			
 
				-
			
 
				+        author = ecma_unescape(photo.get('realname', ''))
			
 
				+        source = ecma_unescape(photo.get('username', '')) + ' @ Flickr'
			
 
				+        title = ecma_unescape(photo.get('title', ''))
			
 
				+        content = html_to_text(ecma_unescape(photo.get('description', '')))
			
 
				         img_src = None
			
 
				         # From the biggest to the lowest format
			
 
				         for image_size in image_sizes:
			
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -1,3 +1,4 @@
 
				+# -*- coding: utf-8 -*-
			
 
				 import csv
			
 
				 import hashlib
			
 
				 import hmac
			
@@ -44,6 +45,9 @@ logger = logger.getChild('utils')
 
				 blocked_tags = ('script',
			
 
				                 'style')
			
 
				 
			
 
				+ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
			
 
				+ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
			
 
				+
			
 
				 useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
			
 
				                              + "/data/useragents.json", 'r', encoding='utf-8').read())
			
 
				 
			
@@ -415,3 +419,18 @@ def to_string(obj):
 
				         return obj.__str__()
			
 
				     if hasattr(obj, '__repr__'):
			
 
				         return obj.__repr__()
			
 
				+
			
 
				+
			
 
				+def ecma_unescape(s):
			
 
				+    """
			
 
				+    Python implementation of the JavaScript unescape() function
			
 
				+
			
 
				+    https://www.ecma-international.org/ecma-262/6.0/#sec-unescape-string
			
 
				+    https://developer.mozilla.org/fr/docs/Web/JavaScript/Reference/Objets_globaux/unescape
			
 
				+    """
			
 
				+    # s = unicode(s)
			
 
				+    # "%u5409" becomes "吉"
			
 
				+    s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
			
 
				+    # "%20" becomes " ", "%F3" becomes "ó"
			
 
				+    s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
			
 
				+    return s
			
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -90,6 +90,13 @@ class TestUtils(SearxTestCase):
 
				         self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
			
 
				         self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
			
 
				 
			
 
				+    def test_ecma_unscape(self):
			
 
				+        self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
			
 
				+        self.assertEqual(utils.ecma_unescape('text using %xx: %F3'),
			
 
				+                         u'text using %xx: ó')
			
 
				+        self.assertEqual(utils.ecma_unescape('text using %u: %u5409, %u4E16%u754c'),
			
 
				+                         u'text using %u: 吉, 世界')
			
 
				+
			
 
				 
			
 
				 class TestHTMLTextExtractor(SearxTestCase):