6 years ago · 1bed39e6cb
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -15,7 +15,7 @@
 
				 from json import loads
			
 
				 from datetime import datetime
			
 
				 from searx.url_utils import urlencode
			
 
				-from searx.utils import match_language
			
 
				+from searx.utils import match_language, html_to_text
			
 
				 
			
 
				 # engine dependent config
			
 
				 categories = ['videos']
			
@@ -59,7 +59,7 @@ def response(resp):
 
				     for res in search_res['list']:
			
 
				         title = res['title']
			
 
				         url = res['url']
			
 
				-        content = res['description']
			
 
				+        content = html_to_text(res['description'])
			
 
				         thumbnail = res['thumbnail_360_url']
			
 
				         publishedDate = datetime.fromtimestamp(res['created_time'], None)
			
 
				         embedded = embedded_url.format(videoid=res['id'])
			
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -75,15 +75,17 @@ def response(resp):
 
				 
			
 
				         # get thumbnails
			
 
				         script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
			
 
				-        id = result.xpath('.//div[@class="s"]//img/@id')[0]
			
 
				-        thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
			
 
				-                                     script)
			
 
				-        tmp = []
			
 
				-        if len(thumbnails_data) != 0:
			
 
				-            tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
			
 
				-        thumbnail = ''
			
 
				-        if len(tmp) != 0:
			
 
				-            thumbnail = tmp[-1]
			
 
				+        ids = result.xpath('.//div[@class="s"]//img/@id')
			
 
				+        if len(ids) > 0:
			
 
				+            thumbnails_data = \
			
 
				+                re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + ids[0],
			
 
				+                           script)
			
 
				+            tmp = []
			
 
				+            if len(thumbnails_data) != 0:
			
 
				+                tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
			
 
				+            thumbnail = ''
			
 
				+            if len(tmp) != 0:
			
 
				+                thumbnail = tmp[-1]
			
 
				 
			
 
				         # append result
			
 
				         results.append({'url': url,
			
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -67,12 +67,8 @@ def response(resp):
 
				             if videoid is not None:
			
 
				                 url = base_youtube_url + videoid
			
 
				                 thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'
			
 
				-                title = video.get('title', {}).get('simpleText', videoid)
			
 
				-                description_snippet = video.get('descriptionSnippet', {})
			
 
				-                if 'runs' in description_snippet:
			
 
				-                    content = reduce(lambda a, b: a + b.get('text', ''), description_snippet.get('runs'), '')
			
 
				-                else:
			
 
				-                    content = description_snippet.get('simpleText', '')
			
 
				+                title = get_text_from_json(video.get('title', {}))
			
 
				+                content = get_text_from_json(video.get('descriptionSnippet', {}))
			
 
				                 embedded = embedded_url.format(videoid=videoid)
			
 
				 
			
 
				                 # append result
			
@@ -85,3 +81,10 @@ def response(resp):
 
				 
			
 
				     # return results
			
 
				     return results
			
 
				+
			
 
				+
			
 
				+def get_text_from_json(element):
			
 
				+    if 'runs' in element:
			
 
				+        return reduce(lambda a, b: a + b.get('text', ''), element.get('runs'), '')
			
 
				+    else:
			
 
				+        return element.get('simpleText', '')