Browse Source

[FIX] google videos thumbnails

Venca24 6 years ago
parent
commit
cf26aba93b
1 changed file with 17 additions and 3 deletions
  1. 17 3
      searx/engines/google_videos.py

+ 17 - 3
searx/engines/google_videos.py

@@ -7,15 +7,16 @@
  @using-api   no
  @using-api   no
  @results     HTML
  @results     HTML
  @stable      no
  @stable      no
- @parse       url, title, content
+ @parse       url, title, content, thumbnail
 """
 """
 
 
 from datetime import date, timedelta
 from datetime import date, timedelta
 from json import loads
 from json import loads
 from lxml import html
 from lxml import html
+from searx.engines import logger
 from searx.engines.xpath import extract_text
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
 from searx.url_utils import urlencode
-
+import re
 
 
 # engine dependent config
 # engine dependent config
 categories = ['videos']
 categories = ['videos']
@@ -73,11 +74,24 @@ def response(resp):
         url = result.xpath('.//div[@class="r"]/a/@href')[0]
         url = result.xpath('.//div[@class="r"]/a/@href')[0]
         content = extract_text(result.xpath('.//span[@class="st"]'))
         content = extract_text(result.xpath('.//span[@class="st"]'))
 
 
+        # get thumbnails
+        script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
+        id = result.xpath('.//div[@class="s"]//img/@id')[0]
+        thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
+                                     script)
+        logger.debug('google video engine: ' + id + ' matched ' + str(len(thumbnails_data)) + ' times (thumbnail)')
+        tmp = []
+        if len(thumbnails_data) != 0:
+            tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+        thumbnail = ''
+        if len(tmp) != 0:
+            thumbnail = tmp[-1]
+
         # append result
         # append result
         results.append({'url': url,
         results.append({'url': url,
                         'title': title,
                         'title': title,
                         'content': content,
                         'content': content,
-                        'thumbnail': '',
+                        'thumbnail': thumbnail,
                         'template': 'videos.html'})
                         'template': 'videos.html'})
 
 
     return results
     return results