Browse Source

Merge pull request #1444 from Venca24/devel_google_videos

[fix] google videos engine
Noémi Ványi 6 years ago
parent
commit
abcbcec0b5
2 changed files with 41 additions and 10 deletions
  1. + 18 - 6
      searx/engines/google_videos.py
  2. + 23 - 4
      tests/unit/engines/test_google_videos.py

+ 18 - 6
searx/engines/google_videos.py

@@ -7,7 +7,7 @@
  @using-api   no
  @results     HTML
  @stable      no
- @parse       url, title, content
+ @parse       url, title, content, thumbnail
 """
 
 from datetime import date, timedelta
@@ -15,7 +15,7 @@ from json import loads
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
-
+import re
 
 # engine dependent config
 categories = ['videos']
@@ -25,7 +25,7 @@ time_range_support = True
 number_of_results = 10
 
 search_url = 'https://www.google.com/search'\
-    '?{query}'\
+    '?q={query}'\
     '&tbm=vid'\
     '&{search_options}'
 time_range_attr = "qdr:{range}"
@@ -69,15 +69,27 @@ def response(resp):
     # parse results
     for result in dom.xpath('//div[@class="g"]'):
 
-        title = extract_text(result.xpath('.//h3/a'))
-        url = result.xpath('.//h3/a/@href')[0]
+        title = extract_text(result.xpath('.//h3'))
+        url = result.xpath('.//div[@class="r"]/a/@href')[0]
         content = extract_text(result.xpath('.//span[@class="st"]'))
 
+        # get thumbnails
+        script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
+        id = result.xpath('.//div[@class="s"]//img/@id')[0]
+        thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
+                                     script)
+        tmp = []
+        if len(thumbnails_data) != 0:
+            tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+        thumbnail = ''
+        if len(tmp) != 0:
+            thumbnail = tmp[-1]
+
         # append result
         results.append({'url': url,
                         'title': title,
                         'content': content,
-                        'thumbnail': '',
+                        'thumbnail': thumbnail,
                         'template': 'videos.html'})
 
     return results

+ 23 - 4
tests/unit/engines/test_google_videos.py

@@ -30,16 +30,34 @@ class TestGoogleVideosEngine(SearxTestCase):
         <div>
             <div>
                 <div class="g">
-                    <div>
-                        <h3><a href="url_1">Title 1</h3>
+                    <div class="r">
+                        <a href="url_1"><h3>Title 1</h3></a>
+                    </div>
+                    <div class="s">
+                        <div>
+                            <a>
+                                <g-img>
+                                    <img id="vidthumb1">
+                                </g-img>
+                            </a>
+                        </div>
                     </div>
                     <div>
                         <span class="st">Content 1</span>
                     </div>
                 </div>
                 <div class="g">
-                    <div>
-                        <h3><a href="url_2">Title 2</h3>
+                    <div class="r">
+                        <a href="url_2"><h3>Title 2</h3></a>
+                    </div>
+                    <div class="s">
+                        <div>
+                            <a>
+                                <g-img>
+                                    <img id="vidthumb2">
+                                </g-img>
+                            </a>
+                        </div>
                     </div>
                     <div>
                         <span class="st">Content 2</span>
@@ -47,6 +65,7 @@ class TestGoogleVideosEngine(SearxTestCase):
                 </div>
             </div>
         </div>
+        <script>function _setImagesSrc(c,d,e){}</script>
         """
         response = mock.Mock(text=html)
         results = google_videos.response(response)