Browse Source

[fix] fixes google play engines and adds thumbnails to their results (#1612)

fix google play apps, google play movies, google play music engines

xpath engine: thumbnail_xpath can define an optional thumbnail
Venca24 5 years ago
parent
commit
87baa74a86
2 changed files with 37 additions and 17 deletions
  1. 16 2
      searx/engines/xpath.py
  2. 21 15
      searx/settings.yml

+ 16 - 2
searx/engines/xpath.py

@@ -7,6 +7,7 @@ search_url = None
 url_xpath = None
 content_xpath = None
 title_xpath = None
+thumbnail_xpath = False
 paging = False
 suggestion_xpath = ''
 results_xpath = ''
@@ -40,7 +41,9 @@ def extract_text(xpath_results):
         return ''.join(xpath_results)
     else:
         # it's a element
-        text = html.tostring(xpath_results, encoding='unicode', method='text', with_tail=False)
+        text = html.tostring(
+            xpath_results, encoding='unicode', method='text', with_tail=False
+        )
         text = text.strip().replace('\n', ' ')
         return ' '.join(text.split())
 
@@ -105,7 +108,18 @@ def response(resp):
             url = extract_url(result.xpath(url_xpath), search_url)
             title = extract_text(result.xpath(title_xpath))
             content = extract_text(result.xpath(content_xpath))
-            results.append({'url': url, 'title': title, 'content': content})
+            tmp_result = {'url': url, 'title': title, 'content': content}
+
+            # add thumbnail if available
+            thumbnail = None
+            if thumbnail_xpath:
+                thumbnail = extract_url(
+                    result.xpath(thumbnail_xpath), search_url
+                )
+            if thumbnail:
+                tmp_result['img_src'] = thumbnail
+
+            results.append(tmp_result)
     else:
         for url, title, content in zip(
             (extract_url(x, search_url) for

+ 21 - 15
searx/settings.yml

@@ -311,31 +311,37 @@ engines:
     shortcut : gos
 
   - name : google play apps
-    engine        : xpath
-    search_url    : https://play.google.com/store/search?q={query}&c=apps
-    url_xpath     : //a[@class="title"]/@href
-    title_xpath   : //a[@class="title"]
-    content_xpath : //a[@class="subtitle"]
+    engine : xpath
+    search_url : https://play.google.com/store/search?q={query}&c=apps
+    results_xpath : '//div[@class="WHE7ib mpg5gc"]'
+    title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a'
+    url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href'
+    content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]'
+    thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src'
     categories : files
     shortcut : gpa
     disabled : True
 
   - name : google play movies
-    engine        : xpath
-    search_url    : https://play.google.com/store/search?q={query}&c=movies
-    url_xpath     : //a[@class="title"]/@href
-    title_xpath   : //a[@class="title"]/@title
-    content_xpath : //a[contains(@class, "subtitle")]
+    engine : xpath
+    search_url : https://play.google.com/store/search?q={query}&c=movies
+    results_xpath : '//div[@class="WHE7ib mpg5gc"]'
+    title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a'
+    url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href'
+    content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]'
+    thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src'
     categories : videos
     shortcut : gpm
     disabled : True
 
   - name : google play music
-    engine        : xpath
-    search_url    : https://play.google.com/store/search?q={query}&c=music
-    url_xpath     : //a[@class="title"]/@href
-    title_xpath   : //a[@class="title"]
-    content_xpath : //a[@class="subtitle"]
+    engine : xpath
+    search_url : https://play.google.com/store/search?q={query}&c=music
+    results_xpath : '//div[@class="WHE7ib mpg5gc"]'
+    title_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a'
+    url_xpath : './/div[@class="RZEgze"]//div[@title and not(@title="")]/a/@href'
+    content_xpath : './/div[@class="RZEgze"]//a[@class="mnKHRc"]'
+    thumbnail_xpath : './/div[@class="uzcko"]/div/span[1]/img/@data-src'
     categories : music
     shortcut : gps
     disabled : True