Browse Source

[fix] dailymotion engine: no more HTML tags in the description

Dalf 11 years ago
parent
commit
49c85fce51
2 changed files with 10 additions and 3 deletions
  1. 1 0
      engines.cfg_sample
  2. 9 3
      searx/engines/dailymotion.py

+ 1 - 0
engines.cfg_sample

@@ -82,5 +82,6 @@ categories = videos
 
 [dailymotion]
 engine = dailymotion
+locale = en_US
 categories = videos
 

+ 9 - 3
searx/engines/dailymotion.py

@@ -1,16 +1,17 @@
 from urllib import urlencode
+from lxml import html
 from json import loads
 from cgi import escape
 
 categories = ['videos']
-localization = 'en'
+locale = 'en_US'
 
 # see http://www.dailymotion.com/doc/api/obj-video.html
 search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
 
 def request(query, params):
     global search_url
-    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': localization }))
+    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
     return params
 
 
@@ -27,6 +28,11 @@ def response(resp):
         else:
             content = ''
         if res['description']:
-            content += escape(res['description'][:500])
+            description = text_content_from_html(res['description'])
+            content += description[:500]
         results.append({'url': url, 'title': title, 'content': content})
     return results
+
+def text_content_from_html(html_string):
+    desc_html = html.fragment_fromstring(html_string, create_parent=True)
+    return desc_html.text_content()