Browse Source

extract publishDate from vimeo

Thomas Pointhuber 11 years ago
parent
commit
993271bed3
2 changed files with 7 additions and 1 deletion
  1. 6 0
      searx/engines/vimeo.py
  2. 1 1
      searx/engines/yahoo_news.py

+ 6 - 0
searx/engines/vimeo.py

@@ -2,6 +2,8 @@ from urllib import urlencode
 from HTMLParser import HTMLParser
 from lxml import html
 from xpath import extract_text
+from datetime import datetime
+from dateutil import parser
 
 base_url = 'http://vimeo.com'
 search_url = base_url + '/search?{query}'
@@ -10,6 +12,7 @@ content_xpath = None
 title_xpath = None
 results_xpath = ''
 content_tpl = '<a href="{0}">  <img src="{2}"/> </a>'
+publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
 
 # the cookie set by vimeo contains all the following values,
 # but only __utma seems to be required
@@ -40,9 +43,12 @@ def response(resp):
         url = base_url + result.xpath(url_xpath)[0]
         title = p.unescape(extract_text(result.xpath(title_xpath)))
         thumbnail = extract_text(result.xpath(content_xpath)[0])
+        publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0]))
+
         results.append({'url': url,
                         'title': title,
                         'content': content_tpl.format(url, title, thumbnail),
                         'template': 'videos.html',
+                        'publishedDate': publishedDate,
                         'thumbnail': thumbnail})
     return results

+ 1 - 1
searx/engines/yahoo_news.py

@@ -53,7 +53,7 @@ def response(resp):
                     - timedelta(hours=int(timeNumbers[0]))\
                     - timedelta(minutes=int(timeNumbers[1]))
             else:
-                publishedDate =parser.parse(publishedDate)
+                publishedDate = parser.parse(publishedDate)
 
         if publishedDate.year == 1900:
             publishedDate = publishedDate.replace(year=datetime.now().year)