vimeo.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. from urllib import urlencode
  2. from HTMLParser import HTMLParser
  3. from xpath import extract_text
  4. from lxml import html
  5. base_url = 'http://vimeo.com'
  6. search_url = base_url + '/search?{query}'
  7. url_xpath = None
  8. content_xpath = None
  9. title_xpath = None
  10. results_xpath = ''
  11. # the cookie set by vimeo contains all the following values, but only __utma seems to be requiered
  12. cookie = {
  13. #'vuid':'918282893.1027205400'
  14. # 'ab_bs':'%7B%223%22%3A279%7D'
  15. '__utma':'00000000.000#0000000.0000000000.0000000000.0000000000.0'
  16. # '__utmb':'18302654.1.10.1388942090'
  17. #, '__utmc':'18302654'
  18. #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
  19. #, '__utml':'search'
  20. }
  21. def request(query, params):
  22. params['url'] = search_url.format(query=urlencode({'q' :query}))
  23. params['cookies'] = cookie
  24. return params
  25. def response(resp):
  26. results = []
  27. dom = html.fromstring(resp.text)
  28. p = HTMLParser()
  29. for result in dom.xpath(results_xpath):
  30. url = base_url + result.xpath(url_xpath)[0]
  31. title = p.unescape(extract_text(result.xpath(title_xpath)))
  32. content = '<a href="{0}"> <img src="{2}"/> </a>'.format(url, title, extract_text(result.xpath(content_xpath)[0]))
  33. results.append({'url': url, 'title': title, 'content': content})
  34. return results