vimeo.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. from urllib import urlencode
  2. from HTMLParser import HTMLParser
  3. from xpath import extract_text
  4. from lxml import html
  5. base_url = 'http://vimeo.com'
  6. search_url = base_url + '/search?{query}'
  7. # the cookie set by vime contains all the following values, but only __utma seems to be requiered
  8. Cookie = {
  9. #'vuid':'918282893.1027205400'
  10. # 'ab_bs':'%7B%223%22%3A279%7D'
  11. '__utma':'00000000.000#0000000.0000000000.0000000000.0000000000.0'
  12. # '__utmb':'18302654.1.10.1388942090'
  13. #, '__utmc':'18302654'
  14. #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
  15. #, '__utml':'search'
  16. }
  17. def request(query, params):
  18. params['url'] = search_url.format(query=urlencode({'q' :query}))
  19. print params['url']
  20. params['cookies'] = Cookie
  21. return params
  22. def response(resp):
  23. results = []
  24. dom = html.fromstring(resp.text)
  25. p = HTMLParser()
  26. for result in dom.xpath(results_xpath):
  27. url = base_url + result.xpath(url_xpath)[0]
  28. title = p.unescape(extract_text(result.xpath(title_xpath)))
  29. content = '<a href="{0}"> <img src="{2}"/> </a>'.format(url, title, extract_text(result.xpath(content_xpath)[0]))
  30. results.append({'url': url, 'title': title, 'content': content})
  31. return results