google_news.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. #!/usr/bin/env python
  2. from urllib import urlencode
  3. from json import loads
  4. from datetime import datetime, timedelta
  5. categories = ['news']
  6. url = 'https://ajax.googleapis.com/'
  7. search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
  8. paging = True
  9. language_support = True
  10. def request(query, params):
  11. offset = (params['pageno'] - 1) * 8
  12. language = 'en-US'
  13. if params['language'] != 'all':
  14. language = params['language'].replace('_', '-')
  15. params['url'] = search_url.format(offset=offset,
  16. query=urlencode({'q': query}),
  17. language=language)
  18. return params
  19. def response(resp):
  20. results = []
  21. search_res = loads(resp.text)
  22. if not search_res.get('responseData', {}).get('results'):
  23. return []
  24. for result in search_res['responseData']['results']:
  25. # S.149 (159), library.pdf
  26. # datetime.strptime("Mon, 10 Mar 2014 16:26:15 -0700", "%a, %d %b %Y %H:%M:%S %z")
  27. # publishedDate = parse(result['publishedDate'])
  28. publishedDate = datetime.strptime(str.join(' ',result['publishedDate'].split(None)[0:5]), "%a, %d %b %Y %H:%M:%S")
  29. #utc_offset = timedelta(result['publishedDate'].split(None)[5]) # local = utc + offset
  30. #publishedDate = publishedDate + utc_offset
  31. results.append({'url': result['unescapedUrl'],
  32. 'title': result['titleNoFormatting'],
  33. 'publishedDate': publishedDate,
  34. 'content': result['content']})
  35. return results