google_news.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. #!/usr/bin/env python
  2. from urllib import urlencode
  3. from json import loads
  4. from datetime import datetime
  5. categories = ['news']
  6. url = 'https://ajax.googleapis.com/'
  7. search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
  8. paging = True
  9. language_support = True
  10. def request(query, params):
  11. offset = (params['pageno'] - 1) * 8
  12. language = 'en-US'
  13. if params['language'] != 'all':
  14. language = params['language'].replace('_', '-')
  15. params['url'] = search_url.format(offset=offset,
  16. query=urlencode({'q': query}),
  17. language=language)
  18. return params
  19. def response(resp):
  20. results = []
  21. search_res = loads(resp.text)
  22. if not search_res.get('responseData', {}).get('results'):
  23. return []
  24. for result in search_res['responseData']['results']:
  25. # S.149 (159), library.pdf
  26. # datetime.strptime("Mon, 10 Mar 2014 16:26:15 -0700",
  27. # "%a, %d %b %Y %H:%M:%S %z")
  28. # publishedDate = parse(result['publishedDate'])
  29. publishedDate = datetime.strptime(
  30. str.join(' ', result['publishedDate'].split(None)[0:5]),
  31. "%a, %d %b %Y %H:%M:%S")
  32. #utc_offset = timedelta(result['publishedDate'].split(None)[5])
  33. # local = utc + offset
  34. #publishedDate = publishedDate + utc_offset
  35. results.append({'url': result['unescapedUrl'],
  36. 'title': result['titleNoFormatting'],
  37. 'publishedDate': publishedDate,
  38. 'content': result['content']})
  39. return results