# reddit.py (2.1 KB)
"""
 Reddit

 @website      https://www.reddit.com/
 @provide-api  yes (https://www.reddit.com/dev/api)

 @using-api    yes
 @results      JSON
 @stable       yes
 @parse        url, title, content, thumbnail, publishedDate
"""
  10. import json
  11. from cgi import escape
  12. from urllib import urlencode
  13. from urlparse import urlparse
  14. from datetime import datetime
  15. from searx.utils import searx_useragent
  16. # engine dependent config
  17. categories = ['general', 'images', 'news', 'social media']
  18. page_size = 25
  19. # search-url
  20. search_url = 'https://www.reddit.com/search.json?{query}'
  21. # do search-request
  22. def request(query, params):
  23. query = urlencode({'q': query,
  24. 'limit': page_size})
  25. params['url'] = search_url.format(query=query)
  26. # using searx User-Agent
  27. params['headers']['User-Agent'] = searx_useragent()
  28. return params
  29. # get response from search-request
  30. def response(resp):
  31. img_results = []
  32. text_results = []
  33. search_results = json.loads(resp.text)
  34. # return empty array if there are no results
  35. if 'data' not in search_results:
  36. return []
  37. posts = search_results.get('data', {}).get('children', [])
  38. # process results
  39. for post in posts:
  40. data = post['data']
  41. # extract post information
  42. params = {
  43. 'url': data['url'],
  44. 'title': data['title']
  45. }
  46. # if thumbnail field contains a valid URL, we need to change template
  47. thumbnail = data['thumbnail']
  48. url_info = urlparse(thumbnail)
  49. # netloc & path
  50. if url_info[1] != '' and url_info[2] != '':
  51. params['thumbnail_src'] = thumbnail
  52. params['template'] = 'images.html'
  53. img_results.append(params)
  54. else:
  55. created = datetime.fromtimestamp(data['created_utc'])
  56. content = escape(data['selftext'])
  57. if len(content) > 500:
  58. content = content[:500] + '...'
  59. params['content'] = content
  60. params['publishedDate'] = created
  61. text_results.append(params)
  62. # show images first and text results second
  63. return img_results + text_results