reddit.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
"""Reddit: search posts through the site's JSON search endpoint (``search.json``).
"""
  5. import json
  6. from datetime import datetime
  7. from urllib.parse import urlencode, urljoin, urlparse
  8. # about
  9. about = {
  10. "website": 'https://www.reddit.com/',
  11. "wikidata_id": 'Q1136',
  12. "official_api_documentation": 'https://www.reddit.com/dev/api',
  13. "use_official_api": True,
  14. "require_api_key": False,
  15. "results": 'JSON',
  16. }
  17. # engine dependent config
  18. categories = ['social media']
  19. page_size = 25
  20. # search-url
  21. base_url = 'https://www.reddit.com/'
  22. search_url = base_url + 'search.json?{query}'
  23. def request(query, params):
  24. query = urlencode({'q': query, 'limit': page_size})
  25. params['url'] = search_url.format(query=query)
  26. return params
  27. def response(resp):
  28. img_results = []
  29. text_results = []
  30. search_results = json.loads(resp.text)
  31. # return empty array if there are no results
  32. if 'data' not in search_results:
  33. return []
  34. posts = search_results.get('data', {}).get('children', [])
  35. # process results
  36. for post in posts:
  37. data = post['data']
  38. # extract post information
  39. params = {'url': urljoin(base_url, data['permalink']), 'title': data['title']}
  40. # if thumbnail field contains a valid URL, we need to change template
  41. thumbnail = data['thumbnail']
  42. url_info = urlparse(thumbnail)
  43. # netloc & path
  44. if url_info[1] != '' and url_info[2] != '':
  45. params['img_src'] = data['url']
  46. params['thumbnail_src'] = thumbnail
  47. params['template'] = 'images.html'
  48. img_results.append(params)
  49. else:
  50. created = datetime.fromtimestamp(data['created_utc'])
  51. content = data['selftext']
  52. if len(content) > 500:
  53. content = content[:500] + '...'
  54. params['content'] = content
  55. params['publishedDate'] = created
  56. text_results.append(params)
  57. # show images first and text results second
  58. return img_results + text_results