# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Digg (News, Social media)
"""

from json import loads
from urllib.parse import urlencode
from datetime import datetime

from lxml import html

# about
about = {
    "website": 'https://digg.com',
    "wikidata_id": 'Q270478',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['news', 'social media']
paging = True
base_url = 'https://digg.com'

# search-url
search_url = base_url + (
    '/api/search/'
    '?{query}'
    '&from={position}'
    '&size=20'
    '&format=html'
)
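
# e.g. a query for 'searx' on page 1 formats to (illustrative):
#   https://digg.com/api/search/?q=searx&from=0&size=20&format=html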


def request(query, params):
    offset = (params['pageno'] - 1) * 20
    params['url'] = search_url.format(
        query=urlencode({'q': query}),
        position=offset,
    )
    return params
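
# Usage sketch (hypothetical standalone call; in searx the framework builds
# and passes the params dict, which carries more keys than shown here):
#
#   params = {'pageno': 2}
#   request('linux', params)
#   # params['url'] ->
#   #   'https://digg.com/api/search/?q=linux&from=20&size=20&format=html'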


def response(resp):
    results = []

    # parse results
    for result in loads(resp.text)['mapped']:

        # strip html tags and superfluous quotation marks from content
        content = html.document_fromstring(
            result['excerpt']
        ).text_content()

        # 'created': {'ISO': '2020-10-16T14:09:55Z', ...}
        published = datetime.strptime(
            result['created']['ISO'], '%Y-%m-%dT%H:%M:%SZ'
        )

        results.append({
            'url': result['url'],
            'title': result['title'],
            'content': content,
            'template': 'videos.html',
            'publishedDate': published,
            'thumbnail': result['images']['thumbImage'],
        })

    return results
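

if __name__ == '__main__':
    # Minimal offline smoke test with a fabricated payload; the exact field
    # set returned by Digg's API is assumed here, mirrored from the parsing
    # above rather than taken from any documentation.
    class FakeResponse:  # stand-in for the response object searx passes in
        text = (
            '{"mapped": [{'
            '"url": "https://example.com/story",'
            ' "title": "Example story",'
            ' "excerpt": "<p>Some &quot;quoted&quot; text</p>",'
            ' "created": {"ISO": "2020-10-16T14:09:55Z"},'
            ' "images": {"thumbImage": "https://example.com/thumb.jpg"}}]}'
        )

    print(response(FakeResponse()))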