digg.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Digg (News, Social media)
"""
# pylint: disable=missing-function-docstring

from json import loads
from urllib.parse import urlencode
from datetime import datetime

from lxml import html

# about
about = {
    "website": 'https://digg.com',
    "wikidata_id": 'Q270478',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['news', 'social media']
paging = True
base_url = 'https://digg.com'

# search-url
search_url = base_url + (
    '/api/search/'
    '?{query}'
    '&from={position}'
    '&size=20'
    '&format=html'
)
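# Example of a fully formatted request URL (the query 'foo' and page 2 are
# illustrative values, not from the source): request() below would produce
#   https://digg.com/api/search/?q=foo&from=20&size=20&format=html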
def request(query, params):
    offset = (params['pageno'] - 1) * 20
    params['url'] = search_url.format(
        query=urlencode({'q': query}),
        position=offset,
    )
    return params
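# Minimal usage sketch (assumed engine contract: the caller passes a params
# dict that already contains 'pageno', and request() fills in params['url']):
#
#   params = {'pageno': 2}
#   request('foo', params)
#   assert params['url'].endswith('?q=foo&from=20&size=20&format=html')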
def response(resp):
    results = []

    # parse results
    for result in loads(resp.text)['mapped']:

        # strip html tags and superfluous quotation marks from content
        content = html.document_fromstring(
            result['excerpt']
        ).text_content()
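        # Illustration (the excerpt markup is assumed): lxml drops the tags
        # and unescapes entities such as the quotation marks, e.g.
        #   html.document_fromstring('<b>&quot;Foo&quot; &amp; bar</b>').text_content()
        #   == '"Foo" & bar'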
        # 'created': {'ISO': '2020-10-16T14:09:55Z', ...}
        published = datetime.strptime(
            result['created']['ISO'], '%Y-%m-%dT%H:%M:%SZ'
        )
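        # e.g. datetime.strptime('2020-10-16T14:09:55Z', '%Y-%m-%dT%H:%M:%SZ')
        #      == datetime(2020, 10, 16, 14, 9, 55)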
        results.append({
            'url': result['url'],
            'title': result['title'],
            'content': content,
            'template': 'videos.html',
            'publishedDate': published,
            'thumbnail': result['images']['thumbImage'],
        })

    return results
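# Shape of the API payload that response() assumes, reconstructed from the
# field accesses above (only the keys actually read are listed; other fields
# in the real response are unknown):
#
#   {
#     "mapped": [
#       {
#         "url": "...",
#         "title": "...",
#         "excerpt": "<p>...</p>",
#         "created": {"ISO": "2020-10-16T14:09:55Z"},
#         "images": {"thumbImage": "..."}
#       }
#     ]
#   }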