bpb.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """BPB refers to ``Bundeszentrale für politische Bildung``, which is a German
  4. governmental institution aiming to reduce misinformation by providing resources
  5. about politics and history.
  6. """
  7. from datetime import datetime
  8. from urllib.parse import urlencode
  9. about = {
  10. 'website': "https://www.bpb.de",
  11. 'official_api_documentation': None,
  12. 'use_official_api': False,
  13. 'require_api_key': False,
  14. 'results': 'JSON',
  15. 'language': 'de',
  16. }
  17. paging = True
  18. categories = ['general']
  19. base_url = "https://www.bpb.de"
  20. def request(query, params):
  21. args = {
  22. 'query[term]': query,
  23. 'page': params['pageno'] - 1,
  24. 'sort[direction]': 'descending',
  25. 'payload[nid]': 65350,
  26. }
  27. params['url'] = f"{base_url}/bpbapi/filter/search?{urlencode(args)}"
  28. return params
  29. def response(resp):
  30. results = []
  31. json_resp = resp.json()
  32. for result in json_resp['teaser']:
  33. img_src = None
  34. if result['teaser']['image']:
  35. img_src = base_url + result['teaser']['image']['sources'][-1]['url']
  36. metadata = result['extension']['overline']
  37. authors = ', '.join(author['name'] for author in result['extension'].get('authors', []))
  38. if authors:
  39. metadata += f" | {authors}"
  40. publishedDate = None
  41. if result['extension'].get('publishingDate'):
  42. publishedDate = datetime.utcfromtimestamp(result['extension']['publishingDate'])
  43. results.append(
  44. {
  45. 'url': base_url + result['teaser']['link']['url'],
  46. 'title': result['teaser']['title'],
  47. 'content': result['teaser']['text'],
  48. 'img_src': img_src,
  49. 'publishedDate': publishedDate,
  50. 'metadata': metadata,
  51. }
  52. )
  53. return results