# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""ARD: `Tagesschau API`_

The Tagesschau is a news program of the ARD.  Via the `Tagesschau API`_,
current news and media reports are available in JSON format.  The
`Bundesstelle für Open Data`_ offers a `OpenAPI`_ portal at bundDEV_ where
APIs are documented and can be tested.

This SearXNG engine uses the `/api2u/search`_ API.

.. _/api2u/search: http://tagesschau.api.bund.dev/
.. _bundDEV: https://bund.dev/apis
.. _Bundesstelle für Open Data: https://github.com/bundesAPI
.. _Tagesschau API: https://github.com/AndreasFischer1985/tagesschau-api/blob/main/README_en.md
.. _OpenAPI: https://swagger.io/specification/

"""
from typing import TYPE_CHECKING
from datetime import datetime
from urllib.parse import urlencode
import re

if TYPE_CHECKING:
    import logging

    # NOTE(review): ``logger`` is presumably injected into the engine module
    # by the SearXNG core at load time; the annotation below only informs
    # static type checkers -- confirm against the engine loader.
    logger: logging.Logger
# Engine metadata displayed by SearXNG (see the engine "about" convention).
about = {
    'website': "https://tagesschau.de",
    'wikidata_id': "Q703907",
    'official_api_documentation': None,
    'use_official_api': True,
    'require_api_key': False,
    'results': 'JSON',
    'language': 'de',
}

categories = ['general', 'news']
paging = True
results_per_page = 10
base_url = "https://www.tagesschau.de"

use_source_url = True
"""When set to false, display URLs from Tagesschau, and not the actual source
(e.g. NDR, WDR, SWR, HR, ...)

.. note::

   The actual source may contain additional content, such as commentary, that is
   not displayed in the Tagesschau.
"""
  42. def request(query, params):
  43. args = {
  44. 'searchText': query,
  45. 'pageSize': results_per_page,
  46. 'resultPage': params['pageno'] - 1,
  47. }
  48. params['url'] = f"{base_url}/api2u/search?{urlencode(args)}"
  49. return params
  50. def response(resp):
  51. results = []
  52. json = resp.json()
  53. for item in json['searchResults']:
  54. item_type = item.get('type')
  55. if item_type in ('story', 'webview'):
  56. results.append(_story(item))
  57. elif item_type == 'video':
  58. results.append(_video(item))
  59. else:
  60. logger.error("unknow result type: %s", item_type)
  61. return results
  62. def _story(item):
  63. return {
  64. 'title': item['title'],
  65. 'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
  66. 'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
  67. 'content': item['firstSentence'],
  68. 'url': item['shareURL'] if use_source_url else item['detailsweb'],
  69. }
  70. def _video(item):
  71. streams = item['streams']
  72. video_url = streams.get('h264s') or streams.get('h264m') or streams.get('h264l') or streams.get('h264xl')
  73. title = item['title']
  74. if "_vapp.mxf" in title:
  75. title = title.replace("_vapp.mxf", "")
  76. title = re.sub(r"APP\d+ (FC-)?", "", title, count=1)
  77. return {
  78. 'template': 'videos.html',
  79. 'title': title,
  80. 'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
  81. 'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
  82. 'content': item.get('firstSentence', ''),
  83. 'iframe_src': video_url,
  84. 'url': video_url,
  85. }