tubearchivist.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """`Tube Archivist`_ - *Your self hosted YouTube media server.*
  3. .. _Tube Archivist: https://www.tubearchivist.com
  4. This engine connects with a self-hosted instance of `Tube Archivist`_ to allow
  5. searching for your hosted videos.
  6. `Tube Archivist`_ (TA) requires authentication for all image loads via cookie
  7. authentication. What this means is that by default, SearXNG will have no way to
  8. pull images from TA (as there is no way to pass cookies in a URL string only).
  9. In the meantime while work is done on the TA side, this can be worked around by
  10. bypassing auth for images in TA by altering the default TA nginx file.
  11. This is located in the main tubearchivist docker container at::
  12. /etc/nginx/sites-available/default
It is **strongly** recommended to first set up the initial connection and
verify that searching works (with broken images), and only then attempt this
change. This will limit any debugging to only images, rather than
tokens/networking.
  17. Steps to enable **unauthenticated** metadata access for channels and videos:
  18. #. Perform any backups of TA before editing core configurations.
  19. #. Copy the contents of the file ``/etc/nginx/sites-available/default`` in the
  20. TA docker container
  21. #. Edit ``location /cache/videos`` and ``location /cache/channels``. Comment
  22. out the line ``auth_request /api/ping/;`` to ``# auth_request /api/ping/;``.
  23. #. Save the file to wherever you normally store your docker configuration.
  24. #. Mount this new configuration over the default configuration. With ``docker
  25. run``, this would be::
  26. -v ./your-new-config.yml:/etc/nginx/sites-available/default
  27. With ``docker compose``, this would be::
  28. - "./your-new-config.yml:/etc/nginx/sites-available/default:ro"
  29. #. Start the TA container.
  30. After these steps, double check that TA works as normal (nothing should be
  31. different on the TA side). Searching again should now show images.
  32. Configuration
  33. =============
  34. The engine has the following required settings:
  35. - :py:obj:`base_url`
  36. - :py:obj:`ta_token`
  37. Optional settings:
  38. - :py:obj:`ta_link_to_mp4`
  39. .. code:: yaml
  40. - name: tubearchivist
  41. engine: tubearchivist
  42. shortcut: tuba
  43. base_url:
  44. ta_token:
  45. ta_link_to_mp4: true
  46. Implementations
  47. ===============
  48. """
  49. from __future__ import annotations
  50. from urllib.parse import urlencode
  51. from dateutil.parser import parse
  52. from searx.utils import html_to_text, humanize_number
  53. from searx.result_types import EngineResults
  54. about = {
  55. # pylint: disable=line-too-long
  56. "website": 'https://www.tubearchivist.com',
  57. "official_api_documentation": 'https://docs.tubearchivist.com/api/introduction/',
  58. "use_official_api": True,
  59. "require_api_key": False,
  60. "results": 'JSON',
  61. }
  62. # engine dependent config
  63. categories = ["videos"]
  64. paging = True
  65. base_url = ""
  66. """Base URL of the Tube Archivist instance. Fill this in with your own
  67. Tube Archivist URL (``http://your-instance:port``)."""
  68. ta_token: str = ""
  69. """The API key to use for Authorization_ header. Can be found under:
  70. :menuselection:`Settings --> User --> Admin Interface`.
  71. .. _Authorization: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Authorization
  72. """
  73. ta_link_to_mp4: bool = False
  74. """Optional, if true SearXNG will link directly to the mp4 of the video to play
  75. in the browser. The default behavior is to link into TubeArchivist's interface
  76. directly."""
  77. def absolute_url(relative_url):
  78. return f'{base_url.rstrip("/")}{relative_url}'
  79. def init(_):
  80. if not base_url:
  81. raise ValueError('tubearchivist engine: base_url is unset')
  82. if not ta_token:
  83. raise ValueError('tubearchivist engine: ta_token is unset')
  84. def request(query, params):
  85. if not query:
  86. return False
  87. args = {'query': query}
  88. params['url'] = f"{base_url.rstrip('/')}/api/search?{urlencode(args)}"
  89. params['headers']['Authorization'] = f'Token {ta_token}'
  90. return params
  91. def response(resp) -> EngineResults:
  92. results = EngineResults()
  93. video_response(resp, results)
  94. return results
  95. def video_response(resp, results: EngineResults) -> None:
  96. """Parse video response from Tubearchivist instances."""
  97. json_data = resp.json()
  98. if 'results' not in json_data:
  99. return
  100. for channel_result in json_data['results']['channel_results']:
  101. channel_url = absolute_url(f'/channel/{channel_result["channel_id"]}')
  102. res = results.types.MainResult(
  103. url=channel_url,
  104. title=channel_result['channel_name'],
  105. content=html_to_text(channel_result['channel_description']),
  106. author=channel_result['channel_name'],
  107. views=humanize_number(channel_result['channel_subs']),
  108. thumbnail=f'{absolute_url(channel_result["channel_thumb_url"])}?auth={ta_token}',
  109. )
  110. results.add(result=res)
  111. for video_result in json_data['results']['video_results']:
  112. metadata = list(filter(None, [video_result['channel']['channel_name'], *video_result.get('tags', [])]))[:5]
  113. if ta_link_to_mp4:
  114. url = f'{base_url.rstrip("/")}{video_result["media_url"]}'
  115. else:
  116. url = f'{base_url.rstrip("/")}/?videoId={video_result["youtube_id"]}'
  117. # a type for the video.html template is not yet implemented
  118. # --> using LegacyResult
  119. kwargs = {
  120. 'template': 'videos.html',
  121. 'url': url,
  122. 'title': video_result['title'],
  123. 'content': html_to_text(video_result['description']),
  124. 'author': video_result['channel']['channel_name'],
  125. 'length': video_result['player']['duration_str'],
  126. 'views': humanize_number(video_result['stats']['view_count']),
  127. 'publishedDate': parse(video_result['published']),
  128. 'thumbnail': f'{absolute_url(video_result["vid_thumb_url"])}?auth={ta_token}',
  129. 'metadata': ' | '.join(metadata),
  130. }
  131. results.add(results.types.LegacyResult(**kwargs))