123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187 |
- # SPDX-License-Identifier: AGPL-3.0-or-later
- """`Tube Archivist`_ - *Your self hosted YouTube media server.*
- .. _Tube Archivist: https://www.tubearchivist.com
- This engine connects with a self-hosted instance of `Tube Archivist`_ to allow
- searching for your hosted videos.
- `Tube Archivist`_ (TA) requires authentication for all image loads via cookie
- authentication. What this means is that by default, SearXNG will have no way to
- pull images from TA (as there is no way to pass cookies in a URL string only).
- In the meantime while work is done on the TA side, this can be worked around by
- bypassing auth for images in TA by altering the default TA nginx file.
- This is located in the main tubearchivist docker container at::
- /etc/nginx/sites-available/default
- It is **strongly** recommended to first set up the initial connection and
- verify that searching works (with broken images), and only then attempt this
- change. This will limit any debugging to only images, rather than
- tokens/networking.
- Steps to enable **unauthenticated** metadata access for channels and videos:
- #. Perform any backups of TA before editing core configurations.
- #. Copy the contents of the file ``/etc/nginx/sites-available/default`` in the
- TA docker container
- #. Edit ``location /cache/videos`` and ``location /cache/channels``. Comment
- out the line ``auth_request /api/ping/;`` to ``# auth_request /api/ping/;``.
- #. Save the file to wherever you normally store your docker configuration.
- #. Mount this new configuration over the default configuration. With ``docker
- run``, this would be::
- -v ./your-new-config.yml:/etc/nginx/sites-available/default
- With ``docker compose``, this would be::
- - "./your-new-config.yml:/etc/nginx/sites-available/default:ro"
- #. Start the TA container.
- After these steps, double check that TA works as normal (nothing should be
- different on the TA side). Searching again should now show images.
- Configuration
- =============
- The engine has the following required settings:
- - :py:obj:`base_url`
- - :py:obj:`ta_token`
- Optional settings:
- - :py:obj:`ta_link_to_mp4`
- .. code:: yaml
- - name: tubearchivist
- engine: tubearchivist
- shortcut: tuba
- base_url:
- ta_token:
- ta_link_to_mp4: true
- Implementations
- ===============
- """
- from __future__ import annotations
- from urllib.parse import urlencode
- from dateutil.parser import parse
- from searx.utils import html_to_text, humanize_number
- from searx.result_types import EngineResults
# Descriptive metadata about this engine.
about = {
    # pylint: disable=line-too-long
    "website": 'https://www.tubearchivist.com',
    "official_api_documentation": 'https://docs.tubearchivist.com/api/introduction/',
    "use_official_api": True,
    # init() rejects an empty ``ta_token`` and request() sends it in the
    # ``Authorization`` header on every call, so an API key is mandatory.
    "require_api_key": True,
    "results": 'JSON',
}
# engine dependent config
categories = ["videos"]

# request() builds the search URL from the query alone and never forwards a
# page number, so advertising paging would only repeat the first page.
paging = False

base_url: str = ""
"""Base URL of the Tube Archivist instance.  Fill this in with your own
Tube Archivist URL (``http://your-instance:port``)."""

ta_token: str = ""
"""The API key to use for Authorization_ header.  Can be found under:

  :menuselection:`Settings --> User --> Admin Interface`.

.. _Authorization: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Authorization
"""

ta_link_to_mp4: bool = False
"""Optional, if true SearXNG will link directly to the mp4 of the video to play
in the browser.  The default behavior is to link into TubeArchivist's interface
directly."""
def absolute_url(relative_url):
    """Prefix *relative_url* with the configured ``base_url``.

    Trailing slashes are stripped from ``base_url`` before *relative_url* is
    appended; *relative_url* itself is used as given.
    """
    instance_root = base_url.rstrip("/")
    return f'{instance_root}{relative_url}'
def init(_):
    """Validate the mandatory engine settings.

    The single argument is ignored.

    :raises ValueError: if ``base_url`` or ``ta_token`` is empty; ``base_url``
        is checked first.
    """
    mandatory = (
        (base_url, 'tubearchivist engine: base_url is unset'),
        (ta_token, 'tubearchivist engine: ta_token is unset'),
    )
    for value, complaint in mandatory:
        if not value:
            raise ValueError(complaint)
def request(query, params):
    """Fill *params* with the URL and auth header for a search request.

    :param query: Search terms; a falsy query aborts the request.
    :param params: Request parameter dict; ``params['url']`` and
        ``params['headers']['Authorization']`` are set in place.
    :returns: ``False`` when *query* is falsy, otherwise the same *params*
        object.
    """
    if not query:
        return False

    query_string = urlencode({'query': query})
    endpoint = f"{base_url.rstrip('/')}/api/search"

    params['url'] = f"{endpoint}?{query_string}"
    params['headers']['Authorization'] = f'Token {ta_token}'
    return params
def response(resp) -> EngineResults:
    """Turn the HTTP response *resp* into an :py:obj:`EngineResults` container.

    A fresh container is created, filled by :py:func:`video_response` and
    returned.
    """
    engine_results = EngineResults()
    video_response(resp, engine_results)
    return engine_results
def video_response(resp, results: EngineResults) -> None:
    """Parse a Tube Archivist search response and add its hits to *results*.

    The JSON payload is read from ``resp.json()``.  Channel hits are taken from
    ``results.channel_results`` and video hits from ``results.video_results``.
    A payload without ``results``, or one where ``results`` or either list is
    missing or ``null``, contributes no items instead of raising.

    :param resp: Response object; only its ``json()`` method is used.
    :param results: Container the parsed channel and video items are added to.
    """
    json_data = resp.json()

    if 'results' not in json_data:
        return

    # Tolerate ``"results": null`` as well as absent / null result lists.
    sections = json_data['results'] or {}

    for channel_result in sections.get('channel_results') or []:
        res = results.types.MainResult(
            url=absolute_url(f'/channel/{channel_result["channel_id"]}'),
            title=channel_result['channel_name'],
            content=html_to_text(channel_result['channel_description']),
            author=channel_result['channel_name'],
            views=humanize_number(channel_result['channel_subs']),
            # NOTE(review): this puts the API token into a URL that ends up in
            # the rendered result -- confirm that exposing it is acceptable.
            thumbnail=f'{absolute_url(channel_result["channel_thumb_url"])}?auth={ta_token}',
        )
        results.add(result=res)

    for video_result in sections.get('video_results') or []:
        channel_name = video_result['channel']['channel_name']

        # ``tags`` may be absent or explicitly null; both mean "no tags".
        tags = video_result.get('tags') or []
        # Channel name first, then tags; empty entries dropped, capped at five.
        metadata = [item for item in (channel_name, *tags) if item][:5]

        # Link either straight to the media file or into the TA web interface.
        if ta_link_to_mp4:
            url = absolute_url(video_result["media_url"])
        else:
            url = absolute_url(f'/?videoId={video_result["youtube_id"]}')

        # a type for the video.html template is not yet implemented
        # --> using LegacyResult
        kwargs = {
            'template': 'videos.html',
            'url': url,
            'title': video_result['title'],
            'content': html_to_text(video_result['description']),
            'author': channel_name,
            'length': video_result['player']['duration_str'],
            'views': humanize_number(video_result['stats']['view_count']),
            'publishedDate': parse(video_result['published']),
            # NOTE(review): same token-in-URL concern as for channel thumbnails.
            'thumbnail': f'{absolute_url(video_result["vid_thumb_url"])}?auth={ta_token}',
            'metadata': ' | '.join(metadata),
        }
        results.add(results.types.LegacyResult(**kwargs))
|