peertube.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
  4. (more or less) the same REST API and the schema of the JSON result is identical.
  5. """
  6. import re
  7. from urllib.parse import urlencode
  8. from datetime import datetime
  9. from dateutil.parser import parse
  10. from dateutil.relativedelta import relativedelta
  11. import babel
  12. from searx.network import get # see https://github.com/searxng/searxng/issues/762
  13. from searx.locales import language_tag
  14. from searx.utils import html_to_text
  15. from searx.enginelib.traits import EngineTraits
# Engine traits used by ``request`` (via ``traits.get_language``).  The name is
# only declared here and never assigned in this module -- presumably it is
# injected from outside before the engine is used (TODO confirm).
traits: EngineTraits
# Static metadata describing the upstream service and the API this engine uses.
about = {
    # pylint: disable=line-too-long
    "website": 'https://joinpeertube.org',
    "wikidata_id": 'Q50938515',
    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}
# engine dependent config
categories = ["videos"]
# ``request`` turns params['pageno'] into the ``start`` offset (10 hits per page)
paging = True
base_url = "https://peer.tube"
"""Base URL of the Peertube instance. A list of instances is available at:
- https://instances.joinpeertube.org/instances
"""
time_range_support = True
# Offsets that ``request`` adds to today's date to build the ``startDate``
# query argument.  'day' is a zero offset, i.e. the start date is today itself.
time_range_table = {
    'day': relativedelta(),
    'week': relativedelta(weeks=-1),
    'month': relativedelta(months=-1),
    'year': relativedelta(years=-1),
}
safesearch = True
# Maps the value of params['safesearch'] (0, 1, 2) to the ``nsfw`` query
# argument sent by ``request``.
safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
  42. def minute_to_hm(minute):
  43. if isinstance(minute, int):
  44. return "%d:%02d" % (divmod(minute, 60))
  45. return None
  46. def request(query, params):
  47. """Assemble request for the Peertube API"""
  48. if not query:
  49. return False
  50. # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
  51. eng_lang = traits.get_language(params['searxng_locale'], None)
  52. params['url'] = (
  53. base_url.rstrip("/")
  54. + "/api/v1/search/videos?"
  55. + urlencode(
  56. {
  57. 'search': query,
  58. 'searchTarget': 'search-index', # Vidiversum
  59. 'resultType': 'videos',
  60. 'start': (params['pageno'] - 1) * 10,
  61. 'count': 10,
  62. # -createdAt: sort by date ascending / createdAt: date descending
  63. 'sort': '-match', # sort by *match descending*
  64. 'nsfw': safesearch_table[params['safesearch']],
  65. }
  66. )
  67. )
  68. if eng_lang is not None:
  69. params['url'] += '&languageOneOf[]=' + eng_lang
  70. params['url'] += '&boostLanguages[]=' + eng_lang
  71. if params['time_range'] in time_range_table:
  72. time = datetime.now().date() + time_range_table[params['time_range']]
  73. params['url'] += '&startDate=' + time.isoformat()
  74. return params
def response(resp):
    """Parse the HTTP response; delegates to :py:func:`video_response`."""
    return video_response(resp)
  77. def video_response(resp):
  78. """Parse video response from SepiaSearch and Peertube instances."""
  79. results = []
  80. json_data = resp.json()
  81. if 'data' not in json_data:
  82. return []
  83. for result in json_data['data']:
  84. metadata = [
  85. x
  86. for x in [
  87. result.get('channel', {}).get('displayName'),
  88. result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
  89. ', '.join(result.get('tags', [])),
  90. ]
  91. if x
  92. ]
  93. results.append(
  94. {
  95. 'url': result['url'],
  96. 'title': result['name'],
  97. 'content': html_to_text(result.get('description') or ''),
  98. 'author': result.get('account', {}).get('displayName'),
  99. 'length': minute_to_hm(result.get('duration')),
  100. 'template': 'videos.html',
  101. 'publishedDate': parse(result['publishedAt']),
  102. 'iframe_src': result.get('embedUrl'),
  103. 'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
  104. 'metadata': ' | '.join(metadata),
  105. }
  106. )
  107. return results
  108. def fetch_traits(engine_traits: EngineTraits):
  109. """Fetch languages from peertube's search-index source code.
  110. See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_
  111. .. _8ed5c729 - Refactor and redesign client:
  112. https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
  113. .. _videoLanguages:
  114. https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
  115. """
  116. resp = get(
  117. 'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
  118. # the response from search-index repository is very slow
  119. timeout=60,
  120. )
  121. if not resp.ok: # type: ignore
  122. print("ERROR: response from peertube is not OK.")
  123. return
  124. js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) # type: ignore
  125. if not js_lang:
  126. print("ERROR: can't determine languages from peertube")
  127. return
  128. for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
  129. eng_tag = lang.group(1)
  130. if eng_tag == 'oc':
  131. # Occitanis not known by babel, its closest relative is Catalan
  132. # but 'ca' is already in the list of engine_traits.languages -->
  133. # 'oc' will be ignored.
  134. continue
  135. try:
  136. sxng_tag = language_tag(babel.Locale.parse(eng_tag))
  137. except babel.UnknownLocaleError:
  138. print("ERROR: %s is unknown by babel" % eng_tag)
  139. continue
  140. conflict = engine_traits.languages.get(sxng_tag)
  141. if conflict:
  142. if conflict != eng_tag:
  143. print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
  144. continue
  145. engine_traits.languages[sxng_tag] = eng_tag
  146. engine_traits.languages['zh_Hans'] = 'zh'
  147. engine_traits.languages['zh_Hant'] = 'zh'