Browse Source

[mod] Peertube: re-engineered & upgrade to data_type: traits_v1

- fetch_traits(): Fetch languages from peertube's search-index source code.

  [mod] Include migration of the request method from 'supported_languages'
        to the 'traits' (EngineTraits) object.
  [fix] old supported_languages_url is no longer valid since the sources
        have been moved to a different path.

- fixed code to pass pylint
- request(): complete re-implementation based on the API docs [1]
- response(): complete re-implementation, adds several fields missed before
- add source code documentation

[1] https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 2 years ago
parent
commit
a7fe22770a
4 changed files with 195 additions and 75 deletions
  1. 19 0
      docs/src/searx.engines.peertube.rst
  2. 26 25
      searx/data/engine_traits.json
  3. 148 47
      searx/engines/peertube.py
  4. 2 3
      searx/settings.yml

+ 19 - 0
docs/src/searx.engines.peertube.rst

@@ -0,0 +1,19 @@
+.. _peertube engines:
+
+================
+Peertube Engines
+================
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+
+.. _peertube video engine:
+
+Peertube Video
+==============
+
+.. automodule:: searx.engines.peertube
+  :members:

+ 26 - 25
searx/data/engine_traits.json

@@ -1468,31 +1468,32 @@
   "peertube": {
     "all_locale": null,
     "custom": {},
-    "data_type": "supported_languages",
-    "languages": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ca": "ca",
+      "cs": "cs",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "eo": "eo",
+      "es": "es",
+      "eu": "eu",
+      "fi": "fi",
+      "fr": "fr",
+      "gd": "gd",
+      "it": "it",
+      "ja": "ja",
+      "nl": "nl",
+      "pl": "pl",
+      "pt": "pt",
+      "ru": "ru",
+      "sv": "sv",
+      "zh": "zh",
+      "zh_Hans": "zh",
+      "zh_Hant": "zh"
+    },
     "regions": {},
-    "supported_languages": [
-      "ca",
-      "cs",
-      "de",
-      "el",
-      "en",
-      "eo",
-      "es",
-      "eu",
-      "fi",
-      "fr",
-      "gd",
-      "it",
-      "ja",
-      "nl",
-      "oc",
-      "pl",
-      "pt",
-      "ru",
-      "sv",
-      "zh"
-    ]
+    "supported_languages": {}
   },
   "qwant": {
     "all_locale": null,
@@ -4531,4 +4532,4 @@
       "zh_cht"
     ]
   }
-}
+}

+ 148 - 47
searx/engines/peertube.py

@@ -1,18 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- peertube (Videos)
+# lint: pylint
+"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
+(more or less) the same REST API and the schema of the JSON result is identical.
+
 """
 
-from json import loads
-from datetime import datetime
+import re
 from urllib.parse import urlencode
+from datetime import datetime
+from dateutil.parser import parse
+from dateutil.relativedelta import relativedelta
+
+import babel
+
+from searx import network
+from searx.locales import language_tag
 from searx.utils import html_to_text
+from searx.enginelib.traits import EngineTraits
+
+traits: EngineTraits
 
-# about
 about = {
+    # pylint: disable=line-too-long
     "website": 'https://joinpeertube.org',
     "wikidata_id": 'Q50938515',
-    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
+    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
     "use_official_api": True,
     "require_api_key": False,
     "results": 'JSON',
@@ -22,66 +34,155 @@ about = {
 categories = ["videos"]
 paging = True
 base_url = "https://peer.tube"
-supported_languages_url = 'https://peer.tube/api/v1/videos/languages'
+"""Base URL of the Peertube instance.  A list of instances is available at:
+
+- https://instances.joinpeertube.org/instances
+"""
+
+time_range_support = True
+# Offsets that request() adds to today's date to build the ``startDate`` URL
+# argument.  'day' is a zero offset, i.e. ``startDate`` is today's date.
+time_range_table = {
+    'day': relativedelta(),
+    'week': relativedelta(weeks=-1),
+    'month': relativedelta(months=-1),
+    'year': relativedelta(years=-1),
+}
+
+safesearch = True
+# Maps the value of ``params['safesearch']`` to the ``nsfw`` URL argument that
+# request() sends to the API.
+safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
+
+
+def minute_to_hm(minute):
+    """Format an integer as ``"<quotient>:<remainder>"`` of a division by 60.
+
+    The remainder is zero-padded to two digits (e.g. ``125`` --> ``"2:05"``).
+    Returns ``None`` when ``minute`` is not an ``int``.
+    """
+    # NOTE(review): the only caller visible here passes the ``duration`` field
+    # of a result -- presumably seconds in the Peertube API, in which case the
+    # output reads as "minutes:seconds" rather than "hours:minutes"; confirm
+    # the unit.
+    if isinstance(minute, int):
+        return "%d:%02d" % (divmod(minute, 60))
+    return None
 
 
-# do search-request
 def request(query, params):
-    sanitized_url = base_url.rstrip("/")
-    pageno = (params["pageno"] - 1) * 15
-    search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}"
-    query_dict = {"search": query}
-    language = params["language"].split("-")[0]
-    if "all" != language and language in supported_languages:
-        query_dict["languageOneOf"] = language
-    params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno)
-    return params
+    """Assemble request for the Peertube API.
+
+    Sets ``params['url']`` to the video search URL below :py:obj:`base_url`
+    and returns ``params``.  Returns ``False`` (without setting a URL) when
+    ``query`` is empty.
+
+    The values read from ``params`` are ``searxng_locale``, ``pageno``,
+    ``safesearch`` and ``time_range``.
+    """
+
+    # nothing to search for
+    if not query:
+        return False
+
+    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+    # ``None`` is the fallback when the locale has no engine language
+    eng_lang = traits.get_language(params['searxng_locale'], None)
+
+    params['url'] = (
+        base_url.rstrip("/")
+        + "/api/v1/search/videos?"
+        + urlencode(
+            {
+                'search': query,
+                'searchTarget': 'search-index',  # Vidiversum
+                'resultType': 'videos',
+                # fixed page size of 10 results, 'start' is the offset of the page
+                'start': (params['pageno'] - 1) * 10,
+                'count': 10,
+                # -createdAt: sort by date ascending / createdAt: date descending
+                'sort': '-match',  # sort by *match descending*
+                'nsfw': safesearch_table[params['safesearch']],
+            }
+        )
+    )
+
+    # the language arguments are appended only when an engine language is known
+    if eng_lang is not None:
+        params['url'] += '&languageOneOf[]=' + eng_lang
+        params['url'] += '&boostLanguages[]=' + eng_lang
 
+    # startDate: today's date shifted by the offset of the selected time range
+    if params['time_range'] in time_range_table:
+        time = datetime.now().date() + time_range_table[params['time_range']]
+        params['url'] += '&startDate=' + time.isoformat()
 
-def _get_offset_from_pageno(pageno):
-    return (pageno - 1) * 15 + 1
+    return params
 
 
-# get response from search-request
 def response(resp):
-    sanitized_url = base_url.rstrip("/")
+    """Parse the response of the search request, see :py:obj:`video_response`."""
+    return video_response(resp)
+
+
+def video_response(resp):
+    """Parse video response from SepiaSearch and Peertube instances.
+
+    Reads the JSON body of ``resp`` and returns a list with one result dict
+    (template ``videos.html``) per item of the ``data`` list.  An empty list
+    is returned when the JSON has no ``data`` key.
+    """
     results = []
 
-    search_res = loads(resp.text)
+    json_data = resp.json()
 
-    # return empty array if there are no results
-    if "data" not in search_res:
+    if 'data' not in json_data:
         return []
 
-    # parse results
-    for res in search_res["data"]:
-        title = res["name"]
-        url = sanitized_url + "/videos/watch/" + res["uuid"]
-        description = res["description"]
-        if description:
-            content = html_to_text(res["description"])
-        else:
-            content = ""
-        thumbnail = sanitized_url + res["thumbnailPath"]
-        publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+    for result in json_data['data']:
+        # Collect the non-empty metadata parts: display name of the channel,
+        # "<name>@<host>" of the channel and the comma separated tags.
+        #
+        # NOTE(review): when the 'channel' object lacks 'name' or 'host' the
+        # string concatenation below raises a TypeError (None + str) -- confirm
+        # that the API always delivers both fields or guard the expression.
+        metadata = [
+            x
+            for x in [
+                result.get('channel', {}).get('displayName'),
+                result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
+                ', '.join(result.get('tags', [])),
+            ]
+            if x
+        ]
 
         results.append(
             {
-                "template": "videos.html",
-                "url": url,
-                "title": title,
-                "content": content,
-                "publishedDate": publishedDate,
-                "iframe_src": sanitized_url + res["embedPath"],
-                "thumbnail": thumbnail,
+                # 'url', 'name' and 'publishedAt' are required (KeyError if
+                # missing), all other fields are optional
+                'url': result['url'],
+                'title': result['name'],
+                'content': html_to_text(result.get('description') or ''),
+                'author': result.get('account', {}).get('displayName'),
+                'length': minute_to_hm(result.get('duration')),
+                'template': 'videos.html',
+                'publishedDate': parse(result['publishedAt']),
+                'iframe_src': result.get('embedUrl'),
+                # fall back to the preview image when there is no thumbnail
+                'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
+                'metadata': ' | '.join(metadata),
             }
         )
 
-    # return results
     return results
 
 
-def _fetch_supported_languages(resp):
-    videolanguages = resp.json()
-    peertube_languages = list(videolanguages.keys())
-    return peertube_languages
+def fetch_traits(engine_traits: EngineTraits):
+    """Fetch languages from peertube's search-index source code.
+
+    See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_
+
+    .. _8ed5c729 - Refactor and redesign client:
+       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
+    .. _videoLanguages:
+       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
+    """
+
+    resp = network.get(
+        'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
+        # the response from search-index repository is very slow
+        timeout=60,
+    )
+
+    if not resp.ok:
+        print("ERROR: response from peertube is not OK.")
+        return
+
+    # Skip the rest of the line that contains ``videoLanguages ()`` and capture
+    # everything that follows up to the first closing bracket ``]``.
+    js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
+    if not js_lang:
+        print("ERROR: can't determine languages from peertube")
+        return
+
+    # each ``{ id: '<tag>', label:`` entry of the captured text is one language
+    for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
+        try:
+            eng_tag = lang.group(1)
+            if eng_tag == 'oc':
+                # Occitan is not known by babel, its closest relative is Catalan
+                # but 'ca' is already in the list of engine_traits.languages -->
+                # 'oc' will be ignored.
+                continue
+
+            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
+
+        except babel.UnknownLocaleError:
+            print("ERROR: %s is unknown by babel" % eng_tag)
+            continue
+
+        # The first engine tag registered for a SearXNG tag wins; a different
+        # engine tag for the same SearXNG tag is only reported.
+        conflict = engine_traits.languages.get(sxng_tag)
+        if conflict:
+            if conflict != eng_tag:
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
+            continue
+        engine_traits.languages[sxng_tag] = eng_tag
+
+    # both Chinese script variants are always mapped to the engine tag 'zh'
+    engine_traits.languages['zh_Hans'] = 'zh'
+    engine_traits.languages['zh_Hant'] = 'zh'

+ 2 - 3
searx/settings.yml

@@ -1758,9 +1758,8 @@ engines:
     engine: peertube
     shortcut: ptb
     paging: true
-    # https://instances.joinpeertube.org/instances
-    base_url: https://peertube.biz/
-    # base_url: https://tube.tardis.world/
+    # alternatives see: https://instances.joinpeertube.org/instances
+    # base_url: https://tube.4aem.com
     categories: videos
     disabled: true
     timeout: 6.0