Browse Source

[mod] SepiaSearch: re-engineered & upgraded to data_type: traits_v1

- fetch_traits(): SepiaSearch and Peertube use identical languages.
  Replace the module's list `supported_languages` with `engine.traits.languages`
  (data_type: `traits_v1`).
- fixed code to pass pylint
- request(): add argument boostLanguages
- response(): now delegates to peertube's video_response() function, which adds
  metadata from channel name, host & tags

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 2 years ago
parent
commit
057e9bc1d1
3 changed files with 83 additions and 65 deletions
  1. 8 0
      docs/src/searx.engines.peertube.rst
  2. 30 0
      searx/data/engine_traits.json
  3. 45 65
      searx/engines/sepiasearch.py

+ 8 - 0
docs/src/searx.engines.peertube.rst

@@ -17,3 +17,11 @@ Peertube Video
 
 .. automodule:: searx.engines.peertube
   :members:
+
+.. _sepiasearch engine:
+
+SepiaSearch
+===========
+
+.. automodule:: searx.engines.sepiasearch
+  :members:

+ 30 - 0
searx/data/engine_traits.json

@@ -3301,6 +3301,36 @@
     },
     "supported_languages": {}
   },
+  "sepiasearch": {
+    "all_locale": null,
+    "custom": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ca": "ca",
+      "cs": "cs",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "eo": "eo",
+      "es": "es",
+      "eu": "eu",
+      "fi": "fi",
+      "fr": "fr",
+      "gd": "gd",
+      "it": "it",
+      "ja": "ja",
+      "nl": "nl",
+      "pl": "pl",
+      "pt": "pt",
+      "ru": "ru",
+      "sv": "sv",
+      "zh": "zh",
+      "zh_Hans": "zh",
+      "zh_Hant": "zh"
+    },
+    "regions": {},
+    "supported_languages": {}
+  },
   "startpage": {
     "all_locale": null,
     "custom": {},

+ 45 - 65
searx/engines/sepiasearch.py

@@ -1,70 +1,80 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""SepiaSearch uses the same languages as :py:obj:`Peertube
+<searx.engines.peertube>` and the response is identical to the response from the
+peertube engines.
+
 """
- SepiaSearch (Videos)
-"""
 
-from json import loads
-from dateutil import parser, relativedelta
+from typing import TYPE_CHECKING
+
 from urllib.parse import urlencode
 from datetime import datetime
 
-# about
+from searx.engines.peertube import fetch_traits  # pylint: disable=unused-import
+from searx.engines.peertube import (
+    # pylint: disable=unused-import
+    video_response,
+    safesearch_table,
+    time_range_table,
+)
+from searx.enginelib.traits import EngineTraits
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
+
 about = {
+    # pylint: disable=line-too-long
     "website": 'https://sepiasearch.org',
     "wikidata_id": None,
-    "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api",  # NOQA
+    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
     "use_official_api": True,
     "require_api_key": False,
     "results": 'JSON',
 }
 
+# engine dependent config
 categories = ['videos']
 paging = True
+
+base_url = 'https://sepiasearch.org'
+
 time_range_support = True
 safesearch = True
-supported_languages = [
-    # fmt: off
-    'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el',
-    'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt',
-    'sv', 'pl', 'fi', 'ru'
-    # fmt: on
-]
-base_url = 'https://sepiasearch.org/api/v1/search/videos'
-
-safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
-
-time_range_table = {
-    'day': relativedelta.relativedelta(),
-    'week': relativedelta.relativedelta(weeks=-1),
-    'month': relativedelta.relativedelta(months=-1),
-    'year': relativedelta.relativedelta(years=-1),
-}
 
 
-def minute_to_hm(minute):
-    if isinstance(minute, int):
-        return "%d:%02d" % (divmod(minute, 60))
-    return None
+def request(query, params):
+    """Assemble request for the SepiaSearch API"""
+
+    if not query:
+        return False
 
+    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+    eng_lang = traits.get_language(params['searxng_locale'], None)
 
-def request(query, params):
     params['url'] = (
-        base_url
-        + '?'
+        base_url.rstrip("/")
+        + "/api/v1/search/videos?"
         + urlencode(
             {
                 'search': query,
                 'start': (params['pageno'] - 1) * 10,
                 'count': 10,
-                'sort': '-match',
+                # -createdAt: sort by date descending / createdAt: date ascending
+                'sort': '-match',  # sort by *match descending*
                 'nsfw': safesearch_table[params['safesearch']],
             }
         )
     )
 
-    language = params['language'].split('-')[0]
-    if language in supported_languages:
-        params['url'] += '&languageOneOf[]=' + language
+    if eng_lang is not None:
+        params['url'] += '&languageOneOf[]=' + eng_lang
+        params['url'] += '&boostLanguages[]=' + eng_lang
+
     if params['time_range'] in time_range_table:
         time = datetime.now().date() + time_range_table[params['time_range']]
         params['url'] += '&startDate=' + time.isoformat()
@@ -73,34 +83,4 @@ def request(query, params):
 
 
 def response(resp):
-    results = []
-
-    search_results = loads(resp.text)
-
-    if 'data' not in search_results:
-        return []
-
-    for result in search_results['data']:
-        title = result['name']
-        content = result['description']
-        thumbnail = result['thumbnailUrl']
-        publishedDate = parser.parse(result['publishedAt'])
-        author = result.get('account', {}).get('displayName')
-        length = minute_to_hm(result.get('duration'))
-        url = result['url']
-
-        results.append(
-            {
-                'url': url,
-                'title': title,
-                'content': content,
-                'author': author,
-                'length': length,
-                'template': 'videos.html',
-                'publishedDate': publishedDate,
-                'iframe_src': result.get('embedUrl'),
-                'thumbnail': thumbnail,
-            }
-        )
-
-    return results
+    return video_response(resp)