Browse Source

[mod] Dailymotion: improved request API & upgrade to data_type: traits_v1

- fetch_traits(): fetch locales (and languages) from dailymotion API
- removed obsolete data-type "supported_languages"
- add documentation
- improved argument list of the HTTP request:
  - add argument: family_filter_map
  - add conditional argument: localization
    Don't add localization and country arguments if the user selects only a
    language (:de, :en, ..) without a region
- improve code quality (mainly improve readability)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 2 years ago
parent
commit
8a8c584fec
3 changed files with 126 additions and 108 deletions
  1. 8 0
      docs/src/searx.engine.dailymotion.rst
  2. 24 54
      searx/data/engine_traits.json
  3. 94 54
      searx/engines/dailymotion.py

+ 8 - 0
docs/src/searx.engine.dailymotion.rst

@@ -0,0 +1,8 @@
+.. _dailymotion engine:
+
+===========
+Dailymotion
+===========
+
+.. automodule:: searx.engines.dailymotion
+  :members:

+ 24 - 54
searx/data/engine_traits.json

@@ -366,8 +366,29 @@
   "dailymotion": {
     "all_locale": null,
     "custom": {},
-    "data_type": "supported_languages",
-    "languages": {},
+    "data_type": "traits_v1",
+    "languages": {
+      "ar": "ar",
+      "de": "de",
+      "el": "el",
+      "en": "en",
+      "es": "es",
+      "fr": "fr",
+      "id": "id",
+      "it": "it",
+      "ja": "ja",
+      "ko": "ko",
+      "ms": "ms",
+      "nl": "nl",
+      "pl": "pl",
+      "pt": "pt",
+      "ro": "ro",
+      "ru": "ru",
+      "th": "th",
+      "tr": "tr",
+      "vi": "vi",
+      "zh": "zh"
+    },
     "regions": {
       "ar-AE": "ar_AE",
       "ar-EG": "ar_EG",
@@ -418,58 +439,7 @@
       "zh-CN": "zh_CN",
       "zh-TW": "zh_TW"
     },
-    "supported_languages": [
-      "ar_AA",
-      "ar_AE",
-      "ar_EG",
-      "ar_SA",
-      "de_AT",
-      "de_CH",
-      "de_DE",
-      "el_GR",
-      "en_AU",
-      "en_CA",
-      "en_EN",
-      "en_GB",
-      "en_HK",
-      "en_IE",
-      "en_IN",
-      "en_NG",
-      "en_PH",
-      "en_PK",
-      "en_SG",
-      "en_US",
-      "en_ZA",
-      "es_AR",
-      "es_ES",
-      "es_MX",
-      "fr_BE",
-      "fr_CA",
-      "fr_CH",
-      "fr_CI",
-      "fr_FR",
-      "fr_MA",
-      "fr_SN",
-      "fr_TN",
-      "id_ID",
-      "it_CH",
-      "it_IT",
-      "ja_JP",
-      "ko_KR",
-      "ms_MY",
-      "nl_BE",
-      "nl_NL",
-      "pl_PL",
-      "pt_BR",
-      "pt_PT",
-      "ro_RO",
-      "ru_RU",
-      "th_TH",
-      "tr_TR",
-      "vi_VN",
-      "zh_CN",
-      "zh_TW"
-    ]
+    "supported_languages": {}
   },
   "duckduckgo": {
     "all_locale": "wt-wt",

+ 94 - 54
searx/engines/dailymotion.py

@@ -1,9 +1,18 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Dailymotion (Videos)
+# lint: pylint
+"""
+Dailymotion (Videos)
+~~~~~~~~~~~~~~~~~~~~
+
+.. _REST GET: https://developers.dailymotion.com/tools/
+.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
+.. _Video filters API: https://developers.dailymotion.com/api/#video-filters
+.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
 
 """
 
-from typing import Set
+from typing import TYPE_CHECKING
+
 from datetime import datetime, timedelta
 from urllib.parse import urlencode
 import time
@@ -12,8 +21,16 @@ import babel
 from searx.exceptions import SearxEngineAPIException
 from searx import network
 from searx.utils import html_to_text
+from searx.locales import region_tag, language_tag
 from searx.enginelib.traits import EngineTraits
 
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+traits: EngineTraits
+
 # about
 about = {
     "website": 'https://www.dailymotion.com',
@@ -38,11 +55,24 @@ time_delta_dict = {
 }
 
 safesearch = True
-safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''}
+safesearch_params = {
+    2: {'is_created_for_kids': 'true'},
+    1: {'is_created_for_kids': 'true'},
+    0: {},
+}
+"""True if this video is "Created for Kids" / intends to target an audience
+under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
+"""
 
-# search-url
-# - https://developers.dailymotion.com/tools/
-# - https://www.dailymotion.com/doc/api/obj-video.html
+family_filter_map = {
+    2: 'true',
+    1: 'true',
+    0: 'false',
+}
+"""By default, the family filter is turned on. Setting this parameter to
+``false`` will stop filtering-out explicit content from searches and global
+contexts (``family_filter`` in `Global API Parameters`_ ).
+"""
 
 result_fields = [
     'allow_embed',
@@ -54,27 +84,21 @@ result_fields = [
     'thumbnail_360_url',
     'id',
 ]
-search_url = (
-    'https://api.dailymotion.com/videos?'
-    'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}'
-).format(
-    fields=','.join(result_fields),
-    password_protected='false',
-    private='false',
-    sort='relevance',
-    limit=number_of_results,
-)
-iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
+"""`Fields selection`_, by default, a few fields are returned. To request more
+specific fields, the ``fields`` parameter is used with the list of fields
+SearXNG needs in the response to build a video result list.
+"""
 
-# The request query filters by 'languages' & 'country', therefore instead of
-# fetching only languages we need to fetch locales.
-supported_languages_url = 'https://api.dailymotion.com/locales'
-supported_languages_iso639: Set[str] = set()
+search_url = 'https://api.dailymotion.com/videos?'
+"""URL to retrieve a list of videos.
 
+- `REST GET`_
+- `Global API Parameters`_
+- `Video filters API`_
+"""
 
-def init(_engine_settings):
-    global supported_languages_iso639
-    supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages])
+iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
+"""URL template to embed video in SearXNG's result list."""
 
 
 def request(query, params):
@@ -82,34 +106,42 @@ def request(query, params):
     if not query:
         return False
 
-    language = params['language']
-    if language == 'all':
-        language = 'en-US'
-    locale = babel.Locale.parse(language, sep='-')
-
-    language_iso639 = locale.language
-    if locale.language not in supported_languages_iso639:
-        language_iso639 = 'en'
+    eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+    eng_lang = traits.get_language(params['searxng_locale'], 'en')
 
-    query_args = {
+    args = {
         'search': query,
-        'languages': language_iso639,
+        'family_filter': family_filter_map.get(params['safesearch'], 'false'),
+        'thumbnail_ratio': 'original',  # original|widescreen|square
+        # https://developers.dailymotion.com/api/#video-filters
+        'languages': eng_lang,
         'page': params['pageno'],
+        'password_protected': 'false',
+        'private': 'false',
+        'sort': 'relevance',
+        'limit': number_of_results,
+        'fields': ','.join(result_fields),
     }
 
-    if locale.territory:
-        localization = locale.language + '_' + locale.territory
-        if localization in supported_languages:
-            query_args['country'] = locale.territory
+    args.update(safesearch_params.get(params['safesearch'], {}))
+
+    # Add localization and country arguments only if the selected locale has
+    # a region part; omit them when only a language (:de, :en, ..) is selected
+
+    if len(params['searxng_locale'].split('-')) > 1:
+        # https://developers.dailymotion.com/api/#global-parameters
+        args['localization'] = eng_region
+        args['country'] = eng_region.split('_')[1]
+        # Insufficient rights for the `ams_country' parameter of route `GET /videos'
+        # 'ams_country': eng_region.split('_')[1],
 
     time_delta = time_delta_dict.get(params["time_range"])
     if time_delta:
         created_after = datetime.now() - time_delta
-        query_args['created_after'] = datetime.timestamp(created_after)
+        args['created_after'] = datetime.timestamp(created_after)
 
-    query_str = urlencode(query_args)
-    params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '')
-    params['raise_for_httperror'] = False
+    query_str = urlencode(args)
+    params['url'] = search_url + query_str
 
     return params
 
@@ -168,31 +200,27 @@ def response(resp):
     return results
 
 
-# get supported languages from their site
-def _fetch_supported_languages(resp):
-    response_json = resp.json()
-    return [item['locale'] for item in response_json['list']]
-
-
 def fetch_traits(engine_traits: EngineTraits):
-    """Fetch regions from dailymotion.
+    """Fetch locales & languages from dailymotion.
 
+    Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.
     There are duplications in the locale codes returned from Dailymotion which
     can be ignored::
 
       en_EN --> en_GB, en_US
       ar_AA --> ar_EG, ar_AE, ar_SA
 
-    """
-    # pylint: disable=import-outside-toplevel
+    The language list `api/languages <https://api.dailymotion.com/languages>`_
+    contains over 7000 *language* codes (see PR1071_).  We use only those
+    language codes that are used in the locales.
 
-    engine_traits.data_type = 'supported_languages'  # deprecated
+    .. _PR1071: https://github.com/searxng/searxng/pull/1071
 
-    from searx.locales import region_tag
+    """
 
     resp = network.get('https://api.dailymotion.com/locales')
     if not resp.ok:
-        print("ERROR: response from peertube is not OK.")
+        print("ERROR: response from dailymotion/locales is not OK.")
 
     for item in resp.json()['list']:
         eng_tag = item['locale']
@@ -210,3 +238,15 @@ def fetch_traits(engine_traits: EngineTraits):
                 print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
             continue
         engine_traits.regions[sxng_tag] = eng_tag
+
+    locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
+
+    resp = network.get('https://api.dailymotion.com/languages')
+    if not resp.ok:
+        print("ERROR: response from dailymotion/languages is not OK.")
+
+    for item in resp.json()['list']:
+        eng_tag = item['code']
+        if eng_tag in locale_lang_list:
+            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
+            engine_traits.languages[sxng_tag] = eng_tag