Browse Source

[feat] duckduckgo: support for videos and news

Bnyro 1 year ago
parent
commit
48cb58bd2e

+ 1 - 1
docs/dev/engines/online/duckduckgo.rst

@@ -12,7 +12,7 @@ DuckDuckGo Engines
 .. automodule:: searx.engines.duckduckgo
    :members:
 
-.. automodule:: searx.engines.duckduckgo_images
+.. automodule:: searx.engines.duckduckgo_extra
    :members:
 
 .. automodule:: searx.engines.duckduckgo_definitions

+ 328 - 0
searx/data/engine_traits.json

@@ -2390,6 +2390,334 @@
       "zh-TW": "tw-tzh"
     }
   },
+  "duckduckgo videos": {
+    "all_locale": "wt-wt",
+    "custom": {
+      "lang_region": {
+        "ar-DZ": "ar_DZ",
+        "ar-JO": "ar_JO",
+        "ar-SA": "ar_SA",
+        "bn-IN": "bn_IN",
+        "de-CH": "de_CH",
+        "en-AU": "en_AU",
+        "en-CA": "en_CA",
+        "en-GB": "en_GB",
+        "es-AR": "es_AR",
+        "es-CL": "es_CL",
+        "es-CO": "es_CO",
+        "es-CR": "es_CR",
+        "es-EC": "es_EC",
+        "es-MX": "es_MX",
+        "es-PE": "es_PE",
+        "es-UY": "es_UY",
+        "es-VE": "es_VE",
+        "fr-BE": "fr_BE",
+        "fr-CA": "fr_CA",
+        "fr-CH": "fr_CH",
+        "nl-BE": "nl_BE",
+        "pt-BR": "pt_BR"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af_ZA",
+      "ar": "ar_EG",
+      "ast": "ast_ES",
+      "az_Latn": "az_AZ",
+      "be": "be_BY",
+      "bg": "bg_BG",
+      "bn": "bn_BD",
+      "br": "br_FR",
+      "bs_Latn": "bs_BA",
+      "ca": "ca_ES",
+      "cs": "cs_CZ",
+      "cy": "cy_GB",
+      "da": "da_DK",
+      "de": "de_DE",
+      "el": "el_GR",
+      "en": "en_US",
+      "eo": "eo_XX",
+      "es": "es_ES",
+      "et": "et_EE",
+      "eu": "eu_ES",
+      "fa": "fa_IR",
+      "fi": "fi_FI",
+      "fil": "tl_PH",
+      "fr": "fr_FR",
+      "ga": "ga_IE",
+      "gd": "gd_GB",
+      "gl": "gl_ES",
+      "he": "he_IL",
+      "hi": "hi_IN",
+      "hr": "hr_HR",
+      "hu": "hu_HU",
+      "hy": "hy_AM",
+      "id": "id_ID",
+      "is": "is_IS",
+      "it": "it_IT",
+      "ja": "ja_JP",
+      "kab": "kab_DZ",
+      "kn": "kn_IN",
+      "ko": "ko_KR",
+      "ku": "ku",
+      "kw": "kw_GB",
+      "lt": "lt_LT",
+      "lv": "lv_LV",
+      "ml": "ml_IN",
+      "mr": "mr_IN",
+      "ms": "ms_MY",
+      "nb": "nb_NO",
+      "nl": "nl_NL",
+      "nn": "nn_NO",
+      "pl": "pl_PL",
+      "pt": "pt_PT",
+      "ro": "ro_RO",
+      "ru": "ru_RU",
+      "sc": "sc_IT",
+      "si": "si_LK",
+      "sk": "sk_SK",
+      "sl": "sl_SI",
+      "sq": "sq_AL",
+      "sr_Cyrl": "sr_RS",
+      "sv": "sv_SE",
+      "ta": "ta_IN",
+      "te": "te_IN",
+      "th": "th_TH",
+      "tr": "tr_TR",
+      "uk": "uk_UA",
+      "ur": "ur_PK",
+      "vi": "vi_VN",
+      "zh_Hans": "zh_CN",
+      "zh_Hant": "zh_TW"
+    },
+    "regions": {
+      "ar-SA": "xa-ar",
+      "bg-BG": "bg-bg",
+      "ca-ES": "es-ca",
+      "cs-CZ": "cz-cs",
+      "da-DK": "dk-da",
+      "de-AT": "at-de",
+      "de-CH": "ch-de",
+      "de-DE": "de-de",
+      "el-GR": "gr-el",
+      "en-AU": "au-en",
+      "en-CA": "ca-en",
+      "en-GB": "uk-en",
+      "en-IE": "ie-en",
+      "en-IL": "il-en",
+      "en-IN": "in-en",
+      "en-MY": "my-en",
+      "en-NZ": "nz-en",
+      "en-PH": "ph-en",
+      "en-PK": "pk-en",
+      "en-SG": "sg-en",
+      "en-US": "us-en",
+      "en-ZA": "za-en",
+      "es-AR": "ar-es",
+      "es-CL": "cl-es",
+      "es-CO": "co-es",
+      "es-ES": "es-es",
+      "es-MX": "mx-es",
+      "es-PE": "pe-es",
+      "es-US": "us-es",
+      "et-EE": "ee-et",
+      "fi-FI": "fi-fi",
+      "fr-BE": "be-fr",
+      "fr-CA": "ca-fr",
+      "fr-CH": "ch-fr",
+      "fr-FR": "fr-fr",
+      "hr-HR": "hr-hr",
+      "hu-HU": "hu-hu",
+      "id-ID": "id-en",
+      "it-IT": "it-it",
+      "ja-JP": "jp-jp",
+      "ko-KR": "kr-kr",
+      "lt-LT": "lt-lt",
+      "lv-LV": "lv-lv",
+      "nb-NO": "no-no",
+      "nl-BE": "be-nl",
+      "nl-NL": "nl-nl",
+      "pl-PL": "pl-pl",
+      "pt-BR": "br-pt",
+      "pt-PT": "pt-pt",
+      "ro-RO": "ro-ro",
+      "ru-RU": "ru-ru",
+      "sk-SK": "sk-sk",
+      "sl-SI": "sl-sl",
+      "sv-SE": "se-sv",
+      "th-TH": "th-en",
+      "tr-TR": "tr-tr",
+      "uk-UA": "ua-uk",
+      "vi-VN": "vn-en",
+      "zh-CN": "cn-zh",
+      "zh-HK": "hk-tzh",
+      "zh-TW": "tw-tzh"
+    }
+  },
+  "duckduckgo news": {
+    "all_locale": "wt-wt",
+    "custom": {
+      "lang_region": {
+        "ar-DZ": "ar_DZ",
+        "ar-JO": "ar_JO",
+        "ar-SA": "ar_SA",
+        "bn-IN": "bn_IN",
+        "de-CH": "de_CH",
+        "en-AU": "en_AU",
+        "en-CA": "en_CA",
+        "en-GB": "en_GB",
+        "es-AR": "es_AR",
+        "es-CL": "es_CL",
+        "es-CO": "es_CO",
+        "es-CR": "es_CR",
+        "es-EC": "es_EC",
+        "es-MX": "es_MX",
+        "es-PE": "es_PE",
+        "es-UY": "es_UY",
+        "es-VE": "es_VE",
+        "fr-BE": "fr_BE",
+        "fr-CA": "fr_CA",
+        "fr-CH": "fr_CH",
+        "nl-BE": "nl_BE",
+        "pt-BR": "pt_BR"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af_ZA",
+      "ar": "ar_EG",
+      "ast": "ast_ES",
+      "az_Latn": "az_AZ",
+      "be": "be_BY",
+      "bg": "bg_BG",
+      "bn": "bn_BD",
+      "br": "br_FR",
+      "bs_Latn": "bs_BA",
+      "ca": "ca_ES",
+      "cs": "cs_CZ",
+      "cy": "cy_GB",
+      "da": "da_DK",
+      "de": "de_DE",
+      "el": "el_GR",
+      "en": "en_US",
+      "eo": "eo_XX",
+      "es": "es_ES",
+      "et": "et_EE",
+      "eu": "eu_ES",
+      "fa": "fa_IR",
+      "fi": "fi_FI",
+      "fil": "tl_PH",
+      "fr": "fr_FR",
+      "ga": "ga_IE",
+      "gd": "gd_GB",
+      "gl": "gl_ES",
+      "he": "he_IL",
+      "hi": "hi_IN",
+      "hr": "hr_HR",
+      "hu": "hu_HU",
+      "hy": "hy_AM",
+      "id": "id_ID",
+      "is": "is_IS",
+      "it": "it_IT",
+      "ja": "ja_JP",
+      "kab": "kab_DZ",
+      "kn": "kn_IN",
+      "ko": "ko_KR",
+      "ku": "ku",
+      "kw": "kw_GB",
+      "lt": "lt_LT",
+      "lv": "lv_LV",
+      "ml": "ml_IN",
+      "mr": "mr_IN",
+      "ms": "ms_MY",
+      "nb": "nb_NO",
+      "nl": "nl_NL",
+      "nn": "nn_NO",
+      "pl": "pl_PL",
+      "pt": "pt_PT",
+      "ro": "ro_RO",
+      "ru": "ru_RU",
+      "sc": "sc_IT",
+      "si": "si_LK",
+      "sk": "sk_SK",
+      "sl": "sl_SI",
+      "sq": "sq_AL",
+      "sr_Cyrl": "sr_RS",
+      "sv": "sv_SE",
+      "ta": "ta_IN",
+      "te": "te_IN",
+      "th": "th_TH",
+      "tr": "tr_TR",
+      "uk": "uk_UA",
+      "ur": "ur_PK",
+      "vi": "vi_VN",
+      "zh_Hans": "zh_CN",
+      "zh_Hant": "zh_TW"
+    },
+    "regions": {
+      "ar-SA": "xa-ar",
+      "bg-BG": "bg-bg",
+      "ca-ES": "es-ca",
+      "cs-CZ": "cz-cs",
+      "da-DK": "dk-da",
+      "de-AT": "at-de",
+      "de-CH": "ch-de",
+      "de-DE": "de-de",
+      "el-GR": "gr-el",
+      "en-AU": "au-en",
+      "en-CA": "ca-en",
+      "en-GB": "uk-en",
+      "en-IE": "ie-en",
+      "en-IL": "il-en",
+      "en-IN": "in-en",
+      "en-MY": "my-en",
+      "en-NZ": "nz-en",
+      "en-PH": "ph-en",
+      "en-PK": "pk-en",
+      "en-SG": "sg-en",
+      "en-US": "us-en",
+      "en-ZA": "za-en",
+      "es-AR": "ar-es",
+      "es-CL": "cl-es",
+      "es-CO": "co-es",
+      "es-ES": "es-es",
+      "es-MX": "mx-es",
+      "es-PE": "pe-es",
+      "es-US": "us-es",
+      "et-EE": "ee-et",
+      "fi-FI": "fi-fi",
+      "fr-BE": "be-fr",
+      "fr-CA": "ca-fr",
+      "fr-CH": "ch-fr",
+      "fr-FR": "fr-fr",
+      "hr-HR": "hr-hr",
+      "hu-HU": "hu-hu",
+      "id-ID": "id-en",
+      "it-IT": "it-it",
+      "ja-JP": "jp-jp",
+      "ko-KR": "kr-kr",
+      "lt-LT": "lt-lt",
+      "lv-LV": "lv-lv",
+      "nb-NO": "no-no",
+      "nl-BE": "be-nl",
+      "nl-NL": "nl-nl",
+      "pl-PL": "pl-pl",
+      "pt-BR": "br-pt",
+      "pt-PT": "pt-pt",
+      "ro-RO": "ro-ro",
+      "ru-RU": "ru-ru",
+      "sk-SK": "sk-sk",
+      "sl-SI": "sl-sl",
+      "sv-SE": "se-sv",
+      "th-TH": "th-en",
+      "tr-TR": "tr-tr",
+      "uk-UA": "ua-uk",
+      "vi-VN": "vn-en",
+      "zh-CN": "cn-zh",
+      "zh-HK": "hk-tzh",
+      "zh-TW": "tw-tzh"
+    }
+  },
   "duckduckgo weather": {
     "all_locale": "wt-wt",
     "custom": {

+ 4 - 2
searx/engines/duckduckgo.py

@@ -66,8 +66,10 @@ def cache_vqd(query, value):
     The vqd value depends on the query string and is needed for the follow-up
     pages or the images loaded by an XMLHttpRequest:
 
-    - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
-    - DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...`
+    - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
+    - DuckDuckGo Images: ``https://duckduckgo.com/i.js?q=...&vqd=...``
+    - DuckDuckGo Videos: ``https://duckduckgo.com/v.js?q=...&vqd=...``
+    - DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...``
 
     """
     c = redisdb.client()

+ 55 - 16
searx/engines/duckduckgo_images.py → searx/engines/duckduckgo_extra.py

@@ -1,9 +1,10 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
-DuckDuckGo Images
-~~~~~~~~~~~~~~~~~
+DuckDuckGo Extra (images, videos, news)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
+from datetime import datetime
 from typing import TYPE_CHECKING
 from urllib.parse import urlencode
 
@@ -32,6 +33,9 @@ about = {
 
 # engine dependent config
 categories = ['images', 'web']
+ddg_category = 'images'
+"""The category must be one of ``images``, ``videos`` or ``news``.
+"""
 paging = True
 safesearch = True
 send_accept_language_header = True
@@ -39,6 +43,8 @@ send_accept_language_header = True
 safesearch_cookies = {0: '-2', 1: None, 2: '1'}
 safesearch_args = {0: '1', 1: None, 2: '1'}
 
+search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}
+
 
 def request(query, params):
 
@@ -69,28 +75,61 @@ def request(query, params):
         args['p'] = safe_search  # "-1", "1"
 
     logger.debug("cookies: %s", params['cookies'])
-    args = urlencode(args)
-    params['url'] = 'https://duckduckgo.com/i.js?{args}'.format(args=args)
+
+    params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}'
 
     return params
 
 
+def _image_result(result):
+    return {
+        'template': 'images.html',
+        'url': result['url'],
+        'title': result['title'],
+        'content': '',
+        'thumbnail_src': result['thumbnail'],
+        'img_src': result['image'],
+        'img_format': '%s x %s' % (result['width'], result['height']),
+        'source': result['source'],
+    }
+
+
+def _video_result(result):
+    return {
+        'template': 'videos.html',
+        'url': result['content'],
+        'title': result['title'],
+        'content': result['description'],
+        'thumbnail': result['images'].get('small') or result['images'].get('medium'),
+        'iframe_src': result['embed_url'],
+        'source': result['provider'],
+        'length': result['duration'],
+        'metadata': result.get('uploader'),
+    }
+
+
+def _news_result(result):
+    return {
+        'url': result['url'],
+        'title': result['title'],
+        'content': result['excerpt'],
+        'source': result['source'],
+        'publishedDate': datetime.utcfromtimestamp(result['date']),
+    }
+
+
 def response(resp):
     results = []
     res_json = resp.json()
 
     for result in res_json['results']:
-        results.append(
-            {
-                'template': 'images.html',
-                'title': result['title'],
-                'content': '',
-                'thumbnail_src': result['thumbnail'],
-                'img_src': result['image'],
-                'url': result['url'],
-                'img_format': '%s x %s' % (result['width'], result['height']),
-                'source': result['source'],
-            }
-        )
+        if ddg_category == 'images':
+            results.append(_image_result(result))
+        elif ddg_category == 'videos':
+            results.append(_video_result(result))
+        elif ddg_category == 'news':
+            results.append(_news_result(result))
+        else:
+            raise ValueError(f"Invalid duckduckgo category: {ddg_category}")
 
     return results

+ 17 - 2
searx/settings.yml

@@ -603,9 +603,24 @@ engines:
     shortcut: ddg
 
   - name: duckduckgo images
-    engine: duckduckgo_images
+    engine: duckduckgo_extra
+    categories: [images, web]
+    ddg_category: images
     shortcut: ddi
-    timeout: 3.0
+    disabled: true
+
+  - name: duckduckgo videos
+    engine: duckduckgo_extra
+    categories: [videos, web]
+    ddg_category: videos
+    shortcut: ddv
+    disabled: true
+
+  - name: duckduckgo news
+    engine: duckduckgo_extra
+    categories: [news, web]
+    ddg_category: news
+    shortcut: ddn
     disabled: true
 
   - name: duckduckgo weather