Browse Source

[feat] duckduckgo: support for videos and news

Bnyro 1 year ago
parent
commit
48cb58bd2e

+ 1 - 1
docs/dev/engines/online/duckduckgo.rst

@@ -12,7 +12,7 @@ DuckDuckGo Engines
 .. automodule:: searx.engines.duckduckgo
 .. automodule:: searx.engines.duckduckgo
    :members:
    :members:
 
 
-.. automodule:: searx.engines.duckduckgo_images
+.. automodule:: searx.engines.duckduckgo_extra
    :members:
    :members:
 
 
 .. automodule:: searx.engines.duckduckgo_definitions
 .. automodule:: searx.engines.duckduckgo_definitions

+ 328 - 0
searx/data/engine_traits.json

@@ -2390,6 +2390,334 @@
       "zh-TW": "tw-tzh"
       "zh-TW": "tw-tzh"
     }
     }
   },
   },
+  "duckduckgo videos": {
+    "all_locale": "wt-wt",
+    "custom": {
+      "lang_region": {
+        "ar-DZ": "ar_DZ",
+        "ar-JO": "ar_JO",
+        "ar-SA": "ar_SA",
+        "bn-IN": "bn_IN",
+        "de-CH": "de_CH",
+        "en-AU": "en_AU",
+        "en-CA": "en_CA",
+        "en-GB": "en_GB",
+        "es-AR": "es_AR",
+        "es-CL": "es_CL",
+        "es-CO": "es_CO",
+        "es-CR": "es_CR",
+        "es-EC": "es_EC",
+        "es-MX": "es_MX",
+        "es-PE": "es_PE",
+        "es-UY": "es_UY",
+        "es-VE": "es_VE",
+        "fr-BE": "fr_BE",
+        "fr-CA": "fr_CA",
+        "fr-CH": "fr_CH",
+        "nl-BE": "nl_BE",
+        "pt-BR": "pt_BR"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af_ZA",
+      "ar": "ar_EG",
+      "ast": "ast_ES",
+      "az_Latn": "az_AZ",
+      "be": "be_BY",
+      "bg": "bg_BG",
+      "bn": "bn_BD",
+      "br": "br_FR",
+      "bs_Latn": "bs_BA",
+      "ca": "ca_ES",
+      "cs": "cs_CZ",
+      "cy": "cy_GB",
+      "da": "da_DK",
+      "de": "de_DE",
+      "el": "el_GR",
+      "en": "en_US",
+      "eo": "eo_XX",
+      "es": "es_ES",
+      "et": "et_EE",
+      "eu": "eu_ES",
+      "fa": "fa_IR",
+      "fi": "fi_FI",
+      "fil": "tl_PH",
+      "fr": "fr_FR",
+      "ga": "ga_IE",
+      "gd": "gd_GB",
+      "gl": "gl_ES",
+      "he": "he_IL",
+      "hi": "hi_IN",
+      "hr": "hr_HR",
+      "hu": "hu_HU",
+      "hy": "hy_AM",
+      "id": "id_ID",
+      "is": "is_IS",
+      "it": "it_IT",
+      "ja": "ja_JP",
+      "kab": "kab_DZ",
+      "kn": "kn_IN",
+      "ko": "ko_KR",
+      "ku": "ku",
+      "kw": "kw_GB",
+      "lt": "lt_LT",
+      "lv": "lv_LV",
+      "ml": "ml_IN",
+      "mr": "mr_IN",
+      "ms": "ms_MY",
+      "nb": "nb_NO",
+      "nl": "nl_NL",
+      "nn": "nn_NO",
+      "pl": "pl_PL",
+      "pt": "pt_PT",
+      "ro": "ro_RO",
+      "ru": "ru_RU",
+      "sc": "sc_IT",
+      "si": "si_LK",
+      "sk": "sk_SK",
+      "sl": "sl_SI",
+      "sq": "sq_AL",
+      "sr_Cyrl": "sr_RS",
+      "sv": "sv_SE",
+      "ta": "ta_IN",
+      "te": "te_IN",
+      "th": "th_TH",
+      "tr": "tr_TR",
+      "uk": "uk_UA",
+      "ur": "ur_PK",
+      "vi": "vi_VN",
+      "zh_Hans": "zh_CN",
+      "zh_Hant": "zh_TW"
+    },
+    "regions": {
+      "ar-SA": "xa-ar",
+      "bg-BG": "bg-bg",
+      "ca-ES": "es-ca",
+      "cs-CZ": "cz-cs",
+      "da-DK": "dk-da",
+      "de-AT": "at-de",
+      "de-CH": "ch-de",
+      "de-DE": "de-de",
+      "el-GR": "gr-el",
+      "en-AU": "au-en",
+      "en-CA": "ca-en",
+      "en-GB": "uk-en",
+      "en-IE": "ie-en",
+      "en-IL": "il-en",
+      "en-IN": "in-en",
+      "en-MY": "my-en",
+      "en-NZ": "nz-en",
+      "en-PH": "ph-en",
+      "en-PK": "pk-en",
+      "en-SG": "sg-en",
+      "en-US": "us-en",
+      "en-ZA": "za-en",
+      "es-AR": "ar-es",
+      "es-CL": "cl-es",
+      "es-CO": "co-es",
+      "es-ES": "es-es",
+      "es-MX": "mx-es",
+      "es-PE": "pe-es",
+      "es-US": "us-es",
+      "et-EE": "ee-et",
+      "fi-FI": "fi-fi",
+      "fr-BE": "be-fr",
+      "fr-CA": "ca-fr",
+      "fr-CH": "ch-fr",
+      "fr-FR": "fr-fr",
+      "hr-HR": "hr-hr",
+      "hu-HU": "hu-hu",
+      "id-ID": "id-en",
+      "it-IT": "it-it",
+      "ja-JP": "jp-jp",
+      "ko-KR": "kr-kr",
+      "lt-LT": "lt-lt",
+      "lv-LV": "lv-lv",
+      "nb-NO": "no-no",
+      "nl-BE": "be-nl",
+      "nl-NL": "nl-nl",
+      "pl-PL": "pl-pl",
+      "pt-BR": "br-pt",
+      "pt-PT": "pt-pt",
+      "ro-RO": "ro-ro",
+      "ru-RU": "ru-ru",
+      "sk-SK": "sk-sk",
+      "sl-SI": "sl-sl",
+      "sv-SE": "se-sv",
+      "th-TH": "th-en",
+      "tr-TR": "tr-tr",
+      "uk-UA": "ua-uk",
+      "vi-VN": "vn-en",
+      "zh-CN": "cn-zh",
+      "zh-HK": "hk-tzh",
+      "zh-TW": "tw-tzh"
+    }
+  },
+  "duckduckgo news": {
+    "all_locale": "wt-wt",
+    "custom": {
+      "lang_region": {
+        "ar-DZ": "ar_DZ",
+        "ar-JO": "ar_JO",
+        "ar-SA": "ar_SA",
+        "bn-IN": "bn_IN",
+        "de-CH": "de_CH",
+        "en-AU": "en_AU",
+        "en-CA": "en_CA",
+        "en-GB": "en_GB",
+        "es-AR": "es_AR",
+        "es-CL": "es_CL",
+        "es-CO": "es_CO",
+        "es-CR": "es_CR",
+        "es-EC": "es_EC",
+        "es-MX": "es_MX",
+        "es-PE": "es_PE",
+        "es-UY": "es_UY",
+        "es-VE": "es_VE",
+        "fr-BE": "fr_BE",
+        "fr-CA": "fr_CA",
+        "fr-CH": "fr_CH",
+        "nl-BE": "nl_BE",
+        "pt-BR": "pt_BR"
+      }
+    },
+    "data_type": "traits_v1",
+    "languages": {
+      "af": "af_ZA",
+      "ar": "ar_EG",
+      "ast": "ast_ES",
+      "az_Latn": "az_AZ",
+      "be": "be_BY",
+      "bg": "bg_BG",
+      "bn": "bn_BD",
+      "br": "br_FR",
+      "bs_Latn": "bs_BA",
+      "ca": "ca_ES",
+      "cs": "cs_CZ",
+      "cy": "cy_GB",
+      "da": "da_DK",
+      "de": "de_DE",
+      "el": "el_GR",
+      "en": "en_US",
+      "eo": "eo_XX",
+      "es": "es_ES",
+      "et": "et_EE",
+      "eu": "eu_ES",
+      "fa": "fa_IR",
+      "fi": "fi_FI",
+      "fil": "tl_PH",
+      "fr": "fr_FR",
+      "ga": "ga_IE",
+      "gd": "gd_GB",
+      "gl": "gl_ES",
+      "he": "he_IL",
+      "hi": "hi_IN",
+      "hr": "hr_HR",
+      "hu": "hu_HU",
+      "hy": "hy_AM",
+      "id": "id_ID",
+      "is": "is_IS",
+      "it": "it_IT",
+      "ja": "ja_JP",
+      "kab": "kab_DZ",
+      "kn": "kn_IN",
+      "ko": "ko_KR",
+      "ku": "ku",
+      "kw": "kw_GB",
+      "lt": "lt_LT",
+      "lv": "lv_LV",
+      "ml": "ml_IN",
+      "mr": "mr_IN",
+      "ms": "ms_MY",
+      "nb": "nb_NO",
+      "nl": "nl_NL",
+      "nn": "nn_NO",
+      "pl": "pl_PL",
+      "pt": "pt_PT",
+      "ro": "ro_RO",
+      "ru": "ru_RU",
+      "sc": "sc_IT",
+      "si": "si_LK",
+      "sk": "sk_SK",
+      "sl": "sl_SI",
+      "sq": "sq_AL",
+      "sr_Cyrl": "sr_RS",
+      "sv": "sv_SE",
+      "ta": "ta_IN",
+      "te": "te_IN",
+      "th": "th_TH",
+      "tr": "tr_TR",
+      "uk": "uk_UA",
+      "ur": "ur_PK",
+      "vi": "vi_VN",
+      "zh_Hans": "zh_CN",
+      "zh_Hant": "zh_TW"
+    },
+    "regions": {
+      "ar-SA": "xa-ar",
+      "bg-BG": "bg-bg",
+      "ca-ES": "es-ca",
+      "cs-CZ": "cz-cs",
+      "da-DK": "dk-da",
+      "de-AT": "at-de",
+      "de-CH": "ch-de",
+      "de-DE": "de-de",
+      "el-GR": "gr-el",
+      "en-AU": "au-en",
+      "en-CA": "ca-en",
+      "en-GB": "uk-en",
+      "en-IE": "ie-en",
+      "en-IL": "il-en",
+      "en-IN": "in-en",
+      "en-MY": "my-en",
+      "en-NZ": "nz-en",
+      "en-PH": "ph-en",
+      "en-PK": "pk-en",
+      "en-SG": "sg-en",
+      "en-US": "us-en",
+      "en-ZA": "za-en",
+      "es-AR": "ar-es",
+      "es-CL": "cl-es",
+      "es-CO": "co-es",
+      "es-ES": "es-es",
+      "es-MX": "mx-es",
+      "es-PE": "pe-es",
+      "es-US": "us-es",
+      "et-EE": "ee-et",
+      "fi-FI": "fi-fi",
+      "fr-BE": "be-fr",
+      "fr-CA": "ca-fr",
+      "fr-CH": "ch-fr",
+      "fr-FR": "fr-fr",
+      "hr-HR": "hr-hr",
+      "hu-HU": "hu-hu",
+      "id-ID": "id-en",
+      "it-IT": "it-it",
+      "ja-JP": "jp-jp",
+      "ko-KR": "kr-kr",
+      "lt-LT": "lt-lt",
+      "lv-LV": "lv-lv",
+      "nb-NO": "no-no",
+      "nl-BE": "be-nl",
+      "nl-NL": "nl-nl",
+      "pl-PL": "pl-pl",
+      "pt-BR": "br-pt",
+      "pt-PT": "pt-pt",
+      "ro-RO": "ro-ro",
+      "ru-RU": "ru-ru",
+      "sk-SK": "sk-sk",
+      "sl-SI": "sl-sl",
+      "sv-SE": "se-sv",
+      "th-TH": "th-en",
+      "tr-TR": "tr-tr",
+      "uk-UA": "ua-uk",
+      "vi-VN": "vn-en",
+      "zh-CN": "cn-zh",
+      "zh-HK": "hk-tzh",
+      "zh-TW": "tw-tzh"
+    }
+  },
   "duckduckgo weather": {
   "duckduckgo weather": {
     "all_locale": "wt-wt",
     "all_locale": "wt-wt",
     "custom": {
     "custom": {

+ 4 - 2
searx/engines/duckduckgo.py

@@ -66,8 +66,10 @@ def cache_vqd(query, value):
     The vqd value depends on the query string and is needed for the follow up
     The vqd value depends on the query string and is needed for the follow up
     pages or the images loaded by a XMLHttpRequest:
     pages or the images loaded by a XMLHttpRequest:
 
 
-    - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
-    - DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...`
+    - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
+    - DuckDuckGo Images: ``https://duckduckgo.com/i.js?q=...&vqd=...``
+    - DuckDuckGo Videos: ``https://duckduckgo.com/v.js?q=...&vqd=...``
+    - DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...``
 
 
     """
     """
     c = redisdb.client()
     c = redisdb.client()

+ 55 - 16
searx/engines/duckduckgo_images.py → searx/engines/duckduckgo_extra.py

@@ -1,9 +1,10 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
-DuckDuckGo Images
-~~~~~~~~~~~~~~~~~
+DuckDuckGo Extra (images, videos, news)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 """
 
 
+from datetime import datetime
 from typing import TYPE_CHECKING
 from typing import TYPE_CHECKING
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
@@ -32,6 +33,9 @@ about = {
 
 
 # engine dependent config
 # engine dependent config
 categories = ['images', 'web']
 categories = ['images', 'web']
+ddg_category = 'images'
+"""The category must be one of ``images``, ``videos`` or ``news``.
+"""
 paging = True
 paging = True
 safesearch = True
 safesearch = True
 send_accept_language_header = True
 send_accept_language_header = True
@@ -39,6 +43,8 @@ send_accept_language_header = True
 safesearch_cookies = {0: '-2', 1: None, 2: '1'}
 safesearch_cookies = {0: '-2', 1: None, 2: '1'}
 safesearch_args = {0: '1', 1: None, 2: '1'}
 safesearch_args = {0: '1', 1: None, 2: '1'}
 
 
+search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}
+
 
 
 def request(query, params):
 def request(query, params):
 
 
@@ -69,28 +75,61 @@ def request(query, params):
         args['p'] = safe_search  # "-1", "1"
         args['p'] = safe_search  # "-1", "1"
 
 
     logger.debug("cookies: %s", params['cookies'])
     logger.debug("cookies: %s", params['cookies'])
-    args = urlencode(args)
-    params['url'] = 'https://duckduckgo.com/i.js?{args}'.format(args=args)
+
+    params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}'
 
 
     return params
     return params
 
 
 
 
+def _image_result(result):
+    return {
+        'template': 'images.html',
+        'url': result['url'],
+        'title': result['title'],
+        'content': '',
+        'thumbnail_src': result['thumbnail'],
+        'img_src': result['image'],
+        'img_format': '%s x %s' % (result['width'], result['height']),
+        'source': result['source'],
+    }
+
+
+def _video_result(result):
+    return {
+        'template': 'videos.html',
+        'url': result['content'],
+        'title': result['title'],
+        'content': result['description'],
+        'thumbnail': result['images'].get('small') or result['images'].get('medium'),
+        'iframe_src': result['embed_url'],
+        'source': result['provider'],
+        'length': result['duration'],
+        'metadata': result.get('uploader'),
+    }
+
+
+def _news_result(result):
+    return {
+        'url': result['url'],
+        'title': result['title'],
+        'content': result['excerpt'],
+        'source': result['source'],
+        'publishedDate': datetime.utcfromtimestamp(result['date']),
+    }
+
+
 def response(resp):
 def response(resp):
     results = []
     results = []
     res_json = resp.json()
     res_json = resp.json()
 
 
     for result in res_json['results']:
     for result in res_json['results']:
-        results.append(
-            {
-                'template': 'images.html',
-                'title': result['title'],
-                'content': '',
-                'thumbnail_src': result['thumbnail'],
-                'img_src': result['image'],
-                'url': result['url'],
-                'img_format': '%s x %s' % (result['width'], result['height']),
-                'source': result['source'],
-            }
-        )
+        if ddg_category == 'images':
+            results.append(_image_result(result))
+        elif ddg_category == 'videos':
+            results.append(_video_result(result))
+        elif ddg_category == 'news':
+            results.append(_news_result(result))
+        else:
+            raise ValueError(f"Invalid duckduckgo category: {ddg_category}")
 
 
     return results
     return results

+ 17 - 2
searx/settings.yml

@@ -603,9 +603,24 @@ engines:
     shortcut: ddg
     shortcut: ddg
 
 
   - name: duckduckgo images
   - name: duckduckgo images
-    engine: duckduckgo_images
+    engine: duckduckgo_extra
+    categories: [images, web]
+    ddg_category: images
     shortcut: ddi
     shortcut: ddi
-    timeout: 3.0
+    disabled: true
+
+  - name: duckduckgo videos
+    engine: duckduckgo_extra
+    categories: [videos, web]
+    ddg_category: videos
+    shortcut: ddv
+    disabled: true
+
+  - name: duckduckgo news
+    engine: duckduckgo_extra
+    categories: [news, web]
+    ddg_category: news
+    shortcut: ddn
     disabled: true
     disabled: true
 
 
   - name: duckduckgo weather
   - name: duckduckgo weather