10 months ago · cbf1e90979
--- a/searx/engines/brave.py
+++ b/searx/engines/brave.py
@@ -123,7 +123,6 @@ from typing import Any, TYPE_CHECKING
 
				 from urllib.parse import (
			
 
				     urlencode,
			
 
				     urlparse,
			
 
				-    parse_qs,
			
 
				 )
			
 
				 
			
 
				 from dateutil import parser
			
@@ -137,6 +136,7 @@ from searx.utils import (
 
				     eval_xpath_list,
			
 
				     eval_xpath_getindex,
			
 
				     js_variable_to_python,
			
 
				+    get_embeded_stream_url,
			
 
				 )
			
 
				 from searx.enginelib.traits import EngineTraits
			
 
				 
			
@@ -311,7 +311,7 @@ def _parse_search(resp):
 
				             # In my tests a video tag in the WEB search was most often not a
			
 
				             # video, except the ones from youtube ..
			
 
				 
			
 
				-            iframe_src = _get_iframe_src(url)
			
 
				+            iframe_src = get_embeded_stream_url(url)
			
 
				             if iframe_src:
			
 
				                 item['iframe_src'] = iframe_src
			
 
				                 item['template'] = 'videos.html'
			
@@ -328,15 +328,6 @@ def _parse_search(resp):
 
				     return result_list
			
 
				 
			
 
				 
			
 
				-def _get_iframe_src(url):
			
 
				-    parsed_url = urlparse(url)
			
 
				-    if parsed_url.path == '/watch' and parsed_url.query:
			
 
				-        video_id = parse_qs(parsed_url.query).get('v', [])  # type: ignore
			
 
				-        if video_id:
			
 
				-            return 'https://www.youtube-nocookie.com/embed/' + video_id[0]  # type: ignore
			
 
				-    return None
			
 
				-
			
 
				-
			
 
				 def _parse_news(json_resp):
			
 
				     result_list = []
			
 
				 
			
@@ -392,7 +383,7 @@ def _parse_videos(json_resp):
 
				         if result['thumbnail'] is not None:
			
 
				             item['thumbnail'] = result['thumbnail']['src']
			
 
				 
			
 
				-        iframe_src = _get_iframe_src(url)
			
 
				+        iframe_src = get_embeded_stream_url(url)
			
 
				         if iframe_src:
			
 
				             item['iframe_src'] = iframe_src
			
 
				 
			
--- a/searx/engines/duckduckgo_extra.py
+++ b/searx/engines/duckduckgo_extra.py
@@ -7,6 +7,7 @@ DuckDuckGo Extra (images, videos, news)
 
				 from datetime import datetime
			
 
				 from typing import TYPE_CHECKING
			
 
				 from urllib.parse import urlencode
			
 
				+from searx.utils import get_embeded_stream_url
			
 
				 
			
 
				 from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
			
 
				 from searx.engines.duckduckgo import (
			
@@ -108,7 +109,7 @@ def _video_result(result):
 
				         'title': result['title'],
			
 
				         'content': result['description'],
			
 
				         'thumbnail': result['images'].get('small') or result['images'].get('medium'),
			
 
				-        'iframe_src': result['embed_url'],
			
 
				+        'iframe_src': get_embeded_stream_url(result['content']),
			
 
				         'source': result['provider'],
			
 
				         'length': result['duration'],
			
 
				         'metadata': result.get('uploader'),
			
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -34,6 +34,7 @@ from searx.engines.google import (
 
				     detect_google_sorry,
			
 
				 )
			
 
				 from searx.enginelib.traits import EngineTraits
			
 
				+from searx.utils import get_embeded_stream_url
			
 
				 
			
 
				 if TYPE_CHECKING:
			
 
				     import logging
			
@@ -125,6 +126,7 @@ def response(resp):
 
				                 'content': content,
			
 
				                 'author': pub_info,
			
 
				                 'thumbnail': thumbnail,
			
 
				+                'iframe_src': get_embeded_stream_url(url),
			
 
				                 'template': 'videos.html',
			
 
				             }
			
 
				         )
			
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -61,6 +61,7 @@ from searx.utils import (
 
				     eval_xpath,
			
 
				     eval_xpath_list,
			
 
				     extract_text,
			
 
				+    get_embeded_stream_url,
			
 
				 )
			
 
				 
			
 
				 traits: EngineTraits
			
@@ -303,6 +304,7 @@ def parse_web_api(resp):
 
				                         'title': title,
			
 
				                         'url': res_url,
			
 
				                         'content': content,
			
 
				+                        'iframe_src': get_embeded_stream_url(res_url),
			
 
				                         'publishedDate': pub_date,
			
 
				                         'thumbnail': thumbnail,
			
 
				                         'template': 'videos.html',
			
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -17,7 +17,7 @@ from os.path import splitext, join
 
				 from random import choice
			
 
				 from html.parser import HTMLParser
			
 
				 from html import escape
			
 
				-from urllib.parse import urljoin, urlparse
			
 
				+from urllib.parse import urljoin, urlparse, parse_qs, urlencode
			
 
				 from markdown_it import MarkdownIt
			
 
				 
			
 
				 from lxml import html
			
@@ -615,6 +615,52 @@ def _get_fasttext_model() -> "fasttext.FastText._FastText":  # type: ignore
 
				     return _FASTTEXT_MODEL
			
 
				 
			
 
				 
			
 
				+def get_embeded_stream_url(url):
			
 
				+    """
			
 
				+    Converts a standard video URL into its embed format. Supported services include Youtube,
			
 
				+    Facebook, Instagram, TikTok, and Dailymotion.
			
 
				+    """
			
 
				+    parsed_url = urlparse(url)
			
 
				+    iframe_src = None
			
 
				+
			
 
				+    # YouTube
			
 
				+    if parsed_url.netloc in ['www.youtube.com', 'youtube.com'] and parsed_url.path == '/watch' and parsed_url.query:
			
 
				+        video_id = parse_qs(parsed_url.query).get('v', [])
			
 
				+        if video_id:
			
 
				+            iframe_src = 'https://www.youtube-nocookie.com/embed/' + video_id[0]
			
 
				+
			
 
				+    # Facebook
			
 
				+    elif parsed_url.netloc in ['www.facebook.com', 'facebook.com']:
			
 
				+        encoded_href = urlencode({'href': url})
			
 
				+        iframe_src = 'https://www.facebook.com/plugins/video.php?allowfullscreen=true&' + encoded_href
			
 
				+
			
 
				+    # Instagram
			
 
				+    elif parsed_url.netloc in ['www.instagram.com', 'instagram.com'] and parsed_url.path.startswith('/p/'):
			
 
				+        if parsed_url.path.endswith('/'):
			
 
				+            iframe_src = url + 'embed'
			
 
				+        else:
			
 
				+            iframe_src = url + '/embed'
			
 
				+
			
 
				+    # TikTok
			
 
				+    elif (
			
 
				+        parsed_url.netloc in ['www.tiktok.com', 'tiktok.com']
			
 
				+        and parsed_url.path.startswith('/@')
			
 
				+        and '/video/' in parsed_url.path
			
 
				+    ):
			
 
				+        path_parts = parsed_url.path.split('/video/')
			
 
				+        video_id = path_parts[1]
			
 
				+        iframe_src = 'https://www.tiktok.com/embed/' + video_id
			
 
				+
			
 
				+    # Dailymotion
			
 
				+    elif parsed_url.netloc in ['www.dailymotion.com', 'dailymotion.com'] and parsed_url.path.startswith('/video/'):
			
 
				+        path_parts = parsed_url.path.split('/')
			
 
				+        if len(path_parts) == 3:
			
 
				+            video_id = path_parts[2]
			
 
				+            iframe_src = 'https://www.dailymotion.com/embed/video/' + video_id
			
 
				+
			
 
				+    return iframe_src
			
 
				+
			
 
				+
			
 
				 def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> Optional[str]:
			
 
				     """Detect the language of the ``text`` parameter.