Browse Source

Adds two engines: YouTube with or without the API.
The API engine needs an API key (api_key).
The no-API engine does not provide published dates.

Cqoicebordel 10 years ago
parent
commit
f965c97822

+ 83 - 0
searx/engines/youtube_api.py

@@ -0,0 +1,83 @@
+# Youtube (Videos)
+#
+# @website     https://www.youtube.com/
+# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
+#
+# @using-api   yes
+# @results     JSON
+# @stable      yes
+# @parse       url, title, content, publishedDate, thumbnail, embedded
+
+from json import loads
+from urllib import urlencode
+from dateutil import parser
+
+# engine dependent config
+categories = ['videos', 'music']
+paging = False
+language_support = True
+# no key by default; settings.yml marks api_key as required for this engine
+api_key = None
+
+# search-url: {query} receives an already url-encoded "q=..." pair,
+# {api_key} the value of the module-level api_key
+base_url = 'https://www.googleapis.com/youtube/v3/search'
+search_url = ''.join((base_url,
+                      '?part=snippet&{query}',
+                      '&maxResults=20&key={api_key}'))
+
+# iframe markup for the embedded player; {videoid} is filled in per result
+embedded_url = ('<iframe width="540" height="304" '
+                'data-src="//www.youtube-nocookie.com/embed/{videoid}" '
+                'frameborder="0" allowfullscreen></iframe>')
+
+# the video id is appended to this prefix to build the result link
+base_youtube_url = 'https://www.youtube.com/watch?v='
+
+
+# do search-request
+def request(query, params):
+    """Fill ``params['url']`` with the API search URL for *query*.
+
+    Reads ``params['language']``; unless it is ``'all'``, the part before
+    the first underscore (``'fr'`` for ``'fr_FR'``) is appended as the
+    ``relevanceLanguage`` parameter.  Returns the same *params* mapping.
+    """
+    encoded_query = urlencode({'q': query})
+    params['url'] = search_url.format(query=encoded_query, api_key=api_key)
+
+    language = params['language']
+    if language == 'all':
+        # no language restriction requested
+        return params
+
+    language_code = language.split('_')[0]
+    params['url'] = params['url'] + '&relevanceLanguage=' + language_code
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    """Convert a YouTube Data API search reply into a list of result dicts.
+
+    ``resp.text`` must hold the JSON body of the reply.  Each returned dict
+    has the keys ``url``, ``title``, ``content``, ``template``,
+    ``publishedDate``, ``embedded`` and ``thumbnail``.  An empty list is
+    returned when the reply has no ``items`` key.
+
+    Items whose ``id`` carries no ``videoId`` are skipped, so a single
+    non-video hit no longer aborts the whole result list with a KeyError.
+    """
+    search_results = loads(resp.text)
+
+    # return empty array if there are no results
+    if 'items' not in search_results:
+        return []
+
+    results = []
+
+    # parse results
+    for result in search_results['items']:
+        videoid = result['id'].get('videoId')
+        if not videoid:
+            # presumably a channel or playlist hit: there is no video to
+            # link to or embed, so leave it out
+            continue
+
+        snippet = result['snippet']
+
+        # the 'high' rendition may be absent; fall back to no thumbnail
+        thumbnails = snippet.get('thumbnails', {})
+        thumbnail = thumbnails.get('high', {}).get('url', '')
+
+        # append result
+        results.append({'url': base_youtube_url + videoid,
+                        'title': snippet['title'],
+                        'content': snippet['description'],
+                        'template': 'videos.html',
+                        'publishedDate': parser.parse(snippet['publishedAt']),
+                        'embedded': embedded_url.format(videoid=videoid),
+                        'thumbnail': thumbnail})
+
+    # return results
+    return results

+ 72 - 0
searx/engines/youtube_noapi.py

@@ -0,0 +1,72 @@
+# Youtube (Videos)
+#
+# @website     https://www.youtube.com/
+# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
+#
+# @using-api   no
+# @results     HTML
+# @stable      no
+# @parse       url, title, content, thumbnail, embedded
+
+from urllib import quote_plus
+from lxml import html
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['videos', 'music']
+paging = True
+language_support = False
+
+# search-url: {query} is the escaped search string, {page} the page number
+base_url = 'https://www.youtube.com/results'
+search_url = ''.join((base_url, '?search_query={query}', '&page={page}'))
+
+# iframe markup for the embedded player; {videoid} is filled in per result
+embedded_url = ('<iframe width="540" height="304" '
+                'data-src="//www.youtube-nocookie.com/embed/{videoid}" '
+                'frameborder="0" allowfullscreen></iframe>')
+
+# the video id is appended to this prefix to build the result link
+base_youtube_url = 'https://www.youtube.com/watch?v='
+
+# specific xpath variables
+# one node per video entry; the other expressions are relative to it
+results_xpath = ("//ol/li/div[contains(@class, "
+                 "'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]")
+url_xpath = './/h3/a/@href'
+title_xpath = './/div[@class="yt-lockup-content"]/h3/a'
+content_xpath = ('.//div[@class="yt-lockup-content"]'
+                 '/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]')
+
+
+# do search-request
+def request(query, params):
+    """Fill ``params['url']`` with the results-page URL for *query*.
+
+    The query is escaped with ``quote_plus`` and ``params['pageno']`` is
+    used as the page number.  Returns the same *params* mapping.
+    """
+    escaped_query = quote_plus(query)
+    page_number = params['pageno']
+
+    params['url'] = search_url.format(query=escaped_query, page=page_number)
+    return params
+
+
+# get response from search-request
+def response(resp):
+    """Scrape video results out of a YouTube results page.
+
+    ``resp.text`` must hold the HTML of the page.  Each returned dict has
+    the keys ``url``, ``title``, ``content``, ``template``, ``embedded``
+    and ``thumbnail``; no published date is extracted.
+
+    Entries lacking a video id or a title link are skipped, and a missing
+    description element yields empty content, so one incomplete entry no
+    longer aborts the whole page with an IndexError.
+    """
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        videoids = result.xpath('@data-context-item-id')
+        title_nodes = result.xpath(title_xpath)
+
+        # without an id or a title there is nothing usable to show
+        if not videoids or not title_nodes:
+            continue
+
+        videoid = videoids[0]
+
+        url = base_youtube_url + videoid
+        thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'
+
+        title = extract_text(title_nodes[0])
+
+        # the description element may be absent from an entry
+        content_nodes = result.xpath(content_xpath)
+        content = extract_text(content_nodes[0]) if content_nodes else ''
+
+        embedded = embedded_url.format(videoid=videoid)
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'content': content,
+                        'template': 'videos.html',
+                        'embedded': embedded,
+                        'thumbnail': thumbnail})
+
+    # return results
+    return results

+ 6 - 1
searx/settings.yml

@@ -242,8 +242,13 @@ engines:
     shortcut : yhn
 
   - name : youtube
-    engine : youtube
     shortcut : yt
+    # You can use the engine using the official stable API, but you need an API key
+    # See : https://console.developers.google.com/project
+    #    engine : youtube_api
+    #    api_key: 'apikey' # required!
+    # Or you can use the html non-stable engine, activated by default
+    engine : youtube_noapi
 
   - name : dailymotion
     engine : dailymotion

+ 111 - 0
searx/tests/engines/test_youtube_api.py

@@ -0,0 +1,111 @@
+from collections import defaultdict
+import mock
+from searx.engines import youtube_api
+from searx.testing import SearxTestCase
+
+
+# Unit tests for the request() and response() functions of the
+# youtube_api engine.
+class TestYoutubeAPIEngine(SearxTestCase):
+
+    # request() must build a googleapis.com URL containing the query and,
+    # for a specific language, the language code.
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['pageno'] = 0
+        dicto['language'] = 'fr_FR'
+        params = youtube_api.request(query, dicto)
+        self.assertTrue('url' in params)
+        self.assertTrue(query in params['url'])
+        self.assertIn('googleapis.com', params['url'])
+        self.assertIn('youtube', params['url'])
+        self.assertIn('fr', params['url'])
+
+        # with 'all' no language code may appear in the URL
+        dicto['language'] = 'all'
+        params = youtube_api.request(query, dicto)
+        self.assertFalse('fr' in params['url'])
+
+    # response() must turn API JSON into result dicts and return an empty
+    # list for replies that carry no 'items'.
+    def test_response(self):
+        # arguments without a .text attribute raise AttributeError
+        self.assertRaises(AttributeError, youtube_api.response, None)
+        self.assertRaises(AttributeError, youtube_api.response, [])
+        self.assertRaises(AttributeError, youtube_api.response, '')
+        self.assertRaises(AttributeError, youtube_api.response, '[]')
+
+        # valid JSON without an 'items' key gives no results
+        response = mock.Mock(text='{}')
+        self.assertEqual(youtube_api.response(response), [])
+
+        response = mock.Mock(text='{"data": []}')
+        self.assertEqual(youtube_api.response(response), [])
+
+        # reply containing exactly one video item
+        json = """
+        {
+         "kind": "youtube#searchListResponse",
+         "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME",
+         "nextPageToken": "CAUQAA",
+         "pageInfo": {
+          "totalResults": 1000000,
+          "resultsPerPage": 20
+         },
+         "items": [
+          {
+           "kind": "youtube#searchResult",
+           "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/IbLO64BMhbHIgWLwLw7MDYe7Hs4",
+           "id": {
+            "kind": "youtube#video",
+            "videoId": "DIVZCPfAOeM"
+           },
+           "snippet": {
+            "publishedAt": "2015-05-29T22:41:04.000Z",
+            "channelId": "UCNodmx1ERIjKqvcJLtdzH5Q",
+            "title": "Title",
+            "description": "Description",
+            "thumbnails": {
+             "default": {
+              "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/default.jpg"
+             },
+             "medium": {
+              "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
+             },
+             "high": {
+              "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg"
+             }
+            },
+            "channelTitle": "MinecraftUniverse",
+            "liveBroadcastContent": "none"
+           }
+          }
+          ]
+        }
+        """
+        response = mock.Mock(text=json)
+        results = youtube_api.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'Title')
+        self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM')
+        self.assertEqual(results[0]['content'], 'Description')
+        self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg')
+        self.assertTrue('DIVZCPfAOeM' in results[0]['embedded'])
+
+        # search-list reply that has page info but no 'items' key
+        json = """
+        {
+         "kind": "youtube#searchListResponse",
+         "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME",
+         "nextPageToken": "CAUQAA",
+         "pageInfo": {
+          "totalResults": 1000000,
+          "resultsPerPage": 20
+         }
+        }
+        """
+        response = mock.Mock(text=json)
+        results = youtube_api.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)
+
+        # JSON with an unrelated structure also gives no results
+        json = """
+        {"toto":{"entry":[]
+        }
+        }
+        """
+        response = mock.Mock(text=json)
+        results = youtube_api.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)

+ 103 - 0
searx/tests/engines/test_youtube_noapi.py

@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import youtube_noapi
+from searx.testing import SearxTestCase
+
+
+# Unit tests for the request() and response() functions of the
+# youtube_noapi engine.
+class TestYoutubeNoAPIEngine(SearxTestCase):
+
+    # request() must build a youtube.com URL containing the query.
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['pageno'] = 0
+        params = youtube_noapi.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertIn(query, params['url'])
+        self.assertIn('youtube.com', params['url'])
+
+    # response() must extract one result per video entry of the results
+    # page and return an empty list when the page has none.
+    def test_response(self):
+        # arguments without a .text attribute raise AttributeError
+        self.assertRaises(AttributeError, youtube_noapi.response, None)
+        self.assertRaises(AttributeError, youtube_noapi.response, [])
+        self.assertRaises(AttributeError, youtube_noapi.response, '')
+        self.assertRaises(AttributeError, youtube_noapi.response, '[]')
+
+        # a page without any result markup gives no results
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(youtube_noapi.response(response), [])
+
+        # results list containing exactly one video entry
+        html = """
+        <ol id="item-section-063864" class="item-section">
+            <li>
+                <div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile"
+                data-context-item-id="DIVZCPfAOeM"
+                data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB">
+                <div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto">
+                <a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link"
+                data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA">
+                <div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg"
+                width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a>
+                <span class="thumb-menu dark-overflow-action-menu video-actions">
+                </span>
+                </div>
+                <div class="yt-lockup-content">
+                <h3 class="yt-lockup-title">
+                <a href="/watch?v=DIVZCPfAOeM"
+                class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link"
+                data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"
+                title="Top Speed Test Kawasaki Ninja H2 (Thailand) By. MEHAY SUPERBIKE"
+                aria-describedby="description-id-259079" rel="spf-prefetch" dir="ltr">
+                Title
+                </a>
+                <span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span>
+                </h3>
+                <div class="yt-lockup-byline">de
+                <a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard"
+                data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA"
+                data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta">
+                <ul class="yt-lockup-meta-info">
+                    <li>il y a 20 heures</li>
+                    <li>8 424 vues</li>
+                </ul>
+                </div>
+                <div class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2" dir="ltr">
+                Description
+                </div>
+                <div class="yt-lockup-badges">
+                <ul class="yt-badge-list ">
+                    <li class="yt-badge-item" >
+                        <span class="yt-badge">Nouveauté</span>
+                    </li>
+                    <li class="yt-badge-item" ><span class="yt-badge " >HD</span></li>
+                </ul>
+                </div>
+                <div class="yt-lockup-action-menu yt-uix-menu-container">
+                <div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded"
+                data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu">
+                </div>
+                </div>
+                </div>
+                </div>
+                </div>
+            </li>
+        </ol>
+        """
+        response = mock.Mock(text=html)
+        results = youtube_noapi.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'Title')
+        self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM')
+        self.assertEqual(results[0]['content'], 'Description')
+        self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg')
+        self.assertTrue('DIVZCPfAOeM' in results[0]['embedded'])
+
+        # results list whose only entry is empty gives no results
+        html = """
+        <ol id="item-section-063864" class="item-section">
+            <li>
+            </li>
+        </ol>
+        """
+        response = mock.Mock(text=html)
+        results = youtube_noapi.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 0)

+ 2 - 0
searx/tests/test_engines.py

@@ -39,4 +39,6 @@ from searx.tests.engines.test_www500px import *  # noqa
 from searx.tests.engines.test_yacy import *  # noqa
 from searx.tests.engines.test_yahoo import *  # noqa
 from searx.tests.engines.test_youtube import *  # noqa
+from searx.tests.engines.test_youtube_api import *  # noqa
+from searx.tests.engines.test_youtube_noapi import *  # noqa
 from searx.tests.engines.test_yahoo_news import *  # noqa