logo

searx

My custom branche(s) on searx, a meta-search engine git clone https://hacktivis.me/git/searx.git
commit: f965c978222cf48e8dd4b7dd6c9a28ccca9bc62f
parent aac8d3a7bfdd77a5369e52a4ece99b20669a4625
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date:   Sun, 31 May 2015 00:25:59 +0200

Adds two engines : Youtube with or without API
The API needs an API_KEY
The NOAPI doesn't have the published dates.

Diffstat:

Asearx/engines/youtube_api.py83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/engines/youtube_noapi.py72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msearx/settings.yml7++++++-
Asearx/tests/engines/test_youtube_api.py111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_youtube_noapi.py103+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msearx/tests/test_engines.py2++
6 files changed, 377 insertions(+), 1 deletion(-)

diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py @@ -0,0 +1,83 @@ +# Youtube (Videos) +# +# @website https://www.youtube.com/ +# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title, content, publishedDate, thumbnail, embedded + +from json import loads +from urllib import urlencode +from dateutil import parser + +# engine dependent config +categories = ['videos', 'music'] +paging = False +language_support = True +api_key = None + +# search-url +base_url = 'https://www.googleapis.com/youtube/v3/search' +search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}' + +embedded_url = '<iframe width="540" height="304" ' +\ + 'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\ + 'frameborder="0" allowfullscreen></iframe>' + +base_youtube_url = 'https://www.youtube.com/watch?v=' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'q': query}), + api_key=api_key) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0] + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'items' not in search_results: + return [] + + # parse results + for result in search_results['items']: + videoid = result['id']['videoId'] + + title = result['snippet']['title'] + content = '' + thumbnail = '' + + pubdate = result['snippet']['publishedAt'] + publishedDate = parser.parse(pubdate) + + thumbnail = result['snippet']['thumbnails']['high']['url'] + + content = result['snippet']['description'] + + url = base_youtube_url + videoid + + embedded = embedded_url.format(videoid=videoid) + + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail}) + + # return results + return results diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py @@ -0,0 +1,72 @@ +# Youtube (Videos) +# +# @website https://www.youtube.com/ +# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) +# +# @using-api no +# @results HTML +# @stable no +# @parse url, title, content, publishedDate, thumbnail, embedded + +from urllib import quote_plus +from lxml import html +from searx.engines.xpath import extract_text + +# engine dependent config +categories = ['videos', 'music'] +paging = True +language_support = False + +# search-url +base_url = 'https://www.youtube.com/results' +search_url = base_url + '?search_query={query}&page={page}' + +embedded_url = '<iframe width="540" height="304" ' +\ + 'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\ + 'frameborder="0" allowfullscreen></iframe>' + +base_youtube_url = 'https://www.youtube.com/watch?v=' + +# specific xpath variables +results_xpath = "//ol/li/div[contains(@class, 'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]" +url_xpath = './/h3/a/@href' +title_xpath = './/div[@class="yt-lockup-content"]/h3/a' +content_xpath = './/div[@class="yt-lockup-content"]/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=quote_plus(query), + page=params['pageno']) + + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + # parse results + for result in dom.xpath(results_xpath): + videoid = result.xpath('@data-context-item-id')[0] + + url = base_youtube_url + videoid + thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg' + + title = extract_text(result.xpath(title_xpath)[0]) + content = extract_text(result.xpath(content_xpath)[0]) + + embedded = embedded_url.format(videoid=videoid) + + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'embedded': embedded, + 'thumbnail': thumbnail}) + + # return results + return results diff --git a/searx/settings.yml b/searx/settings.yml @@ -242,8 +242,13 @@ engines: shortcut : yhn - name : youtube - engine : youtube shortcut : yt + # You can use the engine using the official stable API, but you need an API key + # See : https://console.developers.google.com/project + # engine : youtube_api + # api_key: 'apikey' # required! + # Or you can use the html non-stable engine, activated by default + engine : youtube_noapi - name : dailymotion engine : dailymotion diff --git a/searx/tests/engines/test_youtube_api.py b/searx/tests/engines/test_youtube_api.py @@ -0,0 +1,111 @@ +from collections import defaultdict +import mock +from searx.engines import youtube_api +from searx.testing import SearxTestCase + + +class TestYoutubeAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = youtube_api.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertIn('googleapis.com', params['url']) + self.assertIn('youtube', params['url']) + self.assertIn('fr', params['url']) + + dicto['language'] = 'all' + params = youtube_api.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, youtube_api.response, None) + self.assertRaises(AttributeError, youtube_api.response, []) + self.assertRaises(AttributeError, youtube_api.response, '') + self.assertRaises(AttributeError, youtube_api.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(youtube_api.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(youtube_api.response(response), []) + + json = """ + { + "kind": "youtube#searchListResponse", + "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME", + "nextPageToken": "CAUQAA", + "pageInfo": { + "totalResults": 1000000, + "resultsPerPage": 20 + }, + "items": [ + { + "kind": "youtube#searchResult", + "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/IbLO64BMhbHIgWLwLw7MDYe7Hs4", + "id": { + "kind": "youtube#video", + "videoId": "DIVZCPfAOeM" + }, + "snippet": { + "publishedAt": "2015-05-29T22:41:04.000Z", + "channelId": "UCNodmx1ERIjKqvcJLtdzH5Q", + "title": "Title", + "description": "Description", + "thumbnails": { + "default": { + "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/default.jpg" + }, + "medium": { + "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg" + }, + "high": { + "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg" + } + }, + "channelTitle": "MinecraftUniverse", + "liveBroadcastContent": "none" + } + } + ] + } + """ + response = mock.Mock(text=json) + results = youtube_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM') + self.assertEqual(results[0]['content'], 'Description') + self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg') + self.assertTrue('DIVZCPfAOeM' in results[0]['embedded']) + + json = """ + { + "kind": "youtube#searchListResponse", + "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME", + "nextPageToken": "CAUQAA", + "pageInfo": { + "totalResults": 1000000, + "resultsPerPage": 20 + } + } + """ + response = mock.Mock(text=json) + results = youtube_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + {"toto":{"entry":[] + } + } + """ + response = mock.Mock(text=json) + results = youtube_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_youtube_noapi.py b/searx/tests/engines/test_youtube_noapi.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import youtube_noapi +from searx.testing import SearxTestCase + + +class TestYoutubeNoAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = youtube_noapi.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('youtube.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, youtube_noapi.response, None) + self.assertRaises(AttributeError, youtube_noapi.response, []) + self.assertRaises(AttributeError, youtube_noapi.response, '') + self.assertRaises(AttributeError, youtube_noapi.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(youtube_noapi.response(response), []) + + html = """ + <ol id="item-section-063864" class="item-section"> + <li> + <div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile" + data-context-item-id="DIVZCPfAOeM" + data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB"> + <div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto"> + <a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link" + data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"> + <div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg" + width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a> + <span class="thumb-menu dark-overflow-action-menu video-actions"> + </span> + </div> + <div class="yt-lockup-content"> + <h3 class="yt-lockup-title"> + <a href="/watch?v=DIVZCPfAOeM" + class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link" + data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA" + title="Top Speed Test Kawasaki Ninja H2 (Thailand) By. MEHAY SUPERBIKE" + aria-describedby="description-id-259079" rel="spf-prefetch" dir="ltr"> + Title + </a> + <span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span> + </h3> + <div class="yt-lockup-byline">de + <a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard" + data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA" + data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta"> + <ul class="yt-lockup-meta-info"> + <li>il y a 20 heures</li> + <li>8 424 vues</li> + </ul> + </div> + <div class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2" dir="ltr"> + Description + </div> + <div class="yt-lockup-badges"> + <ul class="yt-badge-list "> + <li class="yt-badge-item" > + <span class="yt-badge">Nouveauté</span> + </li> + <li class="yt-badge-item" ><span class="yt-badge " >HD</span></li> + </ul> + </div> + <div class="yt-lockup-action-menu yt-uix-menu-container"> + <div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded" + data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu"> + </div> + </div> + </div> + </div> + </div> + </li> + </ol> + """ + response = mock.Mock(text=html) + results = youtube_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM') + self.assertEqual(results[0]['content'], 'Description') + self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg') + self.assertTrue('DIVZCPfAOeM' in results[0]['embedded']) + + html = """ + <ol id="item-section-063864" class="item-section"> + <li> + </li> + </ol> + """ + response = mock.Mock(text=html) + results = youtube_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py @@ -39,4 +39,6 @@ from searx.tests.engines.test_www500px import * # noqa from searx.tests.engines.test_yacy import * # noqa from searx.tests.engines.test_yahoo import * # noqa from searx.tests.engines.test_youtube import * # noqa +from searx.tests.engines.test_youtube_api import * # noqa +from searx.tests.engines.test_youtube_noapi import * # noqa from searx.tests.engines.test_yahoo_news import * # noqa