logo

searx

My custom branch(es) of searx, a meta-search engine
commit: c9b8c7f8deffce93b920273171a4f8f9d6339ea7
parent: 1b77befe1fb1dbf82f99db69f857d21a7f0fdd6e
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Mon, 15 Jun 2015 04:43:15 -0400

Merge pull request #370 from Cqoicebordel/fix_vimeo

Fix vimeo

Diffstat:

M searx/engines/vimeo.py | 12 ++++++------
M searx/tests/engines/test_swisscows.py | 4 ++++
M searx/tests/engines/test_vimeo.py | 53 +++++++++++++++++++++++++++++++++++------------------
M searx/tests/engines/test_yahoo_news.py | 19 ++++++++++++++++++-
M searx/tests/engines/test_youtube_noapi.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 114 insertions(+), 25 deletions(-)

diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py @@ -27,11 +27,11 @@ base_url = 'https://vimeo.com' search_url = base_url + '/search/page:{pageno}?{query}' # specific xpath variables -results_xpath = '//div[@id="browse_content"]/ol/li' -url_xpath = './a/@href' -title_xpath = './a/div[@class="data"]/p[@class="title"]' -content_xpath = './a/img/@src' -publishedDate_xpath = './/p[@class="meta"]//attribute::datetime' +results_xpath = '//div[contains(@class,"results_grid")]/ul/li' +url_xpath = './/a/@href' +title_xpath = './/span[@class="title"]' +thumbnail_xpath = './/img[@class="js-clip_thumbnail_image"]/@src' +publishedDate_xpath = './/time/attribute::datetime' embedded_url = '<iframe data-src="//player.vimeo.com/video{videoid}" ' +\ 'width="540" height="304" frameborder="0" ' +\ @@ -58,7 +58,7 @@ def response(resp): videoid = result.xpath(url_xpath)[0] url = base_url + videoid title = p.unescape(extract_text(result.xpath(title_xpath))) - thumbnail = extract_text(result.xpath(content_xpath)[0]) + thumbnail = extract_text(result.xpath(thumbnail_xpath)[0]) publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0])) embedded = embedded_url.format(videoid=videoid) diff --git a/searx/tests/engines/test_swisscows.py b/searx/tests/engines/test_swisscows.py @@ -23,6 +23,10 @@ class TestSwisscowsEngine(SearxTestCase): self.assertTrue('uiLanguage=browser' in params['url']) self.assertTrue('region=browser' in params['url']) + dicto['category'] = 'images' + params = swisscows.request(query, dicto) + self.assertIn('image', params['url']) + def test_response(self): self.assertRaises(AttributeError, swisscows.response, None) self.assertRaises(AttributeError, swisscows.response, []) diff --git a/searx/tests/engines/test_vimeo.py b/searx/tests/engines/test_vimeo.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from collections import defaultdict import mock from searx.engines import vimeo @@ -25,26 +26,42 @@ class TestVimeoEngine(SearxTestCase): 
self.assertEqual(vimeo.response(response), []) html = """ - <div id="browse_content" class="" data-search-id="696d5f8366914ec4ffec33cf7652de384976d4f4"> - <ol class="js-browse_list clearfix browse browse_videos browse_videos_thumbnails kane" + <div id="browse_content" class="results_grid" data-search-id="696d5f8366914ec4ffec33cf7652de384976d4f4"> + <ul class="js-browse_list clearfix browse browse_videos browse_videos_thumbnails kane" data-stream="c2VhcmNoOjo6ZGVzYzp7InF1ZXJ5IjoidGVzdCJ9"> - <li id="clip_100785455" data-start-page="/search/page:1/sort:relevant/" data-position="1"> - <a href="/videoid" title="Futurama 3d (test shot)"> - <img src="http://image.url.webp" - srcset="http://i.vimeocdn.com/video/482375085_590x332.webp 2x" alt="" - class="thumbnail thumbnail_lg_wide"> - <div class="data"> - <p class="title"> - This is the title - </p> - <p class="meta"> - <time datetime="2014-07-15T04:16:27-04:00" - title="mardi 15 juillet 2014 04:16">Il y a 6 mois</time> - </p> - </div> - </a> + <li data-position="7" data-result-id="clip_79600943"> + <div class="clip_thumbnail"> + <a href="/videoid" class="js-result_url"> + <div class="thumbnail_wrapper"> + <img src="http://image.url.webp" class="js-clip_thumbnail_image"> + <div class="overlay overlay_clip_meta"> + <div class="meta_data_footer"> + <span class="clip_upload_date"> + <time datetime="2013-11-17T08:49:09-05:00" + title="dimanche 17 novembre 2013 08:49">Il y a 1 an</time> + </span> + <span class="clip_likes"> + <img src="https://f.vimeocdn.com/images_v6/svg/heart-icon.svg">2 215 + </span> + <span class="clip_comments"> + <img src="https://f.vimeocdn.com/images_v6/svg/comment-icon.svg">75 + </span> + <span class="overlay meta_data_footer clip_duration">01:12</span> + </div> + </div> + </div> + <span class="title">This is the title</span> + </a> + </div> + <div class="clip_thumbnail_attribution"> + <a href="/fedorshmidt"> + <img src="https://i.vimeocdn.com/portrait/6628061_100x100.jpg" class="avatar"> + <span 
class="display_name">Fedor Shmidt</span> + </a> + <span class="plays">2,1M lectures</span> + </div> </li> - </ol> + </ul> </div> """ response = mock.Mock(text=html) diff --git a/searx/tests/engines/test_yahoo_news.py b/searx/tests/engines/test_yahoo_news.py @@ -29,6 +29,13 @@ class TestYahooNewsEngine(SearxTestCase): self.assertIn('en', params['cookies']['sB']) self.assertIn('en', params['url']) + def test_sanitize_url(self): + url = "test.url" + self.assertEqual(url, yahoo_news.sanitize_url(url)) + + url = "www.yahoo.com/;_ylt=test" + self.assertEqual("www.yahoo.com/", yahoo_news.sanitize_url(url)) + def test_response(self): self.assertRaises(AttributeError, yahoo_news.response, None) self.assertRaises(AttributeError, yahoo_news.response, []) @@ -57,7 +64,17 @@ class TestYahooNewsEngine(SearxTestCase): This is the content </div> </li> - </div> + <li class="first"> + <div class="compTitle"> + <h3> + <a class="yschttl spt" target="_blank"> + </a> + </h3> + </div> + <div class="compText"> + </div> + </li> + </ol> """ response = mock.Mock(text=html) results = yahoo_news.response(response) diff --git a/searx/tests/engines/test_youtube_noapi.py b/searx/tests/engines/test_youtube_noapi.py @@ -94,6 +94,57 @@ class TestYoutubeNoAPIEngine(SearxTestCase): html = """ <ol id="item-section-063864" class="item-section"> <li> + <div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile" + data-context-item-id="DIVZCPfAOeM" + data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB"> + <div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto"> + <a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link" + data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"> + <div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg" + width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a> + <span 
class="thumb-menu dark-overflow-action-menu video-actions"> + </span> + </div> + <div class="yt-lockup-content"> + <h3 class="yt-lockup-title"> + <span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span> + </h3> + <div class="yt-lockup-byline">de + <a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard" + data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA" + data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta"> + <ul class="yt-lockup-meta-info"> + <li>il y a 20 heures</li> + <li>8 424 vues</li> + </ul> + </div> + <div class="yt-lockup-badges"> + <ul class="yt-badge-list "> + <li class="yt-badge-item" > + <span class="yt-badge">Nouveauté</span> + </li> + <li class="yt-badge-item" ><span class="yt-badge " >HD</span></li> + </ul> + </div> + <div class="yt-lockup-action-menu yt-uix-menu-container"> + <div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded" + data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu"> + </div> + </div> + </div> + </div> + </div> + </li> + </ol> + """ + response = mock.Mock(text=html) + results = youtube_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + + html = """ + <ol id="item-section-063864" class="item-section"> + <li> </li> </ol> """