logo

searx

My custom branch(es) of searx, a meta-search engine — git clone https://hacktivis.me/git/searx.git
commit: 5a16077455ef9e821a2b5f5f7e975be8a37ce83d
parent c6535dd65ebf110d00d633db1170f35cf60b8df0
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date:   Sun,  1 Feb 2015 15:23:26 +0100

PirateBay unit test + reactivation in Settings

Diffstat:

M searx/engines/piratebay.py            |  12 ++++++++----
M searx/settings.yml                    |   6 +++---
A searx/tests/engines/test_piratebay.py | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/tests/test_engines.py           |   1 +
4 files changed, 149 insertions(+), 7 deletions(-)

diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py @@ -13,6 +13,7 @@ from cgi import escape from urllib import quote from lxml import html from operator import itemgetter +from searx.engines.xpath import extract_text # engine dependent config categories = ['videos', 'music', 'files'] @@ -29,7 +30,8 @@ search_types = {'files': '0', # specific xpath variables magnet_xpath = './/a[@title="Download this torrent using magnet"]' -content_xpath = './/font[@class="detDesc"]//text()' +torrent_xpath = './/a[@title="Download this torrent"]' +content_xpath = './/font[@class="detDesc"]' # do search-request @@ -59,8 +61,8 @@ def response(resp): for result in search_res[1:]: link = result.xpath('.//div[@class="detName"]//a')[0] href = urljoin(url, link.attrib.get('href')) - title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath(content_xpath))) + title = extract_text(link) + content = escape(extract_text(result.xpath(content_xpath))) seed, leech = result.xpath('.//td[@align="right"]/text()')[:2] # convert seed to int if possible @@ -76,6 +78,7 @@ def response(resp): leech = 0 magnetlink = result.xpath(magnet_xpath)[0] + torrentfile = result.xpath(torrent_xpath)[0] # append result results.append({'url': href, @@ -83,7 +86,8 @@ def response(resp): 'content': content, 'seed': seed, 'leech': leech, - 'magnetlink': magnetlink.attrib['href'], + 'magnetlink': magnetlink.attrib.get('href'), + 'torrentfile': torrentfile.attrib.get('href'), 'template': 'torrent.html'}) # return results sorted by seeder diff --git a/searx/settings.yml b/searx/settings.yml @@ -152,9 +152,9 @@ engines: engine : photon shortcut : ph -# - name : piratebay -# engine : piratebay -# shortcut : tpb + - name : piratebay + engine : piratebay + shortcut : tpb - name : kickass engine : kickass diff --git a/searx/tests/engines/test_piratebay.py b/searx/tests/engines/test_piratebay.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock 
+from searx.engines import piratebay +from searx.testing import SearxTestCase + + +class TestPiratebayEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['category'] = 'Toto' + params = piratebay.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('piratebay.cr', params['url']) + self.assertIn('0', params['url']) + + dicto['category'] = 'music' + params = piratebay.request(query, dicto) + self.assertIn('100', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, piratebay.response, None) + self.assertRaises(AttributeError, piratebay.response, []) + self.assertRaises(AttributeError, piratebay.response, '') + self.assertRaises(AttributeError, piratebay.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(piratebay.response(response), []) + + html = """ + <table id="searchResult"> + <tr> + </tr> + <tr> + <td class="vertTh"> + <center> + <a href="#" title="More from this category">Anime</a><br/> + (<a href="#" title="More from this category">Anime</a>) + </center> + </td> + <td> + <div class="detName"> + <a href="/this.is.the.link" class="detLink" title="Title"> + This is the title + </a> + </div> + <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet"> + <img src="/static/img/icon-magnet.gif" alt="Magnet link"/> + </a> + <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent"> + <img src="/static/img/dl.gif" class="dl" alt="Download"/> + </a> + <a href="/user/HorribleSubs"> + <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/> + </a> + <img src="/static/img/11x11p.png"/> + <font class="detDesc"> + This is the content <span>and should be</span> OK + </font> + </td> + <td align="right">13</td> + <td align="right">334</td> + </tr> + </table> + """ + response = mock.Mock(text=html) + results 
= piratebay.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link') + self.assertEqual(results[0]['content'], 'This is the content and should be OK') + self.assertEqual(results[0]['seed'], 13) + self.assertEqual(results[0]['leech'], 334) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK') + self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent') + + html = """ + <table id="searchResult"> + <tr> + </tr> + <tr> + <td class="vertTh"> + <center> + <a href="#" title="More from this category">Anime</a><br/> + (<a href="#" title="More from this category">Anime</a>) + </center> + </td> + <td> + <div class="detName"> + <a href="/this.is.the.link" class="detLink" title="Title"> + This is the title + </a> + </div> + <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet"> + <img src="/static/img/icon-magnet.gif" alt="Magnet link"/> + </a> + <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent"> + <img src="/static/img/dl.gif" class="dl" alt="Download"/> + </a> + <a href="/user/HorribleSubs"> + <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/> + </a> + <img src="/static/img/11x11p.png"/> + <font class="detDesc"> + This is the content <span>and should be</span> OK + </font> + </td> + <td align="right">s</td> + <td align="right">d</td> + </tr> + </table> + """ + response = mock.Mock(text=html) + results = piratebay.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link') + self.assertEqual(results[0]['content'], 'This is the content and should be OK') + 
self.assertEqual(results[0]['seed'], 0) + self.assertEqual(results[0]['leech'], 0) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK') + self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent') + + html = """ + <table id="searchResult"> + </table> + """ + response = mock.Mock(text=html) + results = piratebay.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py @@ -14,6 +14,7 @@ from searx.tests.engines.test_google_images import * # noqa from searx.tests.engines.test_google_news import * # noqa from searx.tests.engines.test_kickass import * # noqa from searx.tests.engines.test_mixcloud import * # noqa +from searx.tests.engines.test_piratebay import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa