commit: 5a16077455ef9e821a2b5f5f7e975be8a37ce83d
parent c6535dd65ebf110d00d633db1170f35cf60b8df0
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 1 Feb 2015 15:23:26 +0100
PirateBay unit test + reactivation in Settings
Diffstat:
4 files changed, 149 insertions(+), 7 deletions(-)
diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py
@@ -13,6 +13,7 @@ from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos', 'music', 'files']
@@ -29,7 +30,8 @@ search_types = {'files': '0',
# specific xpath variables
magnet_xpath = './/a[@title="Download this torrent using magnet"]'
-content_xpath = './/font[@class="detDesc"]//text()'
+torrent_xpath = './/a[@title="Download this torrent"]'
+content_xpath = './/font[@class="detDesc"]'
# do search-request
@@ -59,8 +61,8 @@ def response(resp):
for result in search_res[1:]:
link = result.xpath('.//div[@class="detName"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
- title = ' '.join(link.xpath('.//text()'))
- content = escape(' '.join(result.xpath(content_xpath)))
+ title = extract_text(link)
+ content = escape(extract_text(result.xpath(content_xpath)))
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
# convert seed to int if possible
@@ -76,6 +78,7 @@ def response(resp):
leech = 0
magnetlink = result.xpath(magnet_xpath)[0]
+ torrentfile = result.xpath(torrent_xpath)[0]
# append result
results.append({'url': href,
@@ -83,7 +86,8 @@ def response(resp):
'content': content,
'seed': seed,
'leech': leech,
- 'magnetlink': magnetlink.attrib['href'],
+ 'magnetlink': magnetlink.attrib.get('href'),
+ 'torrentfile': torrentfile.attrib.get('href'),
'template': 'torrent.html'})
# return results sorted by seeder
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -152,9 +152,9 @@ engines:
engine : photon
shortcut : ph
-# - name : piratebay
-# engine : piratebay
-# shortcut : tpb
+ - name : piratebay
+ engine : piratebay
+ shortcut : tpb
- name : kickass
engine : kickass
diff --git a/searx/tests/engines/test_piratebay.py b/searx/tests/engines/test_piratebay.py
@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import piratebay
+from searx.testing import SearxTestCase
+
+
+class TestPiratebayEngine(SearxTestCase):  # unit tests for the piratebay engine's request() and response()
+
+ def test_request(self):  # checks the URL that piratebay.request() stores in params['url']
+ query = 'test_query'
+ dicto = defaultdict(dict)  # keys that are never set read back as {} instead of raising KeyError
+ dicto['pageno'] = 1
+ dicto['category'] = 'Toto'  # not one of the engine's declared categories
+ params = piratebay.request(query, dicto)
+ self.assertIn('url', params)
+ self.assertIn(query, params['url'])
+ self.assertIn('piratebay.cr', params['url'])
+ self.assertIn('0', params['url'])  # NOTE(review): presumably the fallback search type code; a bare '0' is a weak match - confirm
+
+ dicto['category'] = 'music'
+ params = piratebay.request(query, dicto)
+ self.assertIn('100', params['url'])  # presumably the search type code for 'music' - confirm against search_types
+
+ def test_response(self):  # checks the result dicts piratebay.response() builds from an HTML page
+ self.assertRaises(AttributeError, piratebay.response, None)  # these four inputs are not response objects and must raise AttributeError
+ self.assertRaises(AttributeError, piratebay.response, [])
+ self.assertRaises(AttributeError, piratebay.response, '')
+ self.assertRaises(AttributeError, piratebay.response, '[]')
+
+ response = mock.Mock(text='<html></html>')  # page that contains no result table
+ self.assertEqual(piratebay.response(response), [])  # expected outcome: an empty result list
+
+ html = """
+ <table id="searchResult">
+ <tr>
+ </tr>
+ <tr>
+ <td class="vertTh">
+ <center>
+ <a href="#" title="More from this category">Anime</a><br/>
+ (<a href="#" title="More from this category">Anime</a>)
+ </center>
+ </td>
+ <td>
+ <div class="detName">
+ <a href="/this.is.the.link" class="detLink" title="Title">
+ This is the title
+ </a>
+ </div>
+ <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
+ <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
+ </a>
+ <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
+ <img src="/static/img/dl.gif" class="dl" alt="Download"/>
+ </a>
+ <a href="/user/HorribleSubs">
+ <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
+ </a>
+ <img src="/static/img/11x11p.png"/>
+ <font class="detDesc">
+ This is the content <span>and should be</span> OK
+ </font>
+ </td>
+ <td align="right">13</td>
+ <td align="right">334</td>
+ </tr>
+ </table>
+ """
+ response = mock.Mock(text=html)  # fixture above: an empty first row followed by one result row with numeric counts
+ results = piratebay.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)  # only the second row is expected to produce a result
+ self.assertEqual(results[0]['title'], 'This is the title')
+ self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')  # relative href joined onto the engine's base URL
+ self.assertEqual(results[0]['content'], 'This is the content and should be OK')  # text of the nested <span> is merged into the description
+ self.assertEqual(results[0]['seed'], 13)  # first right-aligned cell, converted to int
+ self.assertEqual(results[0]['leech'], 334)  # second right-aligned cell, converted to int
+ self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
+ self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
+
+ html = """
+ <table id="searchResult">
+ <tr>
+ </tr>
+ <tr>
+ <td class="vertTh">
+ <center>
+ <a href="#" title="More from this category">Anime</a><br/>
+ (<a href="#" title="More from this category">Anime</a>)
+ </center>
+ </td>
+ <td>
+ <div class="detName">
+ <a href="/this.is.the.link" class="detLink" title="Title">
+ This is the title
+ </a>
+ </div>
+ <a href="magnet:?xt=urn:btih:MAGNETLINK" title="Download this torrent using magnet">
+ <img src="/static/img/icon-magnet.gif" alt="Magnet link"/>
+ </a>
+ <a href="http://torcache.net/torrent/TORRENTFILE.torrent" title="Download this torrent">
+ <img src="/static/img/dl.gif" class="dl" alt="Download"/>
+ </a>
+ <a href="/user/HorribleSubs">
+ <img src="/static/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" border='0'/>
+ </a>
+ <img src="/static/img/11x11p.png"/>
+ <font class="detDesc">
+ This is the content <span>and should be</span> OK
+ </font>
+ </td>
+ <td align="right">s</td>
+ <td align="right">d</td>
+ </tr>
+ </table>
+ """
+ response = mock.Mock(text=html)  # same fixture as before, except the seed/leech cells hold 's' and 'd'
+ results = piratebay.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This is the title')
+ self.assertEqual(results[0]['url'], 'https://thepiratebay.cr/this.is.the.link')
+ self.assertEqual(results[0]['content'], 'This is the content and should be OK')
+ self.assertEqual(results[0]['seed'], 0)  # non-numeric seed text is expected to become 0
+ self.assertEqual(results[0]['leech'], 0)  # non-numeric leech text is expected to become 0
+ self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:MAGNETLINK')
+ self.assertEqual(results[0]['torrentfile'], 'http://torcache.net/torrent/TORRENTFILE.torrent')
+
+ html = """
+ <table id="searchResult">
+ </table>
+ """
+ response = mock.Mock(text=html)  # result table that contains no rows at all
+ results = piratebay.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)  # an empty table must produce an empty list rather than an error
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
@@ -14,6 +14,7 @@ from searx.tests.engines.test_google_images import * # noqa
from searx.tests.engines.test_google_news import * # noqa
from searx.tests.engines.test_kickass import * # noqa
from searx.tests.engines.test_mixcloud import * # noqa
+from searx.tests.engines.test_piratebay import * # noqa
from searx.tests.engines.test_searchcode_code import * # noqa
from searx.tests.engines.test_searchcode_doc import * # noqa
from searx.tests.engines.test_soundcloud import * # noqa