logo

searx

Unnamed repository; edit this file 'description' to name the repository.
commit: 00e2bc4df0673daff91bcf4a84b649e44c6e8bc3
parent: 752c6a28dad5d1abce54e083015669b5ca0a682e
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Sun, 27 Aug 2017 21:05:30 +0200

[engines] remove torrentz (stopped since 2016-08-05)

Diffstat:

searx/engines/torrentz.py | 92 -------------------------------------------------------------------------------
tests/unit/engines/test_torrentz.py | 91 -------------------------------------------------------------------------------
2 files changed, 0 insertions(+), 183 deletions(-)

diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py @@ -1,92 +0,0 @@ -""" - Torrentz.eu (BitTorrent meta-search engine) - - @website https://torrentz.eu/ - @provide-api no - - @using-api no - @results HTML - @stable no (HTML can change, although unlikely, - see https://torrentz.eu/torrentz.btsearch) - @parse url, title, publishedDate, seed, leech, filesize, magnetlink -""" - -import re -from lxml import html -from datetime import datetime -from searx.engines.nyaa import int_or_zero, get_filesize_mul -from searx.engines.xpath import extract_text -from searx.url_utils import urlencode - -# engine dependent config -categories = ['files', 'videos', 'music'] -paging = True - -# search-url -# https://torrentz.eu/search?f=EXAMPLE&p=6 -base_url = 'https://torrentz.eu/' -search_url = base_url + 'search?{query}' - - -# do search-request -def request(query, params): - page = params['pageno'] - 1 - query = urlencode({'q': query, 'p': page}) - params['url'] = search_url.format(query=query) - return params - - -# get response from search-request -def response(resp): - results = [] - - dom = html.fromstring(resp.text) - - for result in dom.xpath('//div[@class="results"]/dl'): - name_cell = result.xpath('./dt')[0] - title = extract_text(name_cell) - - # skip rows that do not contain a link to a torrent - links = name_cell.xpath('./a') - if len(links) != 1: - continue - - # extract url and remove a slash in the beginning - link = links[0].attrib.get('href').lstrip('/') - - seed = result.xpath('./dd/span[@class="u"]/text()')[0].replace(',', '') - leech = result.xpath('./dd/span[@class="d"]/text()')[0].replace(',', '') - - params = { - 'url': base_url + link, - 'title': title, - 'seed': int_or_zero(seed), - 'leech': int_or_zero(leech), - 'template': 'torrent.html' - } - - # let's try to calculate the torrent size - try: - size_str = result.xpath('./dd/span[@class="s"]/text()')[0] - size, suffix = size_str.split() - params['filesize'] = int(size) * 
get_filesize_mul(suffix) - except: - pass - - # does our link contain a valid SHA1 sum? - if re.compile('[0-9a-fA-F]{40}').match(link): - # add a magnet link to the result - params['magnetlink'] = 'magnet:?xt=urn:btih:' + link - - # extract and convert creation date - try: - date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title') - # Fri, 25 Mar 2016 16:29:01 - date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S') - params['publishedDate'] = date - except: - pass - - results.append(params) - - return results diff --git a/tests/unit/engines/test_torrentz.py b/tests/unit/engines/test_torrentz.py @@ -1,91 +0,0 @@ -import mock -from collections import defaultdict -from searx.engines import torrentz -from searx.testing import SearxTestCase -from datetime import datetime - - -class TestTorrentzEngine(SearxTestCase): - - def test_request(self): - query = 'test_query' - dic = defaultdict(dict) - dic['pageno'] = 1 - params = torrentz.request(query, dic) - self.assertTrue('url' in params) - self.assertTrue(query in params['url']) - self.assertTrue('torrentz.eu' in params['url']) - - def test_response(self): - resp = mock.Mock(text='<html></html>') - self.assertEqual(torrentz.response(resp), []) - - html = """ - <div class="results"> - <dl> - <dt> - <a href="/4362e08b1d80e1820fb2550b752f9f3126fe76d6"> - Completely valid info - </a> - books ebooks - </dt> - <dd> - <span class="v">1</span> - <span class="a"> - <span title="Sun, 22 Nov 2015 03:01:42">4 months</span> - </span> - <span class="s">30 MB</span> - <span class="u">14</span> - <span class="d">1</span> - </dd> - </dl> - - <dl> - <dt> - <a href="/poaskdpokaspod"> - Invalid hash and date and filesize - </a> - books ebooks - </dt> - <dd> - <span class="v">1</span> - <span class="a"> - <span title="Sun, 2124091j0j190gm42">4 months</span> - </span> - <span class="s">30MB</span> - <span class="u">5,555</span> - <span class="d">1,234,567</span> - </dd> - </dl> - </div> - """ - - resp = 
mock.Mock(text=html) - results = torrentz.response(resp) - - self.assertEqual(type(results), list) - self.assertEqual(len(results), 2) - - # testing against the first result - r = results[0] - self.assertEqual(r['url'], 'https://torrentz.eu/4362e08b1d80e1820fb2550b752f9f3126fe76d6') - self.assertEqual(r['title'], 'Completely valid info books ebooks') - # 22 Nov 2015 03:01:42 - self.assertEqual(r['publishedDate'], datetime(2015, 11, 22, 3, 1, 42)) - self.assertEqual(r['seed'], 14) - self.assertEqual(r['leech'], 1) - self.assertEqual(r['filesize'], 30 * 1024 * 1024) - self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4362e08b1d80e1820fb2550b752f9f3126fe76d6') - - # testing against the second result - r = results[1] - self.assertEqual(r['url'], 'https://torrentz.eu/poaskdpokaspod') - self.assertEqual(r['title'], 'Invalid hash and date and filesize books ebooks') - self.assertEqual(r['seed'], 5555) - self.assertEqual(r['leech'], 1234567) - - # in the second result we have invalid hash, creation date & torrent size, - # so these tests should fail - self.assertFalse('magnetlink' in r) - self.assertFalse('filesize' in r) - self.assertFalse('publishedDate' in r)