logo

searx

My custom branch(es) of searx, a meta-search engine
commit: f4df27fa59c21f6c776081364afb2ab1a8b83b74
parent: 688801076d173de67bf4543ca289a35f28b6c245
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Thu,  7 Sep 2017 19:33:07 +0200

Merge pull request #1022 from misnyo/nyaa

nyaa.si fixed

Diffstat:

M searx/engines/nyaa.py | 89 ++++++++++++++++++++++++++++++++++++-------------------------------------------
M searx/engines/tokyotoshokan.py | 5 ++---
M searx/utils.py | 9 +++++++++
M tests/unit/engines/test_nyaa.py | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
4 files changed, 142 insertions(+), 85 deletions(-)

diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py @@ -1,7 +1,7 @@ """ - Nyaa.se (Anime Bittorrent tracker) + Nyaa.si (Anime Bittorrent tracker) - @website http://www.nyaa.se/ + @website http://www.nyaa.si/ @provide-api no @using-api no @results HTML @@ -12,50 +12,25 @@ from lxml import html from searx.engines.xpath import extract_text from searx.url_utils import urlencode +from searx.utils import get_torrent_size, int_or_zero # engine dependent config categories = ['files', 'images', 'videos', 'music'] paging = True # search-url -base_url = 'http://www.nyaa.se/' +base_url = 'http://www.nyaa.si/' search_url = base_url + '?page=search&{query}&offset={offset}' # xpath queries -xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' -xpath_category = './/td[@class="tlisticon"]/a' -xpath_title = './/td[@class="tlistname"]/a' -xpath_torrent_file = './/td[@class="tlistdownload"]/a' -xpath_filesize = './/td[@class="tlistsize"]/text()' -xpath_seeds = './/td[@class="tlistsn"]/text()' -xpath_leeches = './/td[@class="tlistln"]/text()' -xpath_downloads = './/td[@class="tlistdn"]/text()' - - -# convert a variable to integer or return 0 if it's not a number -def int_or_zero(num): - if isinstance(num, list): - if len(num) < 1: - return 0 - num = num[0] - if num.isdigit(): - return int(num) - return 0 - - -# get multiplier to convert torrent size to bytes -def get_filesize_mul(suffix): - return { - 'KB': 1024, - 'MB': 1024 ** 2, - 'GB': 1024 ** 3, - 'TB': 1024 ** 4, - - 'KIB': 1024, - 'MIB': 1024 ** 2, - 'GIB': 1024 ** 3, - 'TIB': 1024 ** 4 - }[str(suffix).upper()] +xpath_results = '//table[contains(@class, "torrent-list")]//tr[not(th)]' +xpath_category = './/td[1]/a[1]' +xpath_title = './/td[2]/a[last()]' +xpath_torrent_links = './/td[3]/a' +xpath_filesize = './/td[4]/text()' +xpath_seeds = './/td[6]/text()' +xpath_leeches = './/td[7]/text()' +xpath_downloads = './/td[8]/text()' # do search-request @@ -72,25 +47,32 @@ def response(resp): dom = 
html.fromstring(resp.text) for result in dom.xpath(xpath_results): + # defaults + filesize = 0 + magnet_link = "" + torrent_link = "" + # category in which our torrent belongs - category = result.xpath(xpath_category)[0].attrib.get('title') + try: + category = result.xpath(xpath_category)[0].attrib.get('title') + except: + pass # torrent title page_a = result.xpath(xpath_title)[0] title = extract_text(page_a) # link to the page - href = page_a.attrib.get('href') + href = base_url + page_a.attrib.get('href') - # link to the torrent file - torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href') - - # torrent size - try: - file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') - file_size = int(float(file_size) * get_filesize_mul(suffix)) - except: - file_size = None + for link in result.xpath(xpath_torrent_links): + url = link.attrib.get('href') + if 'magnet' in url: + # link to the magnet + magnet_link = url + else: + # link to the torrent file + torrent_link = url # seed count seed = int_or_zero(result.xpath(xpath_seeds)) @@ -101,6 +83,14 @@ def response(resp): # torrent downloads count downloads = int_or_zero(result.xpath(xpath_downloads)) + # let's try to calculate the torrent size + try: + filesize_info = result.xpath(xpath_filesize)[0] + filesize, filesize_multiplier = filesize_info.split() + filesize = get_torrent_size(filesize, filesize_multiplier) + except: + pass + # content string contains all information not included into template content = 'Category: "{category}". Downloaded {downloads} times.' 
content = content.format(category=category, downloads=downloads) @@ -110,8 +100,9 @@ def response(resp): 'content': content, 'seed': seed, 'leech': leech, - 'filesize': file_size, + 'filesize': filesize, 'torrentfile': torrent_link, + 'magnetlink': magnet_link, 'template': 'torrent.html'}) return results diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py @@ -14,8 +14,8 @@ import re from lxml import html from searx.engines.xpath import extract_text from datetime import datetime -from searx.engines.nyaa import int_or_zero, get_filesize_mul from searx.url_utils import urlencode +from searx.utils import get_torrent_size, int_or_zero # engine dependent config categories = ['files', 'videos', 'music'] @@ -76,8 +76,7 @@ def response(resp): try: # ('1.228', 'GB') groups = size_re.match(item).groups() - multiplier = get_filesize_mul(groups[1]) - params['filesize'] = int(multiplier * float(groups[0])) + params['filesize'] = get_torrent_size(groups[0], groups[1]) except: pass elif item.startswith('Date:'): diff --git a/searx/utils.py b/searx/utils.py @@ -290,6 +290,15 @@ def convert_str_to_int(number_str): return 0 +# convert a variable to integer or return 0 if it's not a number +def int_or_zero(num): + if isinstance(num, list): + if len(num) < 1: + return 0 + num = num[0] + return convert_str_to_int(num) + + def is_valid_lang(lang): is_abbr = (len(lang) == 2) if is_abbr: diff --git a/tests/unit/engines/test_nyaa.py b/tests/unit/engines/test_nyaa.py @@ -13,38 +13,92 @@ class TestNyaaEngine(SearxTestCase): params = nyaa.request(query, dic) self.assertTrue('url' in params) self.assertTrue(query in params['url']) - self.assertTrue('nyaa.se' in params['url']) + self.assertTrue('nyaa.si' in params['url']) def test_response(self): resp = mock.Mock(text='<html></html>') self.assertEqual(nyaa.response(resp), []) html = """ - <table class="tlist"> - <tbody> - <tr class="trusted tlistrow"> - <td class="tlisticon"> - <a href="//www.nyaa.se" 
title="English-translated Anime"> - <img src="//files.nyaa.se" alt="English-translated Anime"> - </a> - </td> - <td class="tlistname"> - <a href="//www.nyaa.se/?page3"> - Sample torrent title - </a> - </td> - <td class="tlistdownload"> - <a href="//www.nyaa.se/?page_dl" title="Download"> - <img src="//files.nyaa.se/www-dl.png" alt="DL"> - </a> - </td> - <td class="tlistsize">10 MiB</td> - <td class="tlistsn">1</td> - <td class="tlistln">3</td> - <td class="tlistdn">666</td> - <td class="tlistmn">0</td> - </tr> - </tbody> + <table class="table table-bordered table-hover table-striped torrent-list"> + <thead> + <tr> + <th class="hdr-category text-center" style="width:80px;"> + <div>Category</div> + </th> + <th class="hdr-name" style="width:auto;"> + <div>Name</div> + </th> + <th class="hdr-comments sorting text-center" title="Comments" style="width:50px;"> + <a href="/?f=0&amp;c=0_0&amp;q=Death+Parade&amp;s=comments&amp;o=desc"></a> + <i class="fa fa-comments-o"></i> + </th> + <th class="hdr-link text-center" style="width:70px;"> + <div>Link</div> + </th> + <th class="hdr-size sorting text-center" style="width:100px;"> + <a href="/?f=0&amp;c=0_0&amp;q=Death+Parade&amp;s=size&amp;o=desc"></a> + <div>Size</div> + </th> + <th class="hdr-date sorting_desc text-center" title="In local time" style="width:140px;"> + <a href="/?f=0&amp;c=0_0&amp;q=Death+Parade&amp;s=id&amp;o=asc"></a> + <div>Date</div> + </th> + <th class="hdr-seeders sorting text-center" title="Seeders" style="width:50px;"> + <a href="/?f=0&amp;c=0_0&amp;q=Death+Parade&amp;s=seeders&amp;o=desc"></a> + <i class="fa fa-arrow-up" aria-hidden="true"></i> + </th> + <th class="hdr-leechers sorting text-center" title="Leechers" style="width:50px;"> + <a href="/?f=0&amp;c=0_0&amp;q=Death+Parade&amp;s=leechers&amp;o=desc"></a> + <i class="fa fa-arrow-down" aria-hidden="true"></i> + </th> + <th class="hdr-downloads sorting text-center" title="Completed downloads" style="width:50px;"> + <a 
href="/?f=0&amp;c=0_0&amp;q=Death+Parade&amp;s=downloads&amp;o=desc"></a> + <i class="fa fa-check" aria-hidden="true"></i> + </th> + </tr> + </thead> + <tbody> + <tr class="default"> + <td style="padding:0 4px;"> + <a href="/?c=1_2" title="Anime - English-translated"> + <img src="/static/img/icons/nyaa/1_2.png" alt="Anime - English-translated"> + </a> + </td> + <td colspan="2"> + <a href="/view/1" title="Sample title 1">Sample title 1</a> + </td> + <td class="text-center" style="white-space: nowrap;"> + <a href="/download/1.torrent"><i class="fa fa-fw fa-download"></i></a> + <a href="magnet:?xt=urn:btih:2"><i class="fa fa-fw fa-magnet"></i></a> + </td> + <td class="text-center">723.7 MiB</td> + <td class="text-center" data-timestamp="1503307456" title="1 week 3 + days 9 hours 44 minutes 39 seconds ago">2017-08-21 11:24</td> + <td class="text-center" style="color: green;">1</td> + <td class="text-center" style="color: red;">3</td> + <td class="text-center">12</td> + </tr> + <tr class="default"> + <td style="padding:0 4px;"> + <a href="/?c=1_2" title="Anime - English-translated"> + <img src="/static/img/icons/nyaa/1_2.png" alt="Anime - English-translated"> + </a> + </td> + <td colspan="2"> + <a href="/view/2" title="Sample title 2">Sample title 2</a> + </td> + <td class="text-center" style="white-space: nowrap;"> + <a href="magnet:?xt=urn:btih:2"><i class="fa fa-fw fa-magnet"></i></a> + </td> + <td class="text-center">8.2 GiB</td> + <td class="text-center" data-timestamp="1491608400" title="4 months 3 + weeks 4 days 19 hours 28 minutes 55 seconds ago">2017-04-08 01:40</td> + <td class="text-center" style="color: green;">10</td> + <td class="text-center" style="color: red;">1</td> + <td class="text-center">206</td> + </tr> + </tbody> </table> """ @@ -52,15 +106,19 @@ class TestNyaaEngine(SearxTestCase): results = nyaa.response(resp) self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) + self.assertEqual(len(results), 2) r = results[0] - 
self.assertTrue(r['url'].find('www.nyaa.se/?page3') >= 0) - self.assertTrue(r['torrentfile'].find('www.nyaa.se/?page_dl') >= 0) - self.assertTrue(r['content'].find('English-translated Anime') >= 0) - self.assertTrue(r['content'].find('Downloaded 666 times.') >= 0) + self.assertTrue(r['url'].find('1') >= 0) + self.assertTrue(r['torrentfile'].find('1.torrent') >= 0) + self.assertTrue(r['content'].find('Anime - English-translated') >= 0) + self.assertTrue(r['content'].find('Downloaded 12 times.') >= 0) - self.assertEqual(r['title'], 'Sample torrent title') + self.assertEqual(r['title'], 'Sample title 1') self.assertEqual(r['seed'], 1) self.assertEqual(r['leech'], 3) - self.assertEqual(r['filesize'], 10 * 1024 * 1024) + self.assertEqual(r['filesize'], 723700000) + + r = results[1] + self.assertTrue(r['url'].find('2') >= 0) + self.assertTrue(r['magnetlink'].find('magnet:') >= 0)