logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 13bed1f8727683c74d04c52396fabbfab99df76f
parent: acfe843ecd038ee3518f2afcee68bfedf4366366
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Tue, 16 Aug 2016 10:37:17 +0200

Merge pull request #639 from kvch/digbt-engine

add digbt engine - fixes #638

Diffstat:

M searx/engines/btdigg.py | 16 ++--------------
A searx/engines/digbt.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/settings.yml | 8 +++++++-
M searx/utils.py | 18 ++++++++++++++++++
A tests/unit/engines/test_digbt.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 144 insertions(+), 15 deletions(-)

diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py @@ -16,6 +16,7 @@ from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text +from searx.utils import get_torrent_size # engine dependent config categories = ['videos', 'music', 'files'] @@ -68,20 +69,7 @@ def response(resp): leech = 0 # convert filesize to byte if possible - try: - filesize = float(filesize) - - # convert filesize to byte - if filesize_multiplier == 'TB': - filesize = int(filesize * 1024 * 1024 * 1024 * 1024) - elif filesize_multiplier == 'GB': - filesize = int(filesize * 1024 * 1024 * 1024) - elif filesize_multiplier == 'MB': - filesize = int(filesize * 1024 * 1024) - elif filesize_multiplier == 'KB': - filesize = int(filesize * 1024) - except: - filesize = None + filesize = get_torrent_size(filesize, filesize_multiplier) # convert files to int if possible if files.isdigit(): diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py @@ -0,0 +1,58 @@ +""" + DigBT (Videos, Music, Files) + + @website https://digbt.org + @provide-api no + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, magnetlink +""" + +from urlparse import urljoin +from lxml import html +from searx.engines.xpath import extract_text +from searx.utils import get_torrent_size + +categories = ['videos', 'music', 'files'] +paging = True + +URL = 'https://digbt.org' +SEARCH_URL = URL + '/search/{query}-time-{pageno}' +FILESIZE = 3 +FILESIZE_MULTIPLIER = 4 + + +def request(query, params): + params['url'] = SEARCH_URL.format(query=query, pageno=params['pageno']) + + return params + + +def response(resp): + dom = html.fromstring(resp.content) + search_res = dom.xpath('.//td[@class="x-item"]') + + if not search_res: + return list() + + results = list() + for result in search_res: + url = urljoin(URL, result.xpath('.//a[@title]/@href')[0]) + title = result.xpath('.//a[@title]/text()')[0] + content 
= extract_text(result.xpath('.//div[@class="files"]')) + files_data = extract_text(result.xpath('.//div[@class="tail"]')).split() + filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) + magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] + + results.append({'url': url, + 'title': title, + 'content': content, + 'filesize': filesize, + 'magnetlink': magnetlink, + 'seed': 'N/A', + 'leech': 'N/A', + 'template': 'torrent.html'}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml @@ -87,7 +87,7 @@ engines: - name : btdigg engine : btdigg shortcut : bt - + - name : crossref engine : json_engine paging : True @@ -118,6 +118,12 @@ engines: weight : 2 disabled : True + - name : digbt + engine : digbt + shortcut : dbt + timeout : 6.0 + disabled : True + - name : digg engine : digg shortcut : dg diff --git a/searx/utils.py b/searx/utils.py @@ -237,3 +237,21 @@ def list_get(a_list, index, default=None): return a_list[index] else: return default + + +def get_torrent_size(filesize, filesize_multiplier): + try: + filesize = float(filesize) + + if filesize_multiplier == 'TB': + filesize = int(filesize * 1024 * 1024 * 1024 * 1024) + elif filesize_multiplier == 'GB': + filesize = int(filesize * 1024 * 1024 * 1024) + elif filesize_multiplier == 'MB': + filesize = int(filesize * 1024 * 1024) + elif filesize_multiplier == 'KB': + filesize = int(filesize * 1024) + except: + filesize = None + + return filesize diff --git a/tests/unit/engines/test_digbt.py b/tests/unit/engines/test_digbt.py @@ -0,0 +1,59 @@ +from collections import defaultdict +import mock +from searx.engines import digbt +from searx.testing import SearxTestCase + + +class TestDigBTEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = digbt.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('digbt.org', 
params['url']) + + def test_response(self): + self.assertRaises(AttributeError, digbt.response, None) + self.assertRaises(AttributeError, digbt.response, []) + self.assertRaises(AttributeError, digbt.response, '') + self.assertRaises(AttributeError, digbt.response, '[]') + + response = mock.Mock(content='<html></html>') + self.assertEqual(digbt.response(response), []) + + html = """ + <table class="table"> + <tr><td class="x-item"> + <div> + <a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a> + <span class="ctime"><span style="color:red;">4 hours ago</span></span> + </div> + <div class="files"> + <ul> + <li>The Big Bang Theory 2.9 GB</li> + <li>....</li> + </ul> + </div> + <div class="tail"> + Files: 1 Size: 2.9 GB Downloads: 1 Updated: <span style="color:red;">4 hours ago</span> + &nbsp; &nbsp; + <a class="title" href="magnet:?xt=urn:btih:a&amp;dn=The+Big+Bang+Theory"> + <span class="glyphicon glyphicon-magnet"></span> magnet-link + </a> + &nbsp; &nbsp; + </div> + </td></tr> + </table> + """ + response = mock.Mock(content=html) + results = digbt.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'The Big Bang Theory') + self.assertEqual(results[0]['url'], 'https://digbt.org/The-Big-Bang-Theory-d2.html') + self.assertEqual(results[0]['content'], 'The Big Bang Theory 2.9 GB ....') + self.assertEqual(results[0]['filesize'], 3113851289) + self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory')