commit: 13bed1f8727683c74d04c52396fabbfab99df76f
parent: acfe843ecd038ee3518f2afcee68bfedf4366366
Author: Adam Tauber <asciimoo@gmail.com>
Date: Tue, 16 Aug 2016 10:37:17 +0200
Merge pull request #639 from kvch/digbt-engine
add digbt engine - fixes #638
Diffstat:
5 files changed, 144 insertions(+), 15 deletions(-)
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
@@ -16,6 +16,7 @@ from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size
# engine dependent config
categories = ['videos', 'music', 'files']
@@ -68,20 +69,7 @@ def response(resp):
leech = 0
# convert filesize to byte if possible
- try:
- filesize = float(filesize)
-
- # convert filesize to byte
- if filesize_multiplier == 'TB':
- filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
- elif filesize_multiplier == 'GB':
- filesize = int(filesize * 1024 * 1024 * 1024)
- elif filesize_multiplier == 'MB':
- filesize = int(filesize * 1024 * 1024)
- elif filesize_multiplier == 'KB':
- filesize = int(filesize * 1024)
- except:
- filesize = None
+ filesize = get_torrent_size(filesize, filesize_multiplier)
# convert files to int if possible
if files.isdigit():
diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py
@@ -0,0 +1,58 @@
+"""
+ DigBT (Videos, Music, Files)
+
+ @website https://digbt.org
+ @provide-api no
+
+ @using-api no
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content, filesize, magnetlink
+"""
+
+from urlparse import urljoin
+from lxml import html
+from searx.engines.xpath import extract_text
+from searx.utils import get_torrent_size
+
+# categories this engine is registered under (presumably consumed by searx's engine loader)
+categories = ['videos', 'music', 'files']
+# request() reads params['pageno']; presumably this flag tells searx the engine is pageable
+paging = True
+
+# base URL of the site; response() also uses it to resolve relative result links
+URL = 'https://digbt.org'
+# search URL template, filled in by request() with the query and the page number
+SEARCH_URL = URL + '/search/{query}-time-{pageno}'
+# positions of the size value and of its unit in the whitespace-split text of a
+# result's "tail" div, e.g. "Files: 1 Size: 2.9 GB ..." -> '2.9' (3) and 'GB' (4)
+FILESIZE = 3
+FILESIZE_MULTIPLIER = 4
+
+
+def request(query, params):
+    """Build the DigBT search URL for ``query`` and store it in ``params``.
+
+    The query is percent-encoded before it is placed in the URL path, so
+    characters such as spaces, '?', '#' or '%' cannot truncate or alter the
+    request URL.  This mirrors the btdigg engine, which imports urllib's
+    ``quote`` for its own query.
+
+    :param query: search terms
+    :param params: request parameters; ``params['pageno']`` is read and
+                   ``params['url']`` is set
+    :returns: the same ``params`` object
+    """
+    # local import keeps this fix self-contained; the module targets
+    # Python 2 (it imports ``urlparse``), the fallback covers Python 3
+    try:
+        from urllib import quote
+    except ImportError:
+        from urllib.parse import quote
+
+    params['url'] = SEARCH_URL.format(query=quote(query),
+                                      pageno=params['pageno'])
+
+    return params
+
+
+def response(resp):
+    """Parse a DigBT result page into a list of torrent result dicts.
+
+    Every ``td`` with class ``x-item`` is treated as one result.  A row that
+    lacks the title link or the magnet link is skipped instead of raising
+    ``IndexError`` and discarding the whole page (the site's HTML is not
+    stable).  When the "tail" text has fewer tokens than expected the
+    result is kept with ``filesize`` set to ``None``.
+
+    :param resp: response object; only ``resp.content`` is read
+    :returns: list of dicts rendered with the ``torrent.html`` template,
+              empty when the page has no result rows
+    """
+    dom = html.fromstring(resp.content)
+
+    results = []
+    for result in dom.xpath('.//td[@class="x-item"]'):
+        hrefs = result.xpath('.//a[@title]/@href')
+        titles = result.xpath('.//a[@title]/text()')
+        magnetlinks = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')
+
+        # a result without link, title or magnet is unusable; skip the row
+        # rather than fail the whole response
+        if not (hrefs and titles and magnetlinks):
+            continue
+
+        content = extract_text(result.xpath('.//div[@class="files"]'))
+        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
+
+        # size value and unit sit at fixed positions of the tail text; only
+        # index them when the text is long enough
+        if len(files_data) > FILESIZE_MULTIPLIER:
+            filesize = get_torrent_size(files_data[FILESIZE],
+                                        files_data[FILESIZE_MULTIPLIER])
+        else:
+            filesize = None
+
+        results.append({'url': urljoin(URL, hrefs[0]),
+                        'title': titles[0],
+                        'content': content,
+                        'filesize': filesize,
+                        'magnetlink': magnetlinks[0],
+                        'seed': 'N/A',
+                        'leech': 'N/A',
+                        'template': 'torrent.html'})
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -87,7 +87,7 @@ engines:
- name : btdigg
engine : btdigg
shortcut : bt
-
+
- name : crossref
engine : json_engine
paging : True
@@ -118,6 +118,12 @@ engines:
weight : 2
disabled : True
+ - name : digbt
+ engine : digbt
+ shortcut : dbt
+ timeout : 6.0
+ disabled : True
+
- name : digg
engine : digg
shortcut : dg
diff --git a/searx/utils.py b/searx/utils.py
@@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
return a_list[index]
else:
return default
+
+
+def get_torrent_size(filesize, filesize_multiplier):
+    """Convert a size value and its unit into a number of bytes.
+
+    :param filesize: anything ``float()`` accepts, e.g. ``'2.9'``
+    :param filesize_multiplier: unit string; ``'TB'``, ``'GB'``, ``'MB'``
+                                and ``'KB'`` are scaled by powers of 1024
+    :returns: ``int`` number of bytes for a known unit, the value as
+              ``float`` (unscaled) for any other unit, or ``None`` when the
+              value cannot be converted
+    """
+    multipliers = {
+        'TB': 1024 ** 4,
+        'GB': 1024 ** 3,
+        'MB': 1024 ** 2,
+        'KB': 1024,
+    }
+
+    try:
+        filesize = float(filesize)
+
+        multiplier = multipliers.get(filesize_multiplier)
+        if multiplier is not None:
+            filesize = int(filesize * multiplier)
+    # only conversion failures mean "size unknown": float() raises
+    # TypeError/ValueError, int() raises ValueError for nan and
+    # OverflowError for inf; anything else (e.g. KeyboardInterrupt) must
+    # propagate
+    except (TypeError, ValueError, OverflowError):
+        return None
+
+    return filesize
diff --git a/tests/unit/engines/test_digbt.py b/tests/unit/engines/test_digbt.py
@@ -0,0 +1,59 @@
+from collections import defaultdict
+import mock
+from searx.engines import digbt
+from searx.testing import SearxTestCase
+
+
+class TestDigBTEngine(SearxTestCase):
+ """Unit tests for the request() and response() functions of the digbt engine."""
+
+ def test_request(self):
+ # request() must store a digbt.org URL containing the query in params['url']
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 0
+ params = digbt.request(query, dicto)
+ self.assertIn('url', params)
+ self.assertIn(query, params['url'])
+ self.assertIn('digbt.org', params['url'])
+
+ def test_response(self):
+ # inputs that are not response objects are expected to raise AttributeError
+ self.assertRaises(AttributeError, digbt.response, None)
+ self.assertRaises(AttributeError, digbt.response, [])
+ self.assertRaises(AttributeError, digbt.response, '')
+ self.assertRaises(AttributeError, digbt.response, '[]')
+
+ # a page without any result row yields an empty list
+ response = mock.Mock(content='<html></html>')
+ self.assertEqual(digbt.response(response), [])
+
+ # fixture with a single result row: title link, "files" div, and a "tail"
+ # div holding the size text and the magnet link
+ html = """
+ <table class="table">
+ <tr><td class="x-item">
+ <div>
+ <a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a>
+ <span class="ctime"><span style="color:red;">4 hours ago</span></span>
+ </div>
+ <div class="files">
+ <ul>
+ <li>The Big Bang Theory 2.9 GB</li>
+ <li>....</li>
+ </ul>
+ </div>
+ <div class="tail">
+ Files: 1 Size: 2.9 GB Downloads: 1 Updated: <span style="color:red;">4 hours ago</span>
+
+ <a class="title" href="magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory">
+ <span class="glyphicon glyphicon-magnet"></span> magnet-link
+ </a>
+
+ </div>
+ </td></tr>
+ </table>
+ """
+ response = mock.Mock(content=html)
+ results = digbt.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'The Big Bang Theory')
+ # the relative href is resolved against https://digbt.org
+ self.assertEqual(results[0]['url'], 'https://digbt.org/The-Big-Bang-Theory-d2.html')
+ self.assertEqual(results[0]['content'], 'The Big Bang Theory 2.9 GB ....')
+ # "2.9 GB" -> int(2.9 * 1024 ** 3) bytes
+ self.assertEqual(results[0]['filesize'], 3113851289)
+ self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory')