logo

searx

My custom branche(s) on searx, a meta-search engine git clone https://hacktivis.me/git/searx.git
commit: 3a1c5876b16a51f64505c119283447b06f2a3d99
parent 104cdb7d03771d4eca5b5126532ccf47642bb9de
Author: Noemi Vanyi <sitbackandwait@gmail.com>
Date:   Sat, 13 Aug 2016 14:55:47 +0200

add digbt engine

Unfortunately, it is quite slow so it is disabled.
Furthermore, the display of number of files is wrong
on digbt.org, so it is not displayed on searx.

Diffstat:

Msearx/engines/btdigg.py16++--------------
Asearx/engines/digbt.py58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msearx/settings.yml7++++++-
Msearx/utils.py18++++++++++++++++++
4 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py @@ -16,6 +16,7 @@ from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text +from searx.utils import get_torrent_size # engine dependent config categories = ['videos', 'music', 'files'] @@ -68,20 +69,7 @@ def response(resp): leech = 0 # convert filesize to byte if possible - try: - filesize = float(filesize) - - # convert filesize to byte - if filesize_multiplier == 'TB': - filesize = int(filesize * 1024 * 1024 * 1024 * 1024) - elif filesize_multiplier == 'GB': - filesize = int(filesize * 1024 * 1024 * 1024) - elif filesize_multiplier == 'MB': - filesize = int(filesize * 1024 * 1024) - elif filesize_multiplier == 'KB': - filesize = int(filesize * 1024) - except: - filesize = None + filesize = get_torrent_size(filesize, filesize_multiplier) # convert files to int if possible if files.isdigit(): diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py @@ -0,0 +1,58 @@ +""" + DigBT (Videos, Music, Files) + + @website https://digbt.org + @provide-api no + + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content, magnetlink +""" + +from urlparse import urljoin +from lxml import html +from searx.engines.xpath import extract_text +from searx.utils import get_torrent_size + +categories = ['videos', 'music', 'files'] +paging = True + +URL = 'https://digbt.org' +SEARCH_URL = URL + '/search/{query}-time-{pageno}' +FILESIZE = 3 +FILESIZE_MULTIPLIER = 4 + + +def request(query, params): + params['url'] = SEARCH_URL.format(query=query, pageno=params['pageno']) + + return params + + +def response(resp): + dom = html.fromstring(resp.content) + search_res = dom.xpath('.//td[@class="x-item"]') + + if not search_res: + return list() + + results = list() + for result in search_res: + url = urljoin(URL, result.xpath('.//a[@title]/@href')[0]) + title = result.xpath('.//a[@title]/text()')[0] + content = extract_text(result.xpath('.//div[@class="files"]')) + files_data = extract_text(result.xpath('.//div[@class="tail"]')).split() + filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) + magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] + + results.append({'url': url, + 'title': title, + 'content': content, + 'filesize': filesize, + 'magnetlink': magnetlink, + 'seed': 'N/A', + 'leech': 'N/A', + 'template': 'torrent.html'}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml @@ -87,7 +87,7 @@ engines: - name : btdigg engine : btdigg shortcut : bt - + - name : crossref engine : json_engine paging : True @@ -118,6 +118,11 @@ engines: weight : 2 disabled : True + - name : digbt + engine : digbt + shortcut : dbt + timeout : 6.0 + - name : digg engine : digg shortcut : dg diff --git a/searx/utils.py b/searx/utils.py @@ -237,3 +237,21 @@ def list_get(a_list, index, default=None): return a_list[index] else: return default + + +def get_torrent_size(filesize, filesize_multiplier): + try: + filesize = float(filesize) + + if filesize_multiplier == 'TB': + filesize = int(filesize * 1024 * 1024 * 1024 * 1024) + elif filesize_multiplier == 'GB': + filesize = int(filesize * 1024 * 1024 * 1024) + elif filesize_multiplier == 'MB': + filesize = int(filesize * 1024 * 1024) + elif filesize_multiplier == 'KB': + filesize = int(filesize * 1024) + except: + filesize = None + + return filesize