logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 9ad8013a4578dc8069227d3e9de88d734089828e
parent: 8520be3cd6bb2ae4aa4ee483f632ef7fca318e1a
Author: asciimoo <asciimoo@gmail.com>
Date:   Tue, 22 Oct 2013 18:58:01 +0200

[enh] piratebay engine added

Diffstat:

A searx/engines/piratebay.py | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+), 0 deletions(-)

# searx/engines/piratebay.py (new file added by this commit)
"""The Pirate Bay engine.

``request`` fills in the search URL for a query and ``response`` scrapes the
returned HTML result table into a list of result dictionaries.
"""
from lxml import html
from urlparse import urljoin
from cgi import escape
from urllib import quote

categories = ['videos', 'music']

base_url = 'https://thepiratebay.sx/'
search_url = base_url + 'search/{search_term}/0/99/{search_type}'

# Maps a category name to the numeric code placed at the end of the search
# URL (200 is the video category).
search_types = {
    'videos': '200',
    'music': '100',
}

# Code used when the requested category has no entry in ``search_types``.
# Without a fallback the literal text "None" would be formatted into the URL.
# NOTE(review): '0' is presumably the site's "all categories" code - confirm.
default_search_type = '0'


def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    Args:
        query: the search terms; URL-quoted before being placed in the path.
        params: request parameter dict. ``params['category']`` selects the
            category code; unknown categories use ``default_search_type``.

    Returns:
        The same ``params`` dict, with ``'url'`` set.
    """
    search_type = search_types.get(params['category'], default_search_type)
    params['url'] = search_url.format(search_term=quote(query),
                                      search_type=search_type)
    return params


def response(resp):
    """Parse a search result page into a list of result dicts.

    Args:
        resp: response object whose ``text`` attribute holds the page HTML.

    Returns:
        A list of ``{'url', 'title', 'content'}`` dicts, one per result row
        that contains a ``detName`` link. The list is empty when the page
        has no ``searchResult`` table.
    """
    results = []
    dom = html.fromstring(resp.text)
    rows = dom.xpath('//table[@id="searchResult"]//tr')
    # The first row is skipped (presumably the table header). Slicing an
    # empty list is safe, so no separate emptiness check is needed.
    for row in rows[1:]:
        links = row.xpath('.//div[@class="detName"]//a')
        if not links:
            # Row without a title link: skip it instead of raising
            # IndexError and losing every result parsed so far.
            continue
        link = links[0]
        url = urljoin(base_url, link.attrib.get('href'))
        title = ' '.join(link.xpath('.//text()'))
        description = ' '.join(row.xpath('.//font[@class="detDesc"]//text()'))
        results.append({'url': url,
                        'title': title,
                        'content': escape(description)})
    return results