logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: c5d83059d537d8efb296ffbe743828a884ac4e10
parent 80b9312e42087351bb081ceab717e479e75a1ab0
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Tue,  2 Sep 2014 17:28:35 +0200

update generalfile engine and add comments

Diffstat:

M searx/engines/generalfile.py | 31 ++++++++++++++++++++++++++++---
M searx/settings.yml           |  1 -
2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/searx/engines/generalfile.py b/searx/engines/generalfile.py
@@ -1,35 +1,60 @@
+## General Files (Files)
+#
+# @website http://www.general-files.org
+# @provide-api no (nothing found)
+#
+# @using-api no (because nothing found)
+# @results HTML (using search portal)
+# @stable no (HTML can change)
+# @parse url, title, content
+#
+# @todo detect torrents?
+
 from lxml import html
 
+# engine dependent config
+categories = ['files']
+paging = True
 
+# search-url
 base_url = 'http://www.general-file.com'
 search_url = base_url + '/files-{letter}/{query}/{pageno}'
 
+# specific xpath variables
 result_xpath = '//table[@class="block-file"]'
 title_xpath = './/h2/a//text()'
 url_xpath = './/h2/a/@href'
 content_xpath = './/p//text()'
 
-paging = True
-
 
+# do search-request
 def request(query, params):
+
     params['url'] = search_url.format(query=query,
                                       letter=query[0],
                                       pageno=params['pageno'])
+
     return params
 
 
+# get response from search-request
 def response(resp):
-
     results = []
+
     dom = html.fromstring(resp.text)
+
+    # parse results
     for result in dom.xpath(result_xpath):
         url = result.xpath(url_xpath)[0]
+
         # skip fast download links
         if not url.startswith('/'):
             continue
+
+        # append result
         results.append({'url': base_url + url,
                         'title': ''.join(result.xpath(title_xpath)),
                         'content': ''.join(result.xpath(content_xpath))})
 
+    # return results
     return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -62,7 +62,6 @@ engines:
 
   - name : general-file
     engine : generalfile
-    categories : files
     shortcut : gf
 
   - name : github