logo

searx

My custom branch(es) of searx, a meta-search engine — git clone https://hacktivis.me/git/searx.git
commit: 04f7118d0a0693906ef57fa83f01d29eb366a45e
parent 7c075aa73197030d01b210054488ce99ec861d70
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Sun,  8 Feb 2015 14:12:14 +0100

[enh] add gigablast engine

Diffstat:

A searx/engines/gigablast.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/settings.yml | 4 ++++
A searx/tests/engines/test_gigablast.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/tests/test_engines.py | 1 +
4 files changed, 125 insertions(+), 0 deletions(-)

diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py @@ -0,0 +1,63 @@ +## Gigablast (Web) +# +# @website http://gigablast.com +# @provide-api yes (http://gigablast.com/api.html) +# +# @using-api yes +# @results XML +# @stable yes +# @parse url, title, content + +from urllib import urlencode +from cgi import escape +from lxml import etree + +# engine dependent config +categories = ['general'] +paging = True +number_of_results = 5 + +# search-url +base_url = 'http://gigablast.com/' +search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0' + +# specific xpath variables +results_xpath = '//response//result' +url_xpath = './/url' +title_xpath = './/title' +content_xpath = './/sum' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * number_of_results + + search_path = search_string.format( + query=urlencode({'q': query}), + offset=offset, + number_of_results=number_of_results) + + params['url'] = base_url + search_path + + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = etree.fromstring(resp.content) + + # parse results + for result in dom.xpath(results_xpath): + url = result.xpath(url_xpath)[0].text + title = result.xpath(title_xpath)[0].text + content = escape(result.xpath(content_xpath)[0].text) + + # append result + results.append({'url': url, + 'title': title, + 'content': content}) + + # return results + return results diff --git a/searx/settings.yml b/searx/settings.yml @@ -103,6 +103,10 @@ engines: shortcut : gf disabled : True + - name : gigablast + engine : gigablast + shortcut : gb + - name : github engine : github shortcut : gh diff --git a/searx/tests/engines/test_gigablast.py b/searx/tests/engines/test_gigablast.py @@ -0,0 +1,57 @@ +from collections import defaultdict +import mock +from searx.engines import gigablast +from searx.testing import SearxTestCase + + +class TestGigablastEngine(SearxTestCase): + + def test_request(self): + 
query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = gigablast.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('gigablast.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, gigablast.response, None) + self.assertRaises(AttributeError, gigablast.response, []) + self.assertRaises(AttributeError, gigablast.response, '') + self.assertRaises(AttributeError, gigablast.response, '[]') + + response = mock.Mock(content='<response></response>') + self.assertEqual(gigablast.response(response), []) + + response = mock.Mock(content='<response></response>') + self.assertEqual(gigablast.response(response), []) + + xml = """<?xml version="1.0" encoding="UTF-8" ?> + <response> + <hits>5941888</hits> + <moreResultsFollow>1</moreResultsFollow> + <result> + <title><![CDATA[This should be the title]]></title> + <sum><![CDATA[This should be the content.]]></sum> + <url><![CDATA[http://this.should.be.the.link/]]></url> + <size>90.5</size> + <docId>145414002633</docId> + <siteId>2660021087</siteId> + <domainId>2660021087</domainId> + <spidered>1320519373</spidered> + <indexed>1320519373</indexed> + <pubdate>4294967295</pubdate> + <isModDate>0</isModDate> + <language><![CDATA[English]]></language> + <charset><![CDATA[UTF-8]]></charset> + </result> + </response> + """ + response = mock.Mock(content=xml) + results = gigablast.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This should be the title') + self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') + self.assertEqual(results[0]['content'], 'This should be the content.') diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py @@ -9,6 +9,7 @@ from searx.tests.engines.test_digg import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import 
* # noqa from searx.tests.engines.test_flickr_noapi import * # noqa +from searx.tests.engines.test_gigablast import * # noqa from searx.tests.engines.test_github import * # noqa from searx.tests.engines.test_www1x import * # noqa from searx.tests.engines.test_google_images import * # noqa