logo

searx

My custom branch(es) on searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: f1c10f4fe45f34c12994b9bbc4aca133202fd7ca
parent 3a4d6045c1da950d13d1d14192247389c5932631
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date:   Fri,  6 Feb 2015 17:31:10 +0100

Startpage's unit test

Diffstat:

M searx/engines/startpage.py | 13 +++++--------
A searx/tests/engines/test_startpage.py | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/tests/test_engines.py | 1 +
3 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py @@ -13,6 +13,7 @@ from lxml import html from cgi import escape import re +from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] @@ -45,8 +46,7 @@ def request(query, params): # set language if specified if params['language'] != 'all': - params['data']['with_language'] = ('lang_' + - params['language'].split('_')[0]) + params['data']['with_language'] = ('lang_' + params['language'].split('_')[0]) return params @@ -64,18 +64,15 @@ def response(resp): continue link = links[0] url = link.attrib.get('href') - try: - title = escape(link.text_content()) - except UnicodeDecodeError: - continue # block google-ad url's if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url): continue + title = escape(extract_text(link)) + if result.xpath('./p[@class="desc"]'): - content = escape(result.xpath('./p[@class="desc"]')[0] - .text_content()) + content = escape(extract_text(result.xpath('./p[@class="desc"]'))) else: content = '' diff --git a/searx/tests/engines/test_startpage.py b/searx/tests/engines/test_startpage.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import startpage +from searx.testing import SearxTestCase + + +class TestStartpageEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = startpage.request(query, dicto) + self.assertIn('url', params) + self.assertIn('startpage.com', params['url']) + self.assertIn('data', params) + self.assertIn('query', params['data']) + self.assertIn(query, params['data']['query']) + self.assertIn('with_language', params['data']) + self.assertIn('lang_fr', params['data']['with_language']) + + dicto['language'] = 'all' + params = startpage.request(query, dicto) + self.assertNotIn('with_language', params['data']) + + def test_response(self): + 
self.assertRaises(AttributeError, startpage.response, None) + self.assertRaises(AttributeError, startpage.response, []) + self.assertRaises(AttributeError, startpage.response, '') + self.assertRaises(AttributeError, startpage.response, '[]') + + response = mock.Mock(content='<html></html>') + self.assertEqual(startpage.response(response), []) + + html = """ + <div class='result' style=' *width : auto; *margin-right : 10%;'> + <h3> + <a href='http://this.should.be.the.link/' id='title_2' name='title_2' > + This should be the title + </a> + <span id='title_stars_2' name='title_stars_2'> </span> + </h3> + <p class='desc'> + This should be the content. + </p> + <p> + <span class='url'>www.speed<b>test</b>.net/fr/ + </span> + - + <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata=" + class='proxy'> + Navigation avec Ixquick Proxy + </A> + - + <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid= + &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0= + &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'> + Mis en surbrillance + </A> + </p> + </div> + """ + response = mock.Mock(content=html) + results = startpage.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This should be the title') + self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') + self.assertEqual(results[0]['content'], 'This should be the content.') + + html = """ + <div class='result' style=' *width : auto; *margin-right : 10%;'> + <h3> + <a href='http://www.google.com/aclk?sa=l&ai=C' id='title_2' name='title_2' > + This should be the title + </a> + <span id='title_stars_2' name='title_stars_2'> </span> + </h3> + <p class='desc'> + This should be the content. 
+ </p> + <p> + <span class='url'>www.speed<b>test</b>.net/fr/ + </span> + - + <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata=" + class='proxy'> + Navigation avec Ixquick Proxy + </A> + - + <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid= + &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0= + &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'> + Mis en surbrillance + </A> + </p> + </div> + <div class='result' style=' *width : auto; *margin-right : 10%;'> + <h3> + <span id='title_stars_2' name='title_stars_2'> </span> + </h3> + <p class='desc'> + This should be the content. + </p> + <p> + <span class='url'>www.speed<b>test</b>.net/fr/ + </span> + </p> + </div> + <div class='result' style=' *width : auto; *margin-right : 10%;'> + <h3> + <a href='http://this.should.be.the.link/' id='title_2' name='title_2' > + This should be the title + </a> + <span id='title_stars_2' name='title_stars_2'> </span> + </h3> + <p> + <span class='url'>www.speed<b>test</b>.net/fr/ + </span> + - + <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata=" + class='proxy'> + Navigation avec Ixquick Proxy + </A> + - + <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid= + &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0= + &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'> + Mis en surbrillance + </A> + </p> + </div> + """ + response = mock.Mock(content=html) + results = startpage.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['content'], '') diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py @@ -23,6 +23,7 @@ from searx.tests.engines.test_searchcode_code import * # noqa from 
searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa +from searx.tests.engines.test_startpage import * # noqa from searx.tests.engines.test_subtitleseeker import * # noqa from searx.tests.engines.test_twitter import * # noqa from searx.tests.engines.test_vimeo import * # noqa