commit: f1c10f4fe45f34c12994b9bbc4aca133202fd7ca
parent 3a4d6045c1da950d13d1d14192247389c5932631
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Fri, 6 Feb 2015 17:31:10 +0100
Add unit tests for the Startpage engine; use extract_text for result titles and content
Diffstat:
3 files changed, 146 insertions(+), 8 deletions(-)
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
@@ -13,6 +13,7 @@
from lxml import html
from cgi import escape
import re
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
- params['data']['with_language'] = ('lang_' +
- params['language'].split('_')[0])
+ params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
return params
@@ -64,18 +64,15 @@ def response(resp):
continue
link = links[0]
url = link.attrib.get('href')
- try:
- title = escape(link.text_content())
- except UnicodeDecodeError:
- continue
# block google-ad url's
if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
continue
+ title = escape(extract_text(link))
+
if result.xpath('./p[@class="desc"]'):
- content = escape(result.xpath('./p[@class="desc"]')[0]
- .text_content())
+ content = escape(extract_text(result.xpath('./p[@class="desc"]')))
else:
content = ''
diff --git a/searx/tests/engines/test_startpage.py b/searx/tests/engines/test_startpage.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import startpage
+from searx.testing import SearxTestCase
+
+
+class TestStartpageEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 1
+ dicto['language'] = 'fr_FR'
+ params = startpage.request(query, dicto)
+ self.assertIn('url', params)
+ self.assertIn('startpage.com', params['url'])
+ self.assertIn('data', params)
+ self.assertIn('query', params['data'])
+ self.assertIn(query, params['data']['query'])
+ self.assertIn('with_language', params['data'])
+ self.assertIn('lang_fr', params['data']['with_language'])
+
+ dicto['language'] = 'all'
+ params = startpage.request(query, dicto)
+ self.assertNotIn('with_language', params['data'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, startpage.response, None)
+ self.assertRaises(AttributeError, startpage.response, [])
+ self.assertRaises(AttributeError, startpage.response, '')
+ self.assertRaises(AttributeError, startpage.response, '[]')
+
+ response = mock.Mock(content='<html></html>')
+ self.assertEqual(startpage.response(response), [])
+
+ html = """
+ <div class='result' style=' *width : auto; *margin-right : 10%;'>
+ <h3>
+ <a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
+ This should be the title
+ </a>
+ <span id='title_stars_2' name='title_stars_2'> </span>
+ </h3>
+ <p class='desc'>
+ This should be the content.
+ </p>
+ <p>
+ <span class='url'>www.speed<b>test</b>.net/fr/
+ </span>
+ -
+ <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
+ class='proxy'>
+ Navigation avec Ixquick Proxy
+ </A>
+ -
+ <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
+ &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
+ &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
+ Mis en surbrillance
+ </A>
+ </p>
+ </div>
+ """
+ response = mock.Mock(content=html)
+ results = startpage.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This should be the title')
+ self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
+ self.assertEqual(results[0]['content'], 'This should be the content.')
+
+ html = """
+ <div class='result' style=' *width : auto; *margin-right : 10%;'>
+ <h3>
+ <a href='http://www.google.com/aclk?sa=l&ai=C' id='title_2' name='title_2' >
+ This should be the title
+ </a>
+ <span id='title_stars_2' name='title_stars_2'> </span>
+ </h3>
+ <p class='desc'>
+ This should be the content.
+ </p>
+ <p>
+ <span class='url'>www.speed<b>test</b>.net/fr/
+ </span>
+ -
+ <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
+ class='proxy'>
+ Navigation avec Ixquick Proxy
+ </A>
+ -
+ <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
+ &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
+ &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
+ Mis en surbrillance
+ </A>
+ </p>
+ </div>
+ <div class='result' style=' *width : auto; *margin-right : 10%;'>
+ <h3>
+ <span id='title_stars_2' name='title_stars_2'> </span>
+ </h3>
+ <p class='desc'>
+ This should be the content.
+ </p>
+ <p>
+ <span class='url'>www.speed<b>test</b>.net/fr/
+ </span>
+ </p>
+ </div>
+ <div class='result' style=' *width : auto; *margin-right : 10%;'>
+ <h3>
+ <a href='http://this.should.be.the.link/' id='title_2' name='title_2' >
+ This should be the title
+ </a>
+ <span id='title_stars_2' name='title_stars_2'> </span>
+ </h3>
+ <p>
+ <span class='url'>www.speed<b>test</b>.net/fr/
+ </span>
+ -
+ <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata="
+ class='proxy'>
+ Navigation avec Ixquick Proxy
+ </A>
+ -
+ <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid=
+ &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0=
+ &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'>
+ Mis en surbrillance
+ </A>
+ </p>
+ </div>
+ """
+ response = mock.Mock(content=html)
+ results = startpage.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['content'], '')
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
@@ -23,6 +23,7 @@ from searx.tests.engines.test_searchcode_code import * # noqa
from searx.tests.engines.test_searchcode_doc import * # noqa
from searx.tests.engines.test_soundcloud import * # noqa
from searx.tests.engines.test_stackoverflow import * # noqa
+from searx.tests.engines.test_startpage import * # noqa
from searx.tests.engines.test_subtitleseeker import * # noqa
from searx.tests.engines.test_twitter import * # noqa
from searx.tests.engines.test_vimeo import * # noqa