commit: efe6dead5566d4800587491e5252474a33ddff60
parent 0e6f8393ab8b29b2e85d1fafdc7442455767f753
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Mon, 2 Feb 2015 17:55:39 +0100
Duckduckgo unit test
Diffstat:
3 files changed, 96 insertions(+), 5 deletions(-)
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
@@ -15,7 +15,7 @@
from urllib import urlencode
from lxml.html import fromstring
-from searx.utils import html_to_text
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}'
# specific xpath variables
result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa
url_xpath = './/a[@class="large"]/@href'
-title_xpath = './/a[@class="large"]//text()'
-content_xpath = './/div[@class="snippet"]//text()'
+title_xpath = './/a[@class="large"]'
+content_xpath = './/div[@class="snippet"]'
# do search-request
@@ -64,8 +64,8 @@ def response(resp):
if not res_url:
continue
- title = html_to_text(''.join(r.xpath(title_xpath)))
- content = html_to_text(''.join(r.xpath(content_xpath)))
+ title = extract_text(r.xpath(title_xpath))
+ content = extract_text(r.xpath(content_xpath))
# append result
results.append({'title': title,
diff --git a/searx/tests/engines/test_duckduckgo.py b/searx/tests/engines/test_duckduckgo.py
@@ -0,0 +1,90 @@
+from collections import defaultdict
+import mock
+from searx.engines import duckduckgo
+from searx.testing import SearxTestCase
+
+
+class TestDuckduckgoEngine(SearxTestCase):
+    """Unit tests for the duckduckgo engine's request() and response()."""
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['pageno'] = 1
+        dicto['language'] = 'fr_FR'
+        params = duckduckgo.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertIn(query, params['url'])
+        self.assertIn('duckduckgo.com', params['url'])
+        self.assertIn('fr-fr', params['url'])
+        # with language 'all' the URL is expected to carry the en-us locale
+        dicto['language'] = 'all'
+        params = duckduckgo.request(query, dicto)
+        self.assertIn('en-us', params['url'])
+
+    def test_response(self):
+        self.assertRaises(AttributeError, duckduckgo.response, None)
+        self.assertRaises(AttributeError, duckduckgo.response, [])
+        self.assertRaises(AttributeError, duckduckgo.response, '')
+        self.assertRaises(AttributeError, duckduckgo.response, '[]')
+        # a document without any result node yields an empty list
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(duckduckgo.response(response), [])
+        # a single well-formed result: title, url and content are extracted
+        html = """
+ <div class="results_links results_links_deep web-result">
+ <div class="icon_fav" style="display: block;">
+ <a rel="nofollow" href="https://www.test.com/">
+ <img width="16" height="16" alt=""
+ src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
+ </a>
+ </div>
+ <div class="links_main links_deep"> <!-- This is the visible part -->
+ <a rel="nofollow" class="large" href="http://this.should.be.the.link/">
+ This <b>is</b> <b>the</b> title
+ </a>
+ <div class="snippet"><b>This</b> should be the content.</div>
+ <div class="url">
+ http://this.should.be.the.link/
+ </div>
+ </div>
+ </div>
+ """
+        response = mock.Mock(text=html)
+        results = duckduckgo.response(response)
+        self.assertIsInstance(results, list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'This is the title')
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
+        self.assertEqual(results[0]['content'], 'This should be the content.')
+        # results with no link element, or with an empty href, are skipped
+        html = """
+ <div class="results_links results_links_deep web-result">
+ <div class="icon_fav" style="display: block;">
+ </div>
+ <div class="links_main links_deep"> <!-- This is the visible part -->
+ <div class="snippet"><b>This</b> should be the content.</div>
+ <div class="url">
+ http://this.should.be.the.link/
+ </div>
+ </div>
+ </div>
+ <div class="results_links results_links_deep web-result">
+ <div class="icon_fav" style="display: block;">
+ <img width="16" height="16" alt=""
+ src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
+ </div>
+ <div class="links_main links_deep"> <!-- This is the visible part -->
+ <a rel="nofollow" class="large" href="">
+ This <b>is</b> <b>the</b> title
+ </a>
+ <div class="snippet"><b>This</b> should be the content.</div>
+ <div class="url">
+ http://this.should.be.the.link/
+ </div>
+ </div>
+ </div>
+ """
+        response = mock.Mock(text=html)
+        results = duckduckgo.response(response)
+        self.assertIsInstance(results, list)
+        self.assertEqual(len(results), 0)
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
@@ -6,6 +6,7 @@ from searx.tests.engines.test_dailymotion import * # noqa
from searx.tests.engines.test_deezer import * # noqa
from searx.tests.engines.test_deviantart import * # noqa
from searx.tests.engines.test_digg import * # noqa
+from searx.tests.engines.test_duckduckgo import * # noqa
from searx.tests.engines.test_dummy import * # noqa
from searx.tests.engines.test_flickr import * # noqa
from searx.tests.engines.test_flickr_noapi import * # noqa