commit: 751c9a346ebd193800a1228ffff1b856942099ab
parent dbf4f83231b89a2326ba2b62bceacaf5db76d4ca
Author: a01200356 <a01200356@itesm.mx>
Date: Mon, 21 Mar 2016 20:19:13 -0600
[fix] update duckduckgo xpaths to match DuckDuckGo's changed HTML
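test_duckduckgo updated to reflect the changes in DuckDuckGo's HTML: the fixtures now use the new result markup (result__a, result__snippet) and cover the no-results case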
Diffstat:
2 files changed, 26 insertions(+), 46 deletions(-)
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
@@ -28,10 +28,10 @@ language_support = True
url = 'https://duckduckgo.com/html?{query}&s={offset}'
# specific xpath variables
-result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa
-url_xpath = './/a[@class="large"]/@href'
-title_xpath = './/a[@class="large"]'
-content_xpath = './/div[@class="snippet"]'
+result_xpath = '//div[@class="result results_links results_links_deep web-result "]' # noqa
+url_xpath = './/a[@class="result__a"]/@href'
+title_xpath = './/a[@class="result__a"]'
+content_xpath = './/a[@class="result__snippet"]'
# do search-request
diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py
@@ -32,55 +32,32 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertEqual(duckduckgo.response(response), [])
html = u"""
- <div class="results_links results_links_deep web-result">
- <div class="icon_fav" style="display: block;">
- <a rel="nofollow" href="https://www.test.com/">
- <img width="16" height="16" alt=""
- src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
- </a>
- </div>
- <div class="links_main links_deep"> <!-- This is the visible part -->
- <a rel="nofollow" class="large" href="http://this.should.be.the.link/ű">
- This <b>is</b> <b>the</b> title
- </a>
- <div class="snippet"><b>This</b> should be the content.</div>
- <div class="url">
- http://this.should.be.the.link/
+ <div class="result results_links results_links_deep web-result result--no-result">
+ <div class="links_main links_deep result__body">
+ <h2 class="result__title">
+ </h2>
+ <div class="no-results">No results</div>
+ <div class="result__extras">
</div>
</div>
</div>
"""
response = mock.Mock(text=html)
results = duckduckgo.response(response)
- self.assertEqual(type(results), list)
- self.assertEqual(len(results), 1)
- self.assertEqual(results[0]['title'], 'This is the title')
- self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
- self.assertEqual(results[0]['content'], 'This should be the content.')
+ self.assertEqual(duckduckgo.response(response), [])
- html = """
- <div class="results_links results_links_deep web-result">
- <div class="icon_fav" style="display: block;">
- </div>
- <div class="links_main links_deep"> <!-- This is the visible part -->
- <div class="snippet"><b>This</b> should be the content.</div>
- <div class="url">
- http://this.should.be.the.link/
- </div>
- </div>
- </div>
- <div class="results_links results_links_deep web-result">
- <div class="icon_fav" style="display: block;">
- <img width="16" height="16" alt=""
- src="/i/www.test.com.ico" style="visibility: visible;" name="i15" />
- </div>
- <div class="links_main links_deep"> <!-- This is the visible part -->
- <a rel="nofollow" class="large" href="">
- This <b>is</b> <b>the</b> title
+ html = u"""
+ <div class="result results_links results_links_deep web-result ">
+ <div class="links_main links_deep result__body">
+ <h2 class="result__title">
+ <a rel="nofollow" class="result__a" href="http://this.should.be.the.link/ű">
+ This <b>is</b> <b>the</b> title
+ </a>
+ </h2>
+ <a class="result__snippet" href="http://this.should.be.the.link/ű">
+ <b>This</b> should be the content.
</a>
- <div class="snippet"><b>This</b> should be the content.</div>
- <div class="url">
- http://this.should.be.the.link/
+ <div class="result__extras">
</div>
</div>
</div>
@@ -88,4 +65,7 @@ class TestDuckduckgoEngine(SearxTestCase):
response = mock.Mock(text=html)
results = duckduckgo.response(response)
self.assertEqual(type(results), list)
- self.assertEqual(len(results), 0)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This is the title')
+ self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
+ self.assertEqual(results[0]['content'], 'This should be the content.')