commit: 57996b12fcbe193e89537c63abd1a75c3c65a25a
parent dc036ece856fb437504bc0b9a059b305999bb68b
Author: dalf <alex@al-f.net>
Date: Fri, 20 Feb 2015 12:34:13 +0100
[fix] update yahoo engine to match the website changes
Diffstat:
2 files changed, 65 insertions(+), 67 deletions(-)
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
@@ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
# specific xpath variables
-results_xpath = '//div[@class="res"]'
+results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
url_xpath = './/h3/a/@href'
title_xpath = './/h3/a'
-content_xpath = './/div[@class="abstr"]'
-suggestion_xpath = '//div[@id="satat"]//a'
+content_xpath = './/div[@class="compText aAbs"]'
+suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
# remove yahoo-specific tracking-url
@@ -91,11 +91,12 @@ def response(resp):
'content': content})
# if no suggestion found, return results
- if not dom.xpath(suggestion_xpath):
+ suggestions = dom.xpath(suggestion_xpath)
+ if not suggestions:
return results
# parse suggestion
- for suggestion in dom.xpath(suggestion_xpath):
+ for suggestion in suggestions:
# append suggestion
results.append({'suggestion': extract_text(suggestion)})
diff --git a/searx/tests/engines/test_yahoo.py b/searx/tests/engines/test_yahoo.py
@@ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase):
self.assertEqual(yahoo.response(response), [])
html = """
- <div class="res">
- <div>
- <h3>
- <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
- _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
- /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
- <b>This</b> is the title
- </a>
+<ol class="reg mb-15 searchCenterMiddle">
+ <li class="first">
+ <div class="dd algo fst Sr">
+ <div class="compTitle">
+ <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
+ _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
+ /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
+ target="_blank" data-bid="54e712e13671c">
+ <b><b>This is the title</b></b></a>
</h3>
</div>
- <span class="url" dir="ltr">www.<b>test</b>.com</span>
- <div class="abstr">
- <b>This</b> is the content
+ <div class="compText aAbs">
+ <p class="lh-18"><b><b>This is the </b>content</b>
+ </p>
</div>
</div>
- <div id="satat" data-bns="Yahoo" data-bk="124.1">
- <h2>Also Try</h2>
- <table>
- <tbody>
- <tr>
- <td>
- <a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" >
- <span>
- <b></b>This is <b>the suggestion</b>
- </span>
- </a>
- </td>
- </tr>
- </tbody>
- </table>
+ </li>
+ <li>
+ <div class="dd algo lst Sr">
+ <div class="compTitle">
+ <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA;
+ _ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10
+ /RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-"
+ target="_blank" data-bid="54e712e136926">
+ This is the second <b><b>title</b></b></a>
+ </h3>
+ </div>
+ <div class="compText aAbs">
+ <p class="lh-18">This is the second content</p>
+ </div>
</div>
+ </li>
+</ol>
+<div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04">
+ <div class="compTitle mb-4 h-17">
+ <h3 class="title">Also Try</h3> </div>
+ <table class="compTable m-0 ac-1st td-u fz-ms">
+ <tbody>
+ <tr>
+ <td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a>
+ </td>
+ </tr>
+ </table>
+</div>
"""
response = mock.Mock(text=html)
results = yahoo.response(response)
+ print results
self.assertEqual(type(results), list)
- self.assertEqual(len(results), 2)
+ self.assertEqual(len(results), 3)
self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
self.assertEqual(results[0]['content'], 'This is the content')
- self.assertEqual(results[1]['suggestion'], 'This is the suggestion')
+ self.assertEqual(results[1]['title'], 'This is the second title')
+ self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/')
+ self.assertEqual(results[1]['content'], 'This is the second content')
+ self.assertEqual(results[2]['suggestion'], 'This is the suggestion')
html = """
- <div class="res">
- <div>
- <h3>
- <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
- _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
- /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
- <b>This</b> is the title
- </a>
- </h3>
- </div>
- <span class="url" dir="ltr">www.<b>test</b>.com</span>
- <div class="abstr">
- <b>This</b> is the content
- </div>
- </div>
- <div class="res">
- <div>
- <h3>
- <a id="link-1" class="yschttl spt">
- <b>This</b> is the title
- </a>
- </h3>
- </div>
- <span class="url" dir="ltr">www.<b>test</b>.com</span>
- <div class="abstr">
- <b>This</b> is the content
- </div>
- </div>
- <div class="res">
- <div>
- <h3>
+<ol class="reg mb-15 searchCenterMiddle">
+ <li class="first">
+ <div class="dd algo fst Sr">
+ <div class="compTitle">
+ <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
+ _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
+ /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
+ target="_blank" data-bid="54e712e13671c">
+ <b><b>This is the title</b></b></a>
</h3>
</div>
- <span class="url" dir="ltr">www.<b>test</b>.com</span>
- <div class="abstr">
- <b>This</b> is the content
+ <div class="compText aAbs">
+ <p class="lh-18"><b><b>This is the </b>content</b>
+ </p>
</div>
</div>
+ </li>
+</ol>
"""
response = mock.Mock(text=html)
results = yahoo.response(response)