logo

searx

Unnamed repository; edit this file 'description' to name the repository.
commit: 7f7f10bb6f7746c0891c2795b36261286b52a76a
parent: dc036ece856fb437504bc0b9a059b305999bb68b
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Fri, 20 Feb 2015 14:22:25 +0100

Merge pull request #249 from dalf/master

[fix] update yahoo engine according to the web site changes

Diffstat:

searx/engines/yahoo.py            |  11 ++++++-----
searx/tests/engines/test_yahoo.py | 121 +++++++++++++++++++++++++++++++++++++++----------------------------------------
2 files changed, 65 insertions(+), 67 deletions(-)

diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py @@ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/' search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' # specific xpath variables -results_xpath = '//div[@class="res"]' +results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" url_xpath = './/h3/a/@href' title_xpath = './/h3/a' -content_xpath = './/div[@class="abstr"]' -suggestion_xpath = '//div[@id="satat"]//a' +content_xpath = './/div[@class="compText aAbs"]' +suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a" # remove yahoo-specific tracking-url @@ -91,11 +91,12 @@ def response(resp): 'content': content}) # if no suggestion found, return results - if not dom.xpath(suggestion_xpath): + suggestions = dom.xpath(suggestion_xpath) + if not suggestions: return results # parse suggestion - for suggestion in dom.xpath(suggestion_xpath): + for suggestion in suggestions: # append suggestion results.append({'suggestion': extract_text(suggestion)}) diff --git a/searx/tests/engines/test_yahoo.py b/searx/tests/engines/test_yahoo.py @@ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase): self.assertEqual(yahoo.response(response), []) html = """ - <div class="res"> - <div> - <h3> - <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA; - _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 - /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1"> - <b>This</b> is the title - </a> +<ol class="reg mb-15 searchCenterMiddle"> + <li class="first"> + <div class="dd algo fst Sr"> + <div class="compTitle"> + <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA; + _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 + 
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-" + target="_blank" data-bid="54e712e13671c"> + <b><b>This is the title</b></b></a> </h3> </div> - <span class="url" dir="ltr">www.<b>test</b>.com</span> - <div class="abstr"> - <b>This</b> is the content + <div class="compText aAbs"> + <p class="lh-18"><b><b>This is the </b>content</b> + </p> </div> </div> - <div id="satat" data-bns="Yahoo" data-bk="124.1"> - <h2>Also Try</h2> - <table> - <tbody> - <tr> - <td> - <a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" > - <span> - <b></b>This is <b>the suggestion</b> - </span> - </a> - </td> - </tr> - </tbody> - </table> + </li> + <li> + <div class="dd algo lst Sr"> + <div class="compTitle"> + <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA; + _ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10 + /RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-" + target="_blank" data-bid="54e712e136926"> + This is the second <b><b>title</b></b></a> + </h3> + </div> + <div class="compText aAbs"> + <p class="lh-18">This is the second content</p> + </div> </div> + </li> +</ol> +<div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04"> + <div class="compTitle mb-4 h-17"> + <h3 class="title">Also Try</h3> </div> + <table class="compTable m-0 ac-1st td-u fz-ms"> + <tbody> + <tr> + <td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a> + </td> + </tr> + </table> +</div> """ response = mock.Mock(text=html) results = yahoo.response(response) + print results self.assertEqual(type(results), list) - self.assertEqual(len(results), 2) + self.assertEqual(len(results), 3) self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['url'], 'https://this.is.the.url/') self.assertEqual(results[0]['content'], 'This is the content') - self.assertEqual(results[1]['suggestion'], 
'This is the suggestion') + self.assertEqual(results[1]['title'], 'This is the second title') + self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/') + self.assertEqual(results[1]['content'], 'This is the second content') + self.assertEqual(results[2]['suggestion'], 'This is the suggestion') html = """ - <div class="res"> - <div> - <h3> - <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA; - _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 - /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1"> - <b>This</b> is the title - </a> - </h3> - </div> - <span class="url" dir="ltr">www.<b>test</b>.com</span> - <div class="abstr"> - <b>This</b> is the content - </div> - </div> - <div class="res"> - <div> - <h3> - <a id="link-1" class="yschttl spt"> - <b>This</b> is the title - </a> - </h3> - </div> - <span class="url" dir="ltr">www.<b>test</b>.com</span> - <div class="abstr"> - <b>This</b> is the content - </div> - </div> - <div class="res"> - <div> - <h3> +<ol class="reg mb-15 searchCenterMiddle"> + <li class="first"> + <div class="dd algo fst Sr"> + <div class="compTitle"> + <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA; + _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 + /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-" + target="_blank" data-bid="54e712e13671c"> + <b><b>This is the title</b></b></a> </h3> </div> - <span class="url" dir="ltr">www.<b>test</b>.com</span> - <div class="abstr"> - <b>This</b> is the content + <div class="compText aAbs"> + <p class="lh-18"><b><b>This is the </b>content</b> + </p> </div> </div> + </li> +</ol> """ response = mock.Mock(text=html) results = yahoo.response(response)