logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 34a85533e0b6cc4989f675912fdc25d21948b378
parent: aa09f963eb8220f866334779f61741da8926fcf2
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Tue, 19 Jul 2016 10:02:47 +0200

Merge pull request #633 from kvch/deviantart-fix

[fix] deviantart engine xpaths

Diffstat:

M searx/engines/deviantart.py | 9 ++++-----
M tests/unit/engines/test_deviantart.py | 47 ++++++++---------------------------------------
2 files changed, 12 insertions(+), 44 deletions(-)

diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py @@ -50,11 +50,10 @@ def response(resp): regex = re.compile(r'\/200H\/') # parse results - for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'): - link = result.xpath('.//a[contains(@class, "thumb")]')[0] - url = urljoin(base_url, link.attrib.get('href')) - title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') - title = extract_text(title_links[0]) + for result in dom.xpath('.//span[@class="thumb wide"]'): + link = result.xpath('.//a[@class="torpedo-thumb-link"]')[0] + url = link.attrib.get('href') + title = extract_text(result.xpath('.//span[@class="title"]')) thumbnail_src = link.xpath('.//img')[0].attrib.get('src') img_src = regex.sub('/', thumbnail_src) diff --git a/tests/unit/engines/test_deviantart.py b/tests/unit/engines/test_deviantart.py @@ -28,44 +28,13 @@ class TestDeviantartEngine(SearxTestCase): self.assertEqual(deviantart.response(response), []) html = """ - <div class="tt-a tt-fh tt-boxed" collect_rid="1:149167425" - usericon="http://a.deviantart.net/avatars/t/e/test-0.gif" userid="233301" - username="test-0" symbol="~" category="digitalart/animation"> - <span class="tt-w" style="width: auto; max-width: 277px;"> - <span class="tt-fh-tc" style="width: 202px;"> - <span class="tt-bb" style="width: 202px;"> - </span> - <span class="shadow"> - <a class="thumb" href="http://url.of.result/2nd.part.of.url" - title="Behoimi BE Animation Test by test-0, Jan 4, - 2010 in Digital Art &gt; Animation"> <i></i> - <img width="200" height="200" alt="Test" - src="http://url.of.thumbnail" data-src="http://th08.deviantart.net/test.jpg"> - </a> - </span> - <!-- ^TTT --> - </span> - <span class="details"> - <a href="http://test-0.deviantart.com/art/Test" class="t" - title="Behoimi BE Animation Test by test-0, Jan 4, 2010"> - <span class="tt-fh-oe">Title of image</span> </a> - <small> - <span class="category"> - <span class="age"> - 5 years ago - </span> - in <a 
title="Behoimi BE Animation Test by test-0, Jan 4, 2010" - href="http://www.deviantart.com/browse/all/digitalart/animation/">Animation</a> - </span> - <div class="commentcount"> - <a href="http://test-0.deviantart.com/art/Test#comments"> - <span class="iconcommentsstats"></span>9 Comments</a> - </div> - <a class="mlt-link" href="http://www.deviantart.com/morelikethis/149167425"> - <span class="mlt-icon"></span> <span class="mlt-text">More Like This</span> </a> - </span> - </small> <!-- TTT$ --> - </span> + <div id="page-1-results" class="page-results results-page-thumb torpedo-container"> + <span class="thumb wide" href="http://amai911.deviantart.com/art/Horse-195212845" + data-super-full-width="900" data-super-full-height="600"> + <a class="torpedo-thumb-link" href="https://url.of.image"> + <img data-sigil="torpedo-img" src="https://url.of.thumbnail" /> + </a> + <span class="info"><span class="title-wrap"><span class="title">Title of image</span></span> </div> """ response = mock.Mock(text=html) @@ -73,7 +42,7 @@ class TestDeviantartEngine(SearxTestCase): self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['title'], 'Title of image') - self.assertEqual(results[0]['url'], 'http://url.of.result/2nd.part.of.url') + self.assertEqual(results[0]['url'], 'https://url.of.image') self.assertNotIn('content', results[0]) self.assertEqual(results[0]['thumbnail_src'], 'https://url.of.thumbnail')