logo

searx

My custom branch(es) on searx, a meta-search engine — git clone https://hacktivis.me/git/searx.git
commit: 8cf2ee57216b4dffc419e1762ff1fe4dfd30e227
parent f18807955beceb86a99963feedee8355f31c481c
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date:   Sun,  1 Feb 2015 13:43:10 +0100

500px unit test

Diffstat:

M searx/engines/www500px.py | 11 ++++++-----
A searx/tests/engines/test_www500px.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/tests/test_engines.py | 1 +
3 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py @@ -15,6 +15,7 @@ from urllib import urlencode from urlparse import urljoin from lxml import html import re +from searx.engines.xpath import extract_text # engine dependent config categories = ['images'] @@ -22,7 +23,7 @@ paging = True # search-url base_url = 'https://500px.com' -search_url = base_url+'/search?search?page={pageno}&type=photos&{query}' +search_url = base_url + '/search?search?page={pageno}&type=photos&{query}' # do search-request @@ -44,11 +45,11 @@ def response(resp): for result in dom.xpath('//div[@class="photo"]'): link = result.xpath('.//a')[0] url = urljoin(base_url, link.attrib.get('href')) - title = result.xpath('.//div[@class="title"]//text()')[0] - thumbnail_src = link.xpath('.//img')[0].attrib['src'] + title = extract_text(result.xpath('.//div[@class="title"]')) + thumbnail_src = link.xpath('.//img')[0].attrib.get('src') # To have a bigger thumbnail, uncomment the next line - #thumbnail_src = regex.sub('4.jpg', thumbnail_src) - content = result.xpath('.//div[@class="info"]//text()')[0] + # thumbnail_src = regex.sub('4.jpg', thumbnail_src) + content = extract_text(result.xpath('.//div[@class="info"]')) img_src = regex.sub('2048.jpg', thumbnail_src) # append result diff --git a/searx/tests/engines/test_www500px.py b/searx/tests/engines/test_www500px.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import www500px +from searx.testing import SearxTestCase + + +class TestWww500pxImagesEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = www500px.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('500px.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, www500px.response, None) + self.assertRaises(AttributeError, www500px.response, 
[]) + self.assertRaises(AttributeError, www500px.response, '') + self.assertRaises(AttributeError, www500px.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(www500px.response(response), []) + + html = """ + <div class="photo"> + <a href="/this.should.be.the.url" data-ga-category="Photo Thumbnail" data-ga-action="Title"> + <img src="https://image.url/3.jpg?v=0" /> + </a> + <div class="details"> + <div class="inside"> + <div class="title"> + <a href="/photo/64312705/branch-out-by-oliver-turpin?feature="> + This is the title + </a> + </div> + <div class="info"> + <a href="/ChronicleUK" data-ga-action="Image" data-ga-category="Photo Thumbnail"> + This is the content + </a> + </div> + <div class="rating">44.8</div> + </div> + </div> + </div> + """ + response = mock.Mock(text=html) + results = www500px.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://500px.com/this.should.be.the.url') + self.assertEqual(results[0]['content'], 'This is the content') + self.assertEqual(results[0]['thumbnail_src'], 'https://image.url/3.jpg?v=0') + self.assertEqual(results[0]['img_src'], 'https://image.url/2048.jpg') + + html = """ + <a href="/this.should.be.the.url" data-ga-category="Photo Thumbnail" data-ga-action="Title"> + <img src="https://image.url/3.jpg?v=0" /> + </a> + <div class="details"> + <div class="inside"> + <div class="title"> + <a href="/photo/64312705/branch-out-by-oliver-turpin?feature="> + This is the title + </a> + </div> + <div class="info"> + <a href="/ChronicleUK" data-ga-action="Image" data-ga-category="Photo Thumbnail"> + Oliver Turpin + </a> + </div> + <div class="rating">44.8</div> + </div> + </div> + """ + response = mock.Mock(text=html) + results = www500px.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git 
a/searx/tests/test_engines.py b/searx/tests/test_engines.py @@ -18,4 +18,5 @@ from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa from searx.tests.engines.test_vimeo import * # noqa +from searx.tests.engines.test_www500px import * # noqa from searx.tests.engines.test_youtube import * # noqa