commit: 8cf2ee57216b4dffc419e1762ff1fe4dfd30e227
parent f18807955beceb86a99963feedee8355f31c481c
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Sun, 1 Feb 2015 13:43:10 +0100
500px unit test
Diffstat:
3 files changed, 90 insertions(+), 5 deletions(-)
diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py
@@ -15,6 +15,7 @@ from urllib import urlencode
from urlparse import urljoin
from lxml import html
import re
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['images']
@@ -22,7 +23,7 @@ paging = True
# search-url
base_url = 'https://500px.com'
-search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
+search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'
# do search-request
@@ -44,11 +45,11 @@ def response(resp):
for result in dom.xpath('//div[@class="photo"]'):
link = result.xpath('.//a')[0]
url = urljoin(base_url, link.attrib.get('href'))
- title = result.xpath('.//div[@class="title"]//text()')[0]
- thumbnail_src = link.xpath('.//img')[0].attrib['src']
+ title = extract_text(result.xpath('.//div[@class="title"]'))
+ thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
# To have a bigger thumbnail, uncomment the next line
- #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
- content = result.xpath('.//div[@class="info"]//text()')[0]
+ # thumbnail_src = regex.sub('4.jpg', thumbnail_src)
+ content = extract_text(result.xpath('.//div[@class="info"]'))
img_src = regex.sub('2048.jpg', thumbnail_src)
# append result
diff --git a/searx/tests/engines/test_www500px.py b/searx/tests/engines/test_www500px.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import www500px
+from searx.testing import SearxTestCase
+
+
+class TestWww500pxImagesEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 1
+ params = www500px.request(query, dicto)
+ self.assertTrue('url' in params)
+ self.assertTrue(query in params['url'])
+ self.assertTrue('500px.com' in params['url'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, www500px.response, None)
+ self.assertRaises(AttributeError, www500px.response, [])
+ self.assertRaises(AttributeError, www500px.response, '')
+ self.assertRaises(AttributeError, www500px.response, '[]')
+
+ response = mock.Mock(text='<html></html>')
+ self.assertEqual(www500px.response(response), [])
+
+ html = """
+ <div class="photo">
+ <a href="/this.should.be.the.url" data-ga-category="Photo Thumbnail" data-ga-action="Title">
+ <img src="https://image.url/3.jpg?v=0" />
+ </a>
+ <div class="details">
+ <div class="inside">
+ <div class="title">
+ <a href="/photo/64312705/branch-out-by-oliver-turpin?feature=">
+ This is the title
+ </a>
+ </div>
+ <div class="info">
+ <a href="/ChronicleUK" data-ga-action="Image" data-ga-category="Photo Thumbnail">
+ This is the content
+ </a>
+ </div>
+ <div class="rating">44.8</div>
+ </div>
+ </div>
+ </div>
+ """
+ response = mock.Mock(text=html)
+ results = www500px.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This is the title')
+ self.assertEqual(results[0]['url'], 'https://500px.com/this.should.be.the.url')
+ self.assertEqual(results[0]['content'], 'This is the content')
+ self.assertEqual(results[0]['thumbnail_src'], 'https://image.url/3.jpg?v=0')
+ self.assertEqual(results[0]['img_src'], 'https://image.url/2048.jpg')
+
+ html = """
+ <a href="/this.should.be.the.url" data-ga-category="Photo Thumbnail" data-ga-action="Title">
+ <img src="https://image.url/3.jpg?v=0" />
+ </a>
+ <div class="details">
+ <div class="inside">
+ <div class="title">
+ <a href="/photo/64312705/branch-out-by-oliver-turpin?feature=">
+ This is the title
+ </a>
+ </div>
+ <div class="info">
+ <a href="/ChronicleUK" data-ga-action="Image" data-ga-category="Photo Thumbnail">
+ Oliver Turpin
+ </a>
+ </div>
+ <div class="rating">44.8</div>
+ </div>
+ </div>
+ """
+ response = mock.Mock(text=html)
+ results = www500px.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
@@ -18,4 +18,5 @@ from searx.tests.engines.test_searchcode_doc import * # noqa
from searx.tests.engines.test_soundcloud import * # noqa
from searx.tests.engines.test_stackoverflow import * # noqa
from searx.tests.engines.test_vimeo import * # noqa
+from searx.tests.engines.test_www500px import * # noqa
from searx.tests.engines.test_youtube import * # noqa