commit: cfe81d741cdd2517c4587071e4afbdd0adb923bd
parent 4dba3739fb3b98572cbd51adab226376b5844105
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date: Tue, 27 Jan 2015 20:03:33 +0100
Add a few utils unit tests and strip surrounding whitespace in HTMLTextExtractor.get_text
Diffstat:
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/searx/tests/test_utils.py b/searx/tests/test_utils.py
@@ -10,6 +10,11 @@ class TestUtils(SearxTestCase):
self.assertIsNotNone(utils.gen_useragent())
self.assertTrue(utils.gen_useragent().startswith('Mozilla'))
+ def test_searx_useragent(self):
+ self.assertIsInstance(utils.searx_useragent(), str)
+ self.assertIsNotNone(utils.searx_useragent())
+ self.assertTrue(utils.searx_useragent().startswith('searx'))
+
def test_highlight_content(self):
self.assertEqual(utils.highlight_content(0, None), None)
self.assertEqual(utils.highlight_content(None, None), None)
@@ -29,6 +34,23 @@ class TestUtils(SearxTestCase):
query = 'a test'
self.assertEqual(utils.highlight_content(content, query), content)
+ def test_html_to_text(self):
+ html = """
+ <a href="/testlink" class="link_access_account">
+ <span class="toto">
+ <span>
+ <img src="test.jpg" />
+ </span>
+ </span>
+ <span class="titi">
+ Test text
+ </span>
+ </a>
+ """
+ self.assertIsInstance(utils.html_to_text(html), unicode)
+ self.assertIsNotNone(utils.html_to_text(html))
+ self.assertEqual(utils.html_to_text(html), "Test text")
+
class TestHTMLTextExtractor(SearxTestCase):
diff --git a/searx/utils.py b/searx/utils.py
@@ -115,7 +115,7 @@ class HTMLTextExtractor(HTMLParser):
self.result.append(name)
def get_text(self):
- return u''.join(self.result)
+ return u''.join(self.result).strip()
def html_to_text(html):