logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: cfe81d741cdd2517c4587071e4afbdd0adb923bd
parent 4dba3739fb3b98572cbd51adab226376b5844105
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date:   Tue, 27 Jan 2015 20:03:33 +0100

A bit of utils unit tests

Diffstat:

M searx/tests/test_utils.py | 22 ++++++++++++++++++++++
M searx/utils.py            |  2 +-
2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/searx/tests/test_utils.py b/searx/tests/test_utils.py
@@ -10,6 +10,11 @@ class TestUtils(SearxTestCase):
         self.assertIsNotNone(utils.gen_useragent())
         self.assertTrue(utils.gen_useragent().startswith('Mozilla'))

+    def test_searx_useragent(self):
+        self.assertIsInstance(utils.searx_useragent(), str)
+        self.assertIsNotNone(utils.searx_useragent())
+        self.assertTrue(utils.searx_useragent().startswith('searx'))
+
     def test_highlight_content(self):
         self.assertEqual(utils.highlight_content(0, None), None)
         self.assertEqual(utils.highlight_content(None, None), None)
@@ -29,6 +34,23 @@ class TestUtils(SearxTestCase):
         query = 'a test'
         self.assertEqual(utils.highlight_content(content, query), content)

+    def test_html_to_text(self):
+        html = """
+        <a href="/testlink" class="link_access_account">
+            <span class="toto">
+                <span>
+                    <img src="test.jpg" />
+                </span>
+            </span>
+            <span class="titi">
+                Test text
+            </span>
+        </a>
+        """
+        self.assertIsInstance(utils.html_to_text(html), unicode)
+        self.assertIsNotNone(utils.html_to_text(html))
+        self.assertEqual(utils.html_to_text(html), "Test text")
+


 class TestHTMLTextExtractor(SearxTestCase):
diff --git a/searx/utils.py b/searx/utils.py
@@ -115,7 +115,7 @@ class HTMLTextExtractor(HTMLParser):
         self.result.append(name)

     def get_text(self):
-        return u''.join(self.result)
+        return u''.join(self.result).strip()


 def html_to_text(html):