logo

searx

My custom branch(es) on searx, a meta-search engine.
git clone https://hacktivis.me/git/searx.git
commit: 9c2b7a82f0c515fd1df88ed80349eda7f49e0825
parent e391b2d970a19cdc39dd550929e91ace4aee8832
Author: Noémi Ványi <sitbackandwait@gmail.com>
Date:   Wed,  1 Nov 2017 12:28:18 +0100

minor fixes of arxiv

Closes #1050

Diffstat:

M searx/engines/arxiv.py          | 11 ++++++-----
M tests/unit/engines/test_arxiv.py |  6 +++---
2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py @@ -2,7 +2,7 @@ """ ArXiV (Scientific preprints) - @website https://axiv.org + @website https://arxiv.org @provide-api yes (export.arxiv.org/api/query) @using-api yes @results XML-RSS @@ -41,7 +41,8 @@ def request(query, params): def response(resp): results = [] - search_results = html.fromstring(resp.text).xpath('//entry') + dom = html.fromstring(resp.content) + search_results = dom.xpath('//entry') for entry in search_results: title = entry.xpath('.//title')[0].text @@ -49,15 +50,15 @@ def response(resp): url = entry.xpath('.//id')[0].text content_string = '{doi_content}{abstract_content}' - + abstract = entry.xpath('.//summary')[0].text # If a doi is available, add it to the snipppet try: doi_content = entry.xpath('.//link[@title="doi"]')[0].text - content = content_string.format(doi_content=doi_content, abstract_content=abstract_content) + content = content_string.format(doi_content=doi_content, abstract_content=abstract) except: - content = content_string.format(abstract_content=abstract_content) + content = content_string.format(doi_content="", abstract_content=abstract) if len(content) > 300: content = content[0:300] + "..." 
diff --git a/tests/unit/engines/test_arxiv.py b/tests/unit/engines/test_arxiv.py @@ -21,11 +21,11 @@ class TestBaseEngine(SearxTestCase): self.assertRaises(AttributeError, arxiv.response, '') self.assertRaises(AttributeError, arxiv.response, '[]') - response = mock.Mock(text='''<?xml version="1.0" encoding="UTF-8"?> + response = mock.Mock(content=b'''<?xml version="1.0" encoding="UTF-8"?> <feed xmlns="http://www.w3.org/2005/Atom"></feed>''') self.assertEqual(arxiv.response(response), []) - xml_mock = '''<?xml version="1.0" encoding="UTF-8"?> + xml_mock = b'''<?xml version="1.0" encoding="UTF-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> <title type="html">ArXiv Query: search_query=all:test_query&amp;id_list=&amp;start=0&amp;max_results=1</title> <id>http://arxiv.org/api/1</id> @@ -50,7 +50,7 @@ class TestBaseEngine(SearxTestCase): </feed> ''' - response = mock.Mock(text=xml_mock.encode('utf-8')) + response = mock.Mock(content=xml_mock) results = arxiv.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1)