logo

searx

My custom branche(s) on searx, a meta-search engine git clone https://hacktivis.me/git/searx.git
commit: f05087b93ac1ebef3bdacd353524bac0d8041832
parent 884eeb8541e0a4cf3d65c2a17e1c2f788cab7fb1
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date:   Tue,  2 Jun 2015 20:36:58 +0200

Refactor
Use only one engine for the four search from Qwant

Diffstat:

Msearx/engines/qwant.py38++++++++++++++++++++++++++++++--------
Dsearx/engines/qwant_images.py70----------------------------------------------------------------------
Dsearx/engines/qwant_news.py69---------------------------------------------------------------------
Dsearx/engines/qwant_social.py69---------------------------------------------------------------------
Msearx/settings.yml14+++++++++++---
Msearx/tests/engines/test_qwant.py176+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsearx/tests/engines/test_qwant_images.py145-------------------------------------------------------------------------------
Dsearx/tests/engines/test_qwant_news.py137-------------------------------------------------------------------------------
Dsearx/tests/engines/test_qwant_social.py140-------------------------------------------------------------------------------
Msearx/tests/test_engines.py3---
10 files changed, 217 insertions(+), 644 deletions(-)

diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py @@ -1,5 +1,5 @@ """ - Qwant (Web) + Qwant (Web, Images, News, Social) @website https://qwant.com/ @provide-api not officially (https://api.qwant.com/api/search/) @@ -12,21 +12,25 @@ from urllib import urlencode from json import loads +from datetime import datetime # engine dependent config -categories = ['general'] +categories = None paging = True language_support = True +search_url_keyword = None + # search-url -url = 'https://api.qwant.com/api/search/web?count=10&offset={offset}&f=&{query}' +url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}' # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 - params['url'] = url.format(query=urlencode({'q': query}), + params['url'] = url.format(keyword=search_url_keyword, + query=urlencode({'q': query}), offset=offset) # add language tag if specified @@ -57,10 +61,28 @@ def response(resp): res_url = result['url'] content = result['desc'] - # append result - results.append({'title': title, - 'content': content, - 'url': res_url}) + if search_url_keyword == 'web': + results.append({'title': title, + 'content': content, + 'url': res_url}) + + elif search_url_keyword == 'images': + thumbnail_src = result['thumbnail'] + img_src = result['media'] + results.append({'template': 'images.html', + 'url': res_url, + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'img_src': img_src}) + + elif search_url_keyword == 'news' or search_url_keyword == 'social': + published_date = datetime.fromtimestamp(result['date'], None) + + results.append({'url': res_url, + 'title': title, + 'publishedDate': published_date, + 'content': content}) # return results return results diff --git a/searx/engines/qwant_images.py b/searx/engines/qwant_images.py @@ -1,70 +0,0 @@ -""" - Qwant (Images) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads - -# engine dependent config -categories = ['images'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/images?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - thumbnail_src = result['thumbnail'] - img_src = result['media'] - - # append result - results.append({'template': 'images.html', - 'url': res_url, - 'title': title, - 'content': '', - 'thumbnail_src': thumbnail_src, - 'img_src': img_src}) - - # return results - return results diff --git a/searx/engines/qwant_news.py b/searx/engines/qwant_news.py @@ -1,69 +0,0 @@ -""" - Qwant (News) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads -from datetime import datetime - -# engine dependent config -categories = ['news'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/news?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - content = result['desc'] - published_date = datetime.fromtimestamp(result['date'], None) - - # append result - results.append({'url': res_url, - 'title': title, - 'publishedDate': published_date, - 'content': content}) - - # return results - return results diff --git a/searx/engines/qwant_social.py b/searx/engines/qwant_social.py @@ -1,69 +0,0 @@ -""" - Qwant (social media) - - @website https://qwant.com/ - @provide-api not officially (https://api.qwant.com/api/search/) - - @using-api yes - @results JSON - @stable yes - @parse url, title, content -""" - -from urllib import urlencode -from json import loads -from datetime import datetime - -# engine dependent config -categories = ['social media'] -paging = True -language_support = True - -# search-url -url = 'https://api.qwant.com/api/search/social?count=10&offset={offset}&f=&{query}' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * 10 - - params['url'] = url.format(query=urlencode({'q': query}), - offset=offset) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&locale=' + params['language'].lower() - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 'data' not in search_results: - return [] - - data = search_results.get('data', {}) - - res = data.get('result', {}) - - # parse results - for result in res.get('items', {}): - - title = result['title'] - res_url = result['url'] - content = result['desc'] - published_date = datetime.fromtimestamp(result['date'], None) - - # append result - results.append({'url': res_url, - 'title': title, - 'content': content, - 'publishedDate': published_date}) - - # return results - return results diff --git a/searx/settings.yml b/searx/settings.yml @@ -171,18 +171,26 @@ engines: - name : qwant engine : qwant shortcut : qw + search_url_keyword : web + categories : general - name : qwant images - engine : qwant_images + engine : qwant shortcut : qwi + search_url_keyword : images + categories : images - name : qwant news - engine : qwant_news + engine : qwant shortcut : qwn + search_url_keyword : news + categories : news - name : qwant social - engine : qwant_social + engine : qwant shortcut : qws + search_url_keyword : social + categories : social media - name : kickass engine : kickass diff --git a/searx/tests/engines/test_qwant.py b/searx/tests/engines/test_qwant.py @@ -68,6 +68,7 @@ class TestQwantEngine(SearxTestCase): } """ response = mock.Mock(text=json) + qwant.search_url_keyword = 'web' results = qwant.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -85,6 +86,181 @@ class TestQwantEngine(SearxTestCase): "offset": 10 }, "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "media": "http://image.jpg", + "desc": "", + "thumbnail": "http://thumbnail.jpg", + "date": "", + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + qwant.search_url_keyword = 'images' + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], '') + self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.jpg') + self.assertEqual(results[0]['img_src'], 'http://image.jpg') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": 1433260920, + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + qwant.search_url_keyword = 'news' + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + self.assertIn('publishedDate', results[0]) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": 1433260920, + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + qwant.search_url_keyword = 'social' + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + self.assertIn('publishedDate', results[0]) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": 1433260920, + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + qwant.search_url_keyword = '' + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { "filters": [] }, "cache": { diff --git a/searx/tests/engines/test_qwant_images.py b/searx/tests/engines/test_qwant_images.py @@ -1,145 +0,0 @@ -from collections import defaultdict -import mock -from searx.engines import qwant_images -from searx.testing import SearxTestCase - - -class TestQwantImagesEngine(SearxTestCase): - - def test_request(self): - query = 'test_query' - dicto = defaultdict(dict) - dicto['pageno'] = 0 - dicto['language'] = 'fr_FR' - params = qwant_images.request(query, dicto) - self.assertIn('url', params) - self.assertIn(query, params['url']) - self.assertIn('qwant.com', params['url']) - self.assertIn('fr_fr', params['url']) - - dicto['language'] = 'all' - params = qwant_images.request(query, dicto) - self.assertFalse('fr' in params['url']) - - def test_response(self): - self.assertRaises(AttributeError, qwant_images.response, None) - self.assertRaises(AttributeError, qwant_images.response, []) - self.assertRaises(AttributeError, qwant_images.response, '') - self.assertRaises(AttributeError, qwant_images.response, '[]') - - response = mock.Mock(text='{}') - self.assertEqual(qwant_images.response(response), []) - - response = mock.Mock(text='{"data": {}}') - self.assertEqual(qwant_images.response(response), []) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "items": [ - { - "title": "Title", - "type": "image", - "media": "http://www.url.xyz/fullimage.jpg", - "desc": "", - "thumbnail": "http://www.url.xyz/thumbnail.jpg", - "thumb_width": 365, - "thumb_height": 230, - "width": "365", - "height": "230", - "size": "187.7KB", - "url": "http://www.url.xyz", - "_id": "0ffd93fb26f3e192a6020af8fc16fbb1", - "media_fullsize": "http://www.proxy/fullimage.jpg", - "count": 0 - } - ], - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'Title') - self.assertEqual(results[0]['url'], 'http://www.url.xyz') - self.assertEqual(results[0]['content'], '') - self.assertEqual(results[0]['thumbnail_src'], 'http://www.url.xyz/thumbnail.jpg') - self.assertEqual(results[0]['img_src'], 'http://www.url.xyz/fullimage.jpg') - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success" - } - """ - response = mock.Mock(text=json) - results = qwant_images.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_news.py b/searx/tests/engines/test_qwant_news.py @@ -1,137 +0,0 @@ -from collections import defaultdict -import mock -from searx.engines import qwant_news -from searx.testing import SearxTestCase - - -class TestQwantNewsEngine(SearxTestCase): - - def test_request(self): - query = 'test_query' - dicto = defaultdict(dict) - dicto['pageno'] = 0 - dicto['language'] = 'fr_FR' - params = qwant_news.request(query, dicto) - self.assertIn('url', params) - self.assertIn(query, params['url']) - self.assertIn('qwant.com', params['url']) - self.assertIn('fr_fr', params['url']) - - dicto['language'] = 'all' - params = qwant_news.request(query, dicto) - self.assertFalse('fr' in params['url']) - - def test_response(self): - self.assertRaises(AttributeError, qwant_news.response, None) - self.assertRaises(AttributeError, qwant_news.response, []) - self.assertRaises(AttributeError, qwant_news.response, '') - self.assertRaises(AttributeError, qwant_news.response, '[]') - - response = mock.Mock(text='{}') - self.assertEqual(qwant_news.response(response), []) - - response = mock.Mock(text='{"data": {}}') - self.assertEqual(qwant_news.response(response), []) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "items": [ - { - "title": "Title", - "score": 9999, - "url": "http://www.url.xyz", - "source": "...", - "desc": "Description", - "date": 1433065411, - "_id": "db0aadd62c2a8565567ffc382f5c61fa", - "favicon": "https://s.qwant.com/fav.ico" - } - ], - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_news.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'Title') - self.assertEqual(results[0]['url'], 'http://www.url.xyz') - self.assertEqual(results[0]['content'], 'Description') - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_news.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_news.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success" - } - """ - response = mock.Mock(text=json) - results = qwant_news.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_social.py b/searx/tests/engines/test_qwant_social.py @@ -1,140 +0,0 @@ -from collections import defaultdict -import mock -from searx.engines import qwant_social -from searx.testing import SearxTestCase - - -class TestQwantSocialEngine(SearxTestCase): - - def test_request(self): - query = 'test_query' - dicto = defaultdict(dict) - dicto['pageno'] = 0 - dicto['language'] = 'fr_FR' - params = qwant_social.request(query, dicto) - self.assertIn('url', params) - self.assertIn(query, params['url']) - self.assertIn('qwant.com', params['url']) - self.assertIn('fr_fr', params['url']) - - dicto['language'] = 'all' - params = qwant_social.request(query, dicto) - self.assertFalse('fr' in params['url']) - - def test_response(self): - self.assertRaises(AttributeError, qwant_social.response, None) - self.assertRaises(AttributeError, qwant_social.response, []) - self.assertRaises(AttributeError, qwant_social.response, '') - self.assertRaises(AttributeError, qwant_social.response, '[]') - - response = mock.Mock(text='{}') - self.assertEqual(qwant_social.response(response), []) - - response = mock.Mock(text='{"data": {}}') - self.assertEqual(qwant_social.response(response), []) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "items": [ - { - "_id": "dc0b3f24c93684c7d7f1b0a4c2d9f1b0", - "__index": 32, - "title": "Title", - "img": "img", - "desc": "Description", - "date": 1432643480, - "type": "twitter", - "card": "XXX", - "post": "603176590856556545", - "url": "http://www.url.xyz", - "userUrl": "https://twitter.com/XXX" - } - ], - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_social.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'Title') - self.assertEqual(results[0]['url'], 'http://www.url.xyz') - self.assertEqual(results[0]['content'], 'Description') - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "result": { - "filters": [] - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_social.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success", - "data": { - "query": { - "locale": "en_us", - "query": "Test", - "offset": 10 - }, - "cache": { - "key": "e66aa864c00147a0e3a16ff7a5efafde", - "created": 1433092754, - "expiration": 259200, - "status": "miss", - "age": 0 - } - } - } - """ - response = mock.Mock(text=json) - results = qwant_social.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) - - json = """ - { - "status": "success" - } - """ - response = mock.Mock(text=json) - results = qwant_social.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py @@ -26,9 +26,6 @@ from searx.tests.engines.test_openstreetmap import * # noqa from searx.tests.engines.test_photon import * # noqa from searx.tests.engines.test_piratebay import * # noqa from searx.tests.engines.test_qwant import * # noqa -from searx.tests.engines.test_qwant_images import * # noqa -from searx.tests.engines.test_qwant_news import * # noqa -from searx.tests.engines.test_qwant_social import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa