logo

searx

My custom branche(s) on searx, a meta-search engine
commit: f6db77d81ea87d99462b4c3cc40a8a27e0264724
parent: 516105c570a920dadeb87b34ee5ee434ad5cb16f
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Thu, 12 Feb 2015 10:52:55 +0100

Merge pull request #210 from Cqoicebordel/unit-tests

unit tests

Diffstat:

Msearx/engines/currency_convert.py9+++------
Msearx/engines/duckduckgo.py10+++++-----
Msearx/engines/duckduckgo_definitions.py5+++--
Msearx/engines/faroo.py8+++-----
Msearx/engines/openstreetmap.py9+++++----
Msearx/engines/photon.py2+-
Msearx/engines/startpage.py13+++++--------
Msearx/engines/subtitleseeker.py15++++++++-------
Msearx/engines/twitter.py13++++++++-----
Msearx/engines/yacy.py19+++++++++++--------
Msearx/engines/yahoo.py4++--
Asearx/tests/engines/test_currency_convert.py44++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_duckduckgo.py90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_duckduckgo_definitions.py250+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_faroo.py116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_google.py162+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_mediawiki.py130+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_openstreetmap.py199+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_photon.py166+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_startpage.py140+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_subtitleseeker.py169+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_twitter.py502+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_yacy.py96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_yahoo.py154+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asearx/tests/engines/test_yahoo_news.py143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msearx/tests/test_engines.py16+++++++++++++++-
26 files changed, 2430 insertions(+), 54 deletions(-)

diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py @@ -13,12 +13,9 @@ def request(query, params): if not m: # wrong query return params - try: - ammount, from_currency, to_currency = m.groups() - ammount = float(ammount) - except: - # wrong params - return params + + ammount, from_currency, to_currency = m.groups() + ammount = float(ammount) q = (from_currency + to_currency).upper() diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py @@ -15,7 +15,7 @@ from urllib import urlencode from lxml.html import fromstring -from searx.utils import html_to_text +from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] @@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}' # specific xpath variables result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa url_xpath = './/a[@class="large"]/@href' -title_xpath = './/a[@class="large"]//text()' -content_xpath = './/div[@class="snippet"]//text()' +title_xpath = './/a[@class="large"]' +content_xpath = './/div[@class="snippet"]' # do search-request @@ -64,8 +64,8 @@ def response(resp): if not res_url: continue - title = html_to_text(''.join(r.xpath(title_xpath))) - content = html_to_text(''.join(r.xpath(content_xpath))) + title = extract_text(r.xpath(title_xpath)) + content = extract_text(r.xpath(content_xpath)) # append result results.append({'title': title, diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py @@ -25,9 +25,10 @@ def request(query, params): def response(resp): - search_res = json.loads(resp.text) results = [] + search_res = json.loads(resp.text) + content = '' heading = search_res.get('Heading', '') attributes = [] @@ -68,7 +69,7 @@ def response(resp): results.append({'title': heading, 'url': firstURL}) # related topics - for ddg_result in search_res.get('RelatedTopics', None): + for ddg_result in search_res.get('RelatedTopics', []): if 'FirstURL' in ddg_result: suggestion = result_to_text(ddg_result.get('FirstURL', None), ddg_result.get('Text', None), diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py @@ -37,7 +37,7 @@ search_category = {'general': 'web', # do search-request def request(query, params): - offset = (params['pageno']-1) * number_of_results + 1 + offset = (params['pageno'] - 1) * number_of_results + 1 categorie = search_category.get(params['category'], 'web') if params['language'] == 'all': @@ -45,11 +45,11 @@ def request(query, params): else: language = params['language'].split('_')[0] - # skip, if language is not supported + # if language is not supported, put it in english if language != 'en' and\ language != 'de' and\ language != 'zh': - return params + language = 'en' params['url'] = search_url.format(offset=offset, number_of_results=number_of_results, @@ -69,12 +69,10 @@ def response(resp): # HTTP-Code 401: api-key is not valide if resp.status_code == 401: raise Exception("API key is not valide") - return [] # HTTP-Code 429: rate limit exceeded if resp.status_code == 429: raise Exception("rate limit has been exceeded!") - return [] results = [] diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py @@ -38,6 +38,9 @@ def response(resp): # parse results for r in json: + if 'display_name' not in r: + continue + title = r['display_name'] osm_type = r.get('osm_type', r.get('type')) url = result_base_url.format(osm_type=osm_type, @@ -49,10 +52,8 @@ def response(resp): geojson = r.get('geojson') # if no geojson is found and osm_type is a node, add geojson Point - if not geojson and\ - osm_type == 'node': - geojson = {u'type': u'Point', - u'coordinates': [r['lon'], r['lat']]} + if not geojson and osm_type == 'node': + geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]} address_raw = r.get('address') address = {} diff --git a/searx/engines/photon.py b/searx/engines/photon.py @@ -61,7 +61,7 @@ def response(resp): continue # get title - title = properties['name'] + title = properties.get('name') # get osm-type if properties.get('osm_type') == 'N': diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py @@ -13,6 +13,7 @@ from lxml import html from cgi import escape import re +from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] @@ -45,8 +46,7 @@ def request(query, params): # set language if specified if params['language'] != 'all': - params['data']['with_language'] = ('lang_' + - params['language'].split('_')[0]) + params['data']['with_language'] = ('lang_' + params['language'].split('_')[0]) return params @@ -64,18 +64,15 @@ def response(resp): continue link = links[0] url = link.attrib.get('href') - try: - title = escape(link.text_content()) - except UnicodeDecodeError: - continue # block google-ad url's if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url): continue + title = escape(extract_text(link)) + if result.xpath('./p[@class="desc"]'): - content = escape(result.xpath('./p[@class="desc"]')[0] - .text_content()) + content = escape(extract_text(result.xpath('./p[@class="desc"]'))) else: content = '' diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py @@ -12,6 +12,7 @@ from cgi import escape from urllib import quote_plus from lxml import html from searx.languages import language_codes +from searx.engines.xpath import extract_text # engine dependent config categories = ['videos'] @@ -20,7 +21,7 @@ language = "" # search-url url = 'http://www.subtitleseeker.com/' -search_url = url+'search/TITLES/{query}&p={pageno}' +search_url = url + 'search/TITLES/{query}&p={pageno}' # specific xpath variables results_xpath = '//div[@class="boxRows"]' @@ -44,7 +45,7 @@ def response(resp): if resp.search_params['language'] != 'all': search_lang = [lc[1] for lc in language_codes - if lc[0][:2] == resp.search_params['language']][0] + if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] # parse results for result in dom.xpath(results_xpath): @@ -56,17 +57,17 @@ def response(resp): elif search_lang: href = href + search_lang + '/' - title = escape(link.xpath(".//text()")[0]) + title = escape(extract_text(link)) - content = result.xpath('.//div[contains(@class,"red")]//text()')[0] + content = extract_text(result.xpath('.//div[contains(@class,"red")]')) content = content + " - " - text = result.xpath('.//div[contains(@class,"grey-web")]')[0] - content = content + html.tostring(text, method='text') + text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0]) + content = content + text if result.xpath(".//span") != []: content = content +\ " - (" +\ - result.xpath(".//span//text()")[0].strip() +\ + extract_text(result.xpath(".//span")) +\ ")" # append result diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py @@ -13,8 +13,8 @@ from urlparse import urljoin from urllib import urlencode from lxml import html -from cgi import escape from datetime import datetime +from searx.engines.xpath import extract_text # engine dependent config categories = ['social media'] @@ -22,12 +22,12 @@ language_support = True # search-url base_url = 'https://twitter.com/' -search_url = base_url+'search?' +search_url = base_url + 'search?' # specific xpath variables results_xpath = '//li[@data-item-type="tweet"]' link_xpath = './/small[@class="time"]//a' -title_xpath = './/span[@class="username js-action-profile-name"]//text()' +title_xpath = './/span[@class="username js-action-profile-name"]' content_xpath = './/p[@class="js-tweet-text tweet-text"]' timestamp_xpath = './/span[contains(@class,"_timestamp")]' @@ -39,6 +39,8 @@ def request(query, params): # set language if specified if params['language'] != 'all': params['cookies']['lang'] = params['language'].split('_')[0] + else: + params['cookies']['lang'] = 'en' return params @@ -53,8 +55,9 @@ def response(resp): for tweet in dom.xpath(results_xpath): link = tweet.xpath(link_xpath)[0] url = urljoin(base_url, link.attrib.get('href')) - title = ''.join(tweet.xpath(title_xpath)) - content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8")) + title = extract_text(tweet.xpath(title_xpath)) + content = extract_text(tweet.xpath(content_xpath)[0]) + pubdate = tweet.xpath(timestamp_xpath) if len(pubdate) > 0: timestamp = float(pubdate[0].attrib.get('data-time')) diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py @@ -25,10 +25,10 @@ number_of_results = 5 # search-url base_url = 'http://localhost:8090' search_url = '/yacysearch.json?{query}'\ - '&startRecord={offset}'\ - '&maximumRecords={limit}'\ - '&contentdom={search_type}'\ - '&resource=global' # noqa + '&startRecord={offset}'\ + '&maximumRecords={limit}'\ + '&contentdom={search_type}'\ + '&resource=global' # yacy specific type-definitions search_types = {'general': 'text', @@ -41,7 +41,7 @@ search_types = {'general': 'text', # do search-request def request(query, params): offset = (params['pageno'] - 1) * number_of_results - search_type = search_types.get(params['category'], '0') + search_type = search_types.get(params.get('category'), '0') params['url'] = base_url +\ search_url.format(query=urlencode({'query': query}), @@ -66,9 +66,12 @@ def response(resp): if not raw_search_results: return [] - search_results = raw_search_results.get('channels', {})[0].get('items', []) + search_results = raw_search_results.get('channels', []) - for result in search_results: + if len(search_results) == 0: + return [] + + for result in search_results[0].get('items', []): # parse image results if result.get('image'): # append result @@ -88,7 +91,7 @@ def response(resp): 'content': result['description'], 'publishedDate': publishedDate}) - #TODO parse video, audio and file results + # TODO parse video, audio and file results # return results return results diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py @@ -35,7 +35,7 @@ suggestion_xpath = '//div[@id="satat"]//a' def parse_url(url_string): endings = ['/RS', '/RK'] endpositions = [] - start = url_string.find('http', url_string.find('/RU=')+1) + start = url_string.find('http', url_string.find('/RU=') + 1) for ending in endings: endpos = url_string.rfind(ending) @@ -91,7 +91,7 @@ def response(resp): 'content': content}) # if no suggestion found, return results - if not suggestion_xpath: + if not dom.xpath(suggestion_xpath): return results # parse suggestion diff --git a/searx/tests/engines/test_currency_convert.py b/searx/tests/engines/test_currency_convert.py @@ -0,0 +1,44 @@ +from collections import defaultdict +from datetime import datetime +import mock +from searx.engines import currency_convert +from searx.testing import SearxTestCase + + +class TestCurrencyConvertEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = currency_convert.request(query, dicto) + self.assertNotIn('url', params) + + query = '1.1.1 EUR in USD' + params = currency_convert.request(query, dicto) + self.assertNotIn('url', params) + + query = '10 eur in usd' + params = currency_convert.request(query, dicto) + self.assertIn('url', params) + self.assertIn('finance.yahoo.com', params['url']) + self.assertIn('EUR', params['url']) + self.assertIn('USD', params['url']) + + def test_response(self): + dicto = defaultdict(dict) + dicto['ammount'] = 10 + dicto['from'] = "EUR" + dicto['to'] = "USD" + response = mock.Mock(text='a,b,c,d', search_params=dicto) + self.assertEqual(currency_convert.response(response), []) + + csv = "2,0.5,1" + response = mock.Mock(text=csv, search_params=dicto) + results = currency_convert.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['answer'], '10 EUR = 5.0 USD (1 EUR = 0.5 USD)') + now_date = datetime.now().strftime('%Y%m%d') + self.assertEqual(results[0]['url'], 'http://finance.yahoo.com/currency/converter-results/' + + now_date + '/10-eur-to-usd.html') diff --git a/searx/tests/engines/test_duckduckgo.py b/searx/tests/engines/test_duckduckgo.py @@ -0,0 +1,90 @@ +from collections import defaultdict +import mock +from searx.engines import duckduckgo +from searx.testing import SearxTestCase + + +class TestDuckduckgoEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = duckduckgo.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('duckduckgo.com', params['url']) + self.assertIn('fr-fr', params['url']) + + dicto['language'] = 'all' + params = duckduckgo.request(query, dicto) + self.assertIn('en-us', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, duckduckgo.response, None) + self.assertRaises(AttributeError, duckduckgo.response, []) + self.assertRaises(AttributeError, duckduckgo.response, '') + self.assertRaises(AttributeError, duckduckgo.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(duckduckgo.response(response), []) + + html = """ + <div class="results_links results_links_deep web-result"> + <div class="icon_fav" style="display: block;"> + <a rel="nofollow" href="https://www.test.com/"> + <img width="16" height="16" alt="" + src="/i/www.test.com.ico" style="visibility: visible;" name="i15" /> + </a> + </div> + <div class="links_main links_deep"> <!-- This is the visible part --> + <a rel="nofollow" class="large" href="http://this.should.be.the.link/"> + This <b>is</b> <b>the</b> title + </a> + <div class="snippet"><b>This</b> should be the content.</div> + <div class="url"> + http://this.should.be.the.link/ + </div> + </div> + </div> + """ + response = mock.Mock(text=html) + results = duckduckgo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') + self.assertEqual(results[0]['content'], 'This should be the content.') + + html = """ + <div class="results_links results_links_deep web-result"> + <div class="icon_fav" style="display: block;"> + </div> + <div class="links_main links_deep"> <!-- This is the visible part --> + <div class="snippet"><b>This</b> should be the content.</div> + <div class="url"> + http://this.should.be.the.link/ + </div> + </div> + </div> + <div class="results_links results_links_deep web-result"> + <div class="icon_fav" style="display: block;"> + <img width="16" height="16" alt="" + src="/i/www.test.com.ico" style="visibility: visible;" name="i15" /> + </div> + <div class="links_main links_deep"> <!-- This is the visible part --> + <a rel="nofollow" class="large" href=""> + This <b>is</b> <b>the</b> title + </a> + <div class="snippet"><b>This</b> should be the content.</div> + <div class="url"> + http://this.should.be.the.link/ + </div> + </div> + </div> + """ + response = mock.Mock(text=html) + results = duckduckgo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_duckduckgo_definitions.py b/searx/tests/engines/test_duckduckgo_definitions.py @@ -0,0 +1,250 @@ +from collections import defaultdict +import mock +from searx.engines import duckduckgo_definitions +from searx.testing import SearxTestCase + + +class TestDDGDefinitionsEngine(SearxTestCase): + + def test_result_to_text(self): + url = '' + text = 'Text' + html_result = 'Html' + result = duckduckgo_definitions.result_to_text(url, text, html_result) + self.assertEqual(result, text) + + html_result = '<a href="url">Text in link</a>' + result = duckduckgo_definitions.result_to_text(url, text, html_result) + self.assertEqual(result, 'Text in link') + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = duckduckgo_definitions.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('duckduckgo.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, duckduckgo_definitions.response, None) + self.assertRaises(AttributeError, duckduckgo_definitions.response, []) + self.assertRaises(AttributeError, duckduckgo_definitions.response, '') + self.assertRaises(AttributeError, duckduckgo_definitions.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(duckduckgo_definitions.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(duckduckgo_definitions.response(response), []) + + json = """ + { + "DefinitionSource": "definition source", + "Heading": "heading", + "ImageWidth": 0, + "RelatedTopics": [ + { + "Result": "Top-level domains", + "Icon": { + "URL": "", + "Height": "", + "Width": "" + }, + "FirstURL": "https://first.url", + "Text": "text" + }, + { + "Topics": [ + { + "Result": "result topic", + "Icon": { + "URL": "", + "Height": "", + "Width": "" + }, + "FirstURL": "https://duckduckgo.com/?q=2%2F2", + "Text": "result topic text" + } + ], + "Name": "name" + } + ], + "Entity": "Entity", + "Type": "A", + "Redirect": "", + "DefinitionURL": "http://definition.url", + "AbstractURL": "https://abstract.url", + "Definition": "this is the definition", + "AbstractSource": "abstract source", + "Infobox": { + "content": [ + { + "data_type": "string", + "value": "1999", + "label": "Introduced", + "wiki_order": 0 + } + ], + "meta": [ + { + "data_type": "string", + "value": ".test", + "label": "article_title" + } + ] + }, + "Image": "image.png", + "ImageIsLogo": 0, + "Abstract": "abstract", + "AbstractText": "abstract text", + "AnswerType": "", + "ImageHeight": 0, + "Results": [{ + "Result" : "result title", + "Icon" : { + "URL" : "result url", + "Height" : 16, + "Width" : 16 + }, + "FirstURL" : "result first url", + "Text" : "result text" + } + ], + "Answer": "answer" + } + """ + response = mock.Mock(text=json) + results = duckduckgo_definitions.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 4) + self.assertEqual(results[0]['answer'], 'answer') + self.assertEqual(results[1]['title'], 'heading') + self.assertEqual(results[1]['url'], 'result first url') + self.assertEqual(results[2]['suggestion'], 'text') + self.assertEqual(results[3]['infobox'], 'heading') + self.assertEqual(results[3]['id'], 'http://definition.url') + self.assertEqual(results[3]['entity'], 'Entity') + self.assertIn('abstract', results[3]['content']) + self.assertIn('this is the definition', results[3]['content']) + self.assertEqual(results[3]['img_src'], 'image.png') + self.assertIn('Introduced', results[3]['attributes'][0]['label']) + self.assertIn('1999', results[3]['attributes'][0]['value']) + self.assertIn({'url': 'https://abstract.url', 'title': 'abstract source'}, results[3]['urls']) + self.assertIn({'url': 'http://definition.url', 'title': 'definition source'}, results[3]['urls']) + self.assertIn({'name': 'name', 'suggestions': ['result topic text']}, results[3]['relatedTopics']) + + json = """ + { + "DefinitionSource": "definition source", + "Heading": "heading", + "ImageWidth": 0, + "RelatedTopics": [], + "Entity": "Entity", + "Type": "A", + "Redirect": "", + "DefinitionURL": "", + "AbstractURL": "https://abstract.url", + "Definition": "", + "AbstractSource": "abstract source", + "Image": "", + "ImageIsLogo": 0, + "Abstract": "", + "AbstractText": "abstract text", + "AnswerType": "", + "ImageHeight": 0, + "Results": [], + "Answer": "" + } + """ + response = mock.Mock(text=json) + results = duckduckgo_definitions.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['url'], 'https://abstract.url') + self.assertEqual(results[0]['title'], 'heading') + self.assertEqual(results[0]['content'], '') + + json = """ + { + "DefinitionSource": "definition source", + "Heading": "heading", + "ImageWidth": 0, + "RelatedTopics": [ + { + "Result": "Top-level domains", + "Icon": { + "URL": "", + "Height": "", + "Width": "" + }, + "FirstURL": "https://first.url", + "Text": "heading" + }, + { + "Name": "name" + }, + { + "Topics": [ + { + "Result": "result topic", + "Icon": { + "URL": "", + "Height": "", + "Width": "" + }, + "FirstURL": "https://duckduckgo.com/?q=2%2F2", + "Text": "heading" + } + ], + "Name": "name" + } + ], + "Entity": "Entity", + "Type": "A", + "Redirect": "", + "DefinitionURL": "http://definition.url", + "AbstractURL": "https://abstract.url", + "Definition": "this is the definition", + "AbstractSource": "abstract source", + "Infobox": { + "meta": [ + { + "data_type": "string", + "value": ".test", + "label": "article_title" + } + ] + }, + "Image": "image.png", + "ImageIsLogo": 0, + "Abstract": "abstract", + "AbstractText": "abstract text", + "AnswerType": "", + "ImageHeight": 0, + "Results": [{ + "Result" : "result title", + "Icon" : { + "URL" : "result url", + "Height" : 16, + "Width" : 16 + }, + "Text" : "result text" + } + ], + "Answer": "" + } + """ + response = mock.Mock(text=json) + results = duckduckgo_definitions.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['infobox'], 'heading') + self.assertEqual(results[0]['id'], 'http://definition.url') + self.assertEqual(results[0]['entity'], 'Entity') + self.assertIn('abstract', results[0]['content']) + self.assertIn('this is the definition', results[0]['content']) + self.assertEqual(results[0]['img_src'], 'image.png') + self.assertIn({'url': 'https://abstract.url', 'title': 'abstract source'}, results[0]['urls']) + self.assertIn({'url': 'http://definition.url', 'title': 'definition source'}, results[0]['urls']) + self.assertIn({'name': 'name', 'suggestions': []}, results[0]['relatedTopics']) diff --git a/searx/tests/engines/test_faroo.py b/searx/tests/engines/test_faroo.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import faroo +from searx.testing import SearxTestCase + + +class TestFarooEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + dicto['category'] = 'general' + params = faroo.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('faroo.com', params['url']) + self.assertIn('en', params['url']) + self.assertIn('web', params['url']) + + dicto['language'] = 'all' + params = faroo.request(query, dicto) + self.assertIn('en', params['url']) + + dicto['language'] = 'de_DE' + params = faroo.request(query, dicto) + self.assertIn('de', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, faroo.response, None) + self.assertRaises(AttributeError, faroo.response, []) + self.assertRaises(AttributeError, faroo.response, '') + self.assertRaises(AttributeError, faroo.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(faroo.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(faroo.response(response), []) + + response = mock.Mock(text='{"data": []}', status_code=401) + self.assertRaises(Exception, faroo.response, response) + + response = mock.Mock(text='{"data": []}', status_code=429) + self.assertRaises(Exception, faroo.response, response) + + json = """ + { + "results": [ + { + "title": "This is the title", + "kwic": "This is the content", + "content": "", + "url": "http://this.is.the.url/", + "iurl": "", + "domain": "css3test.com", + "author": "Jim Dalrymple", + "news": true, + "votes": "10", + "date": 1360622563000, + "related": [] + }, + { + "title": "This is the title2", + "kwic": "This is the content2", + "content": "", + "url": "http://this.is.the.url2/", + "iurl": "", + "domain": "css3test.com", + "author": "Jim Dalrymple", + "news": false, + "votes": "10", + "related": [] + }, + { + "title": "This is the title3", + "kwic": "This is the content3", + "content": "", + "url": "http://this.is.the.url3/", + "iurl": "http://upload.wikimedia.org/optimized.jpg", + "domain": "css3test.com", + "author": "Jim Dalrymple", + "news": false, + "votes": "10", + "related": [] + } + ], + "query": "test", + "suggestions": [], + "count": 100, + "start": 1, + "length": 10, + "time": "15" + } + """ + response = mock.Mock(text=json) + results = faroo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 4) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/') + self.assertEqual(results[0]['content'], 'This is the content') + self.assertEqual(results[1]['title'], 'This is the title2') + self.assertEqual(results[1]['url'], 'http://this.is.the.url2/') + self.assertEqual(results[1]['content'], 'This is the content2') + self.assertEqual(results[3]['img_src'], 'http://upload.wikimedia.org/optimized.jpg') + + json = """ + {} + """ + response = mock.Mock(text=json) + results = faroo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_google.py b/searx/tests/engines/test_google.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +import lxml +from searx.engines import google +from searx.testing import SearxTestCase + + +class TestGoogleEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = google.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('google.com', params['url']) + self.assertIn('PREF', params['cookies']) + self.assertIn('fr', params['headers']['Accept-Language']) + + dicto['language'] = 'all' + params = google.request(query, dicto) + self.assertIn('en', params['headers']['Accept-Language']) + + def test_response(self): + self.assertRaises(AttributeError, google.response, None) + self.assertRaises(AttributeError, google.response, []) + self.assertRaises(AttributeError, google.response, '') + self.assertRaises(AttributeError, google.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(google.response(response), []) + + html = """ + <li class="g"> + <h3 class="r"> + <a href="http://this.should.be.the.link/"> + <b>This</b> is <b>the</b> title + </a> + </h3> + <div class="s"> + <div class="kv" style="margin-bottom:2px"> + <cite> + <b>test</b>.psychologies.com/ + </cite> + <div class="_nBb">‎ + <div style="display:inline" onclick="google.sham(this);" aria-expanded="false" + aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA"> + <span class="_O0"> + </span> + </div> + <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1"> + <ul> + <li class="_Ykb"> + <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent + .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/"> + En cache + </a> + </li> + <li class="_Ykb"> + <a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/"> + Pages similaires + </a> + </li> + </ul> + </div> + </div> + </div> + <span class="st"> + This should be the content. + </span> + <br> + <div class="osl">‎ + <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/"> + Test Personnalité + </a> - ‎ + <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/"> + Tests - Moi + </a> - ‎ + <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple"> + Test Couple + </a> + - ‎ + <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour"> + Test Amour + </a> + </div> + </div> + </li> + <li class="g"> + <h3 class="r"> + <a href="http://www.google.com/images?q=toto"> + <b>This</b> + </a> + </h3> + </li> + <li class="g"> + <h3 class="r"> + <a href="http://www.google.com/search?q=toto"> + <b>This</b> is + </a> + </h3> + </li> + <li class="g"> + <h3 class="r"> + <a href="€"> + <b>This</b> is <b>the</b> + </a> + </h3> + </li> + <li class="g"> + <h3 class="r"> + <a href="/url?q=url"> + <b>This</b> is <b>the</b> + </a> + </h3> + </li> + <p class="_Bmc" style="margin:3px 8px"> + <a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved="> + suggestion <b>title</b> + </a> + </p> + """ + response = mock.Mock(text=html) + results = google.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') + self.assertEqual(results[0]['content'], 'This should be the content.') + self.assertEqual(results[1]['suggestion'], 'suggestion title') + + html = """ + <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO"> + </li> + """ + response = mock.Mock(text=html) + results = google.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + def test_parse_images(self): + html = """ + <li> + <div> + <a href="http://www.google.com/url?q=http://this.is.the.url/"> + <img style="margin:3px 0;margin-right:6px;padding:0" height="90" + src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0"> + </a> + </div> + </li> + """ + dom = lxml.html.fromstring(html) + results = google.parse_images(dom) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['url'], 'http://this.is.the.url/') + self.assertEqual(results[0]['title'], '') + self.assertEqual(results[0]['content'], '') + self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg') diff --git a/searx/tests/engines/test_mediawiki.py b/searx/tests/engines/test_mediawiki.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import mediawiki +from searx.testing import SearxTestCase + + +class TestMediawikiEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = mediawiki.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('wikipedia.org', params['url']) + self.assertIn('fr', params['url']) + + dicto['language'] = 'all' + params = mediawiki.request(query, dicto) + self.assertIn('en', params['url']) + + mediawiki.base_url = "http://test.url/" + mediawiki.search_url = mediawiki.base_url +\ + 'w/api.php?action=query'\ + '&list=search'\ + '&{query}'\ + '&srprop=timestamp'\ + '&format=json'\ + '&sroffset={offset}'\ + '&srlimit={limit}' # noqa + params = mediawiki.request(query, dicto) + self.assertIn('test.url', params['url']) + + def test_response(self): + dicto = defaultdict(dict) + dicto['language'] = 'fr' + mediawiki.base_url = "https://{language}.wikipedia.org/" + + self.assertRaises(AttributeError, mediawiki.response, None) + self.assertRaises(AttributeError, mediawiki.response, []) + self.assertRaises(AttributeError, mediawiki.response, '') + self.assertRaises(AttributeError, mediawiki.response, '[]') + + response = mock.Mock(text='{}', search_params=dicto) + self.assertEqual(mediawiki.response(response), []) + + response = mock.Mock(text='{"data": []}', search_params=dicto) + self.assertEqual(mediawiki.response(response), []) + + json = """ + { + "query-continue": { + "search": { + "sroffset": 1 + } + }, + "query": { + "searchinfo": { + "totalhits": 29721 + }, + "search": [ + { + "ns": 0, + "title": "This is the title étude", + "timestamp": "2014-12-19T17:42:52Z" + } + ] + } + } + """ + response = mock.Mock(text=json, search_params=dicto) + results = mediawiki.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], u'This is the title étude') + self.assertIn('fr.wikipedia.org', results[0]['url']) + self.assertIn('This_is_the_title', results[0]['url']) + self.assertIn('%C3%A9tude', results[0]['url']) + self.assertEqual(results[0]['content'], '') + + json = """ + { + "query-continue": { + "search": { + "sroffset": 1 + } + }, + "query": { + "searchinfo": { + "totalhits": 29721 + }, + "search": [ + ] + } + } + """ + response = mock.Mock(text=json, search_params=dicto) + results = mediawiki.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "query-continue": { + "search": { + "sroffset": 1 + } + }, + "query": { + } + } + """ + response = mock.Mock(text=json, search_params=dicto) + results = mediawiki.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + {"toto":[ + {"id":200,"name":"Artist Name", + "link":"http:\/\/www.mediawiki.com\/artist\/1217","type":"artist"} + ]} + """ + response = mock.Mock(text=json, search_params=dicto) + results = mediawiki.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_openstreetmap.py b/searx/tests/engines/test_openstreetmap.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import openstreetmap +from searx.testing import SearxTestCase + + +class TestOpenstreetmapEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = openstreetmap.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('openstreetmap.org', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, openstreetmap.response, None) + self.assertRaises(AttributeError, openstreetmap.response, []) + self.assertRaises(AttributeError, openstreetmap.response, '') + self.assertRaises(AttributeError, openstreetmap.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(openstreetmap.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(openstreetmap.response(response), []) + + json = """ + [ + { + "place_id": "127732055", + "licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright", + "osm_type": "relation", + "osm_id": "7444", + "boundingbox": [ + "48.8155755", + "48.902156", + "2.224122", + "2.4697602" + ], + "lat": "48.8565056", + "lon": "2.3521334", + "display_name": "This is the title", + "class": "place", + "type": "city", + "importance": 0.96893459932191, + "icon": "https://nominatim.openstreetmap.org/images/mapicons/poi_place_city.p.20.png", + "address": { + "city": "Paris", + "county": "Paris", + "state": "Île-de-France", + "country": "France", + "country_code": "fr" + }, + "geojson": { + "type": "Polygon", + "coordinates": [ + [ + [ + 2.224122, + 48.854199 + ] + ] + ] + } + } + ] + """ + response = mock.Mock(text=json) + results = openstreetmap.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://openstreetmap.org/relation/7444') + self.assertIn('coordinates', results[0]['geojson']) + self.assertEqual(results[0]['geojson']['coordinates'][0][0][0], 2.224122) + self.assertEqual(results[0]['geojson']['coordinates'][0][0][1], 48.854199) + self.assertEqual(results[0]['address'], None) + self.assertIn('48.8155755', results[0]['boundingbox']) + self.assertIn('48.902156', results[0]['boundingbox']) + self.assertIn('2.224122', results[0]['boundingbox']) + self.assertIn('2.4697602', results[0]['boundingbox']) + + json = """ + [ + { + "place_id": "127732055", + "licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright", + "osm_type": "relation", + "osm_id": "7444", + "boundingbox": [ + "48.8155755", + "48.902156", + "2.224122", + "2.4697602" + ], + "lat": "48.8565056", + "lon": "2.3521334", + "display_name": "This is the title", + "class": "tourism", + "type": "city", + "importance": 0.96893459932191, + "icon": "https://nominatim.openstreetmap.org/images/mapicons/poi_place_city.p.20.png", + "address": { + "city": "Paris", + "county": "Paris", + "state": "Île-de-France", + "country": "France", + "country_code": "fr", + "address29": "Address" + }, + "geojson": { + "type": "Polygon", + "coordinates": [ + [ + [ + 2.224122, + 48.854199 + ] + ] + ] + } + }, + { + "place_id": "127732055", + "licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright", + "osm_type": "relation", + "osm_id": "7444", + "boundingbox": [ + "48.8155755", + "48.902156", + "2.224122", + "2.4697602" + ], + "lat": "48.8565056", + "lon": "2.3521334", + "display_name": "This is the title", + "class": "tourism", + "type": "city", + "importance": 0.96893459932191, + "icon": "https://nominatim.openstreetmap.org/images/mapicons/poi_place_city.p.20.png", + "address": { + "city": "Paris", + "county": "Paris", + "state": "Île-de-France", + "country": "France", + "postcode": 75000, + "country_code": "fr" + }, + "geojson": { + "type": "Polygon", + "coordinates": [ + [ + [ + 2.224122, + 48.854199 + ] + ] + ] + } + }, + { + "place_id": "127732055", + "licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright", + "osm_type": "node", + "osm_id": "7444", + "boundingbox": [ + "48.8155755", + "48.902156", + "2.224122", + "2.4697602" + ], + "lat": "48.8565056", + "lon": "2.3521334", + "display_name": "This is the title", + "class": "tourism", + "type": "city", + "importance": 0.96893459932191, + "icon": "https://nominatim.openstreetmap.org/images/mapicons/poi_place_city.p.20.png", + "address": { + "city": "Paris", + "county": "Paris", + "state": "Île-de-France", + "country": "France", + "country_code": "fr", + "address29": "Address" + } + } + ] + """ + response = mock.Mock(text=json) + results = openstreetmap.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 3) + self.assertIn('48.8565056', results[2]['geojson']['coordinates']) + self.assertIn('2.3521334', results[2]['geojson']['coordinates']) diff --git a/searx/tests/engines/test_photon.py b/searx/tests/engines/test_photon.py @@ -0,0 +1,166 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import photon +from searx.testing import SearxTestCase + + +class TestPhotonEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'all' + params = photon.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('photon.komoot.de', params['url']) + + dicto['language'] = 'all' + params = photon.request(query, dicto) + self.assertNotIn('lang', params['url']) + + dicto['language'] = 'al' + params = photon.request(query, dicto) + self.assertNotIn('lang', params['url']) + + dicto['language'] = 'fr' + params = photon.request(query, dicto) + self.assertIn('fr', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, photon.response, None) + self.assertRaises(AttributeError, photon.response, []) + self.assertRaises(AttributeError, photon.response, '') + self.assertRaises(AttributeError, photon.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(photon.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(photon.response(response), []) + + json = """ + { + "features": [ + { + "properties": { + "osm_key": "waterway", + "extent": [ + -1.4508446, + 51.1614997, + -1.4408036, + 51.1525635 + ], + "name": "This is the title", + "state": "England", + "osm_id": 114823817, + "osm_type": "W", + "osm_value": "river", + "city": "Test Valley", + "country": "United Kingdom" + }, + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + -1.4458571, + 51.1576661 + ] + } + }, + { + "properties": { + "osm_key": "place", + "street": "Rue", + "state": "Ile-de-France", + "osm_id": 129211377, + "osm_type": "R", + "housenumber": "10", + "postcode": "75011", + "osm_value": "house", + "city": "Paris", + "country": "France" + }, + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + 2.3725025, + 48.8654481 + ] + } + }, + { + "properties": { + "osm_key": "amenity", + "street": "Allée", + "name": "Bibliothèque", + "state": "Ile-de-France", + "osm_id": 1028573132, + "osm_type": "N", + "postcode": "75001", + "osm_value": "library", + "city": "Paris", + "country": "France" + }, + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + 2.3445634, + 48.862494 + ] + } + }, + { + "properties": { + "osm_key": "amenity", + "osm_id": 1028573132, + "osm_type": "Y", + "postcode": "75001", + "osm_value": "library", + "city": "Paris", + "country": "France" + }, + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + 2.3445634, + 48.862494 + ] + } + }, + { + } + ], + "type": "FeatureCollection" + } + """ + response = mock.Mock(text=json) + results = photon.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 3) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['content'], '') + self.assertEqual(results[0]['longitude'], -1.4458571) + self.assertEqual(results[0]['latitude'], 51.1576661) + self.assertIn(-1.4508446, results[0]['boundingbox']) + self.assertIn(51.1614997, results[0]['boundingbox']) + self.assertIn(-1.4408036, results[0]['boundingbox']) + self.assertIn(51.1525635, results[0]['boundingbox']) + self.assertIn('type', results[0]['geojson']) + self.assertEqual(results[0]['geojson']['type'], 'Point') + self.assertEqual(results[0]['address'], None) + self.assertEqual(results[0]['osm']['type'], 'way') + self.assertEqual(results[0]['osm']['id'], 114823817) + self.assertEqual(results[0]['url'], 'https://openstreetmap.org/way/114823817') + self.assertEqual(results[1]['osm']['type'], 'relation') + self.assertEqual(results[2]['address']['name'], u'Bibliothèque') + self.assertEqual(results[2]['address']['house_number'], None) + self.assertEqual(results[2]['address']['locality'], 'Paris') + self.assertEqual(results[2]['address']['postcode'], '75001') + self.assertEqual(results[2]['address']['country'], 'France') + self.assertEqual(results[2]['osm']['type'], 'node') diff --git a/searx/tests/engines/test_startpage.py b/searx/tests/engines/test_startpage.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import startpage +from searx.testing import SearxTestCase + + +class TestStartpageEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = startpage.request(query, dicto) + self.assertIn('url', params) + self.assertIn('startpage.com', params['url']) + self.assertIn('data', params) + self.assertIn('query', params['data']) + self.assertIn(query, params['data']['query']) + self.assertIn('with_language', params['data']) + self.assertIn('lang_fr', params['data']['with_language']) + + dicto['language'] = 'all' + params = startpage.request(query, dicto) + self.assertNotIn('with_language', params['data']) + + def test_response(self): + self.assertRaises(AttributeError, startpage.response, None) + self.assertRaises(AttributeError, startpage.response, []) + self.assertRaises(AttributeError, startpage.response, '') + self.assertRaises(AttributeError, startpage.response, '[]') + + response = mock.Mock(content='<html></html>') + self.assertEqual(startpage.response(response), []) + + html = """ + <div class='result' style=' *width : auto; *margin-right : 10%;'> + <h3> + <a href='http://this.should.be.the.link/' id='title_2' name='title_2' > + This should be the title + </a> + <span id='title_stars_2' name='title_stars_2'> </span> + </h3> + <p class='desc'> + This should be the content. + </p> + <p> + <span class='url'>www.speed<b>test</b>.net/fr/ + </span> + - + <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata=" + class='proxy'> + Navigation avec Ixquick Proxy + </A> + - + <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid= + &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0= + &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'> + Mis en surbrillance + </A> + </p> + </div> + """ + response = mock.Mock(content=html) + results = startpage.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This should be the title') + self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') + self.assertEqual(results[0]['content'], 'This should be the content.') + + html = """ + <div class='result' style=' *width : auto; *margin-right : 10%;'> + <h3> + <a href='http://www.google.com/aclk?sa=l&ai=C' id='title_2' name='title_2' > + This should be the title + </a> + <span id='title_stars_2' name='title_stars_2'> </span> + </h3> + <p class='desc'> + This should be the content. + </p> + <p> + <span class='url'>www.speed<b>test</b>.net/fr/ + </span> + - + <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata=" + class='proxy'> + Navigation avec Ixquick Proxy + </A> + - + <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid= + &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0= + &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'> + Mis en surbrillance + </A> + </p> + </div> + <div class='result' style=' *width : auto; *margin-right : 10%;'> + <h3> + <span id='title_stars_2' name='title_stars_2'> </span> + </h3> + <p class='desc'> + This should be the content. + </p> + <p> + <span class='url'>www.speed<b>test</b>.net/fr/ + </span> + </p> + </div> + <div class='result' style=' *width : auto; *margin-right : 10%;'> + <h3> + <a href='http://this.should.be.the.link/' id='title_2' name='title_2' > + This should be the title + </a> + <span id='title_stars_2' name='title_stars_2'> </span> + </h3> + <p> + <span class='url'>www.speed<b>test</b>.net/fr/ + </span> + - + <A class="proxy" id="proxy_link" HREF="https://ixquick-proxy.com/do/spg/proxy?ep=&edata=&ek=&ekdata=" + class='proxy'> + Navigation avec Ixquick Proxy + </A> + - + <A HREF="https://ixquick-proxy.com/do/spg/highlight.pl?l=francais&c=hf&cat=web&q=test&rl=NONE&rid= + &hlq=https://startpage.com/do/search&mtabp=-1&mtcmd=process_search&mtlanguage=francais&mtengine0= + &mtcat=web&u=http:%2F%2Fwww.speedtest.net%2Ffr%2F" class='proxy'> + Mis en surbrillance + </A> + </p> + </div> + """ + response = mock.Mock(content=html) + results = startpage.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['content'], '') diff --git a/searx/tests/engines/test_subtitleseeker.py b/searx/tests/engines/test_subtitleseeker.py @@ -0,0 +1,169 @@ +from collections import defaultdict +import mock +from searx.engines import subtitleseeker +from searx.testing import SearxTestCase + + +class TestSubtitleseekerEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = subtitleseeker.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('subtitleseeker.com' in params['url']) + + def test_response(self): + dicto = defaultdict(dict) + dicto['language'] = 'fr_FR' + response = mock.Mock(search_params=dicto) + + self.assertRaises(AttributeError, subtitleseeker.response, None) + self.assertRaises(AttributeError, subtitleseeker.response, []) + self.assertRaises(AttributeError, subtitleseeker.response, '') + self.assertRaises(AttributeError, subtitleseeker.response, '[]') + + response = mock.Mock(text='<html></html>', search_params=dicto) + self.assertEqual(subtitleseeker.response(response), []) + + html = """ + <div class="boxRows"> + <div class="boxRowsInner" style="width:600px;"> + <img src="http://static.subtitleseeker.com/images/movie.gif" + style="width:16px; height:16px;" class="icon"> + <a href="http://this.is.the.url/" + class="blue" title="Title subtitle" > + This is the Title + </a> + <br><br> + <span class="f10b grey-dark arial" style="padding:0px 0px 5px 20px"> + "Alternative Title" + </span> + </div> + <div class="boxRowsInner f12b red" style="width:70px;"> + 1998 + </div> + <div class="boxRowsInner grey-web f12" style="width:120px;"> + <img src="http://static.subtitleseeker.com/images/basket_put.png" + style="width:16px; height:16px;" class="icon"> + 1039 Subs + </div> + <div class="boxRowsInner grey-web f10" style="width:130px;"> + <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" + style="width:16px; height:16px;" class="icon"> + 1 hours ago + </div> + <div class="clear"></div> + </div> + """ + response = mock.Mock(text=html, search_params=dicto) + results = subtitleseeker.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the Title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/French/') + self.assertIn('1998', results[0]['content']) + self.assertIn('1039 Subs', results[0]['content']) + self.assertIn('Alternative Title', results[0]['content']) + + html = """ + <div class="boxRows"> + <div class="boxRowsInner" style="width:600px;"> + <img src="http://static.subtitleseeker.com/images/movie.gif" + style="width:16px; height:16px;" class="icon"> + <a href="http://this.is.the.url/" + class="blue" title="Title subtitle" > + This is the Title + </a> + </div> + <div class="boxRowsInner f12b red" style="width:70px;"> + 1998 + </div> + <div class="boxRowsInner grey-web f12" style="width:120px;"> + <img src="http://static.subtitleseeker.com/images/basket_put.png" + style="width:16px; height:16px;" class="icon"> + 1039 Subs + </div> + <div class="boxRowsInner grey-web f10" style="width:130px;"> + <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" + style="width:16px; height:16px;" class="icon"> + 1 hours ago + </div> + <div class="clear"></div> + </div> + """ + dicto['language'] = 'all' + response = mock.Mock(text=html, search_params=dicto) + results = subtitleseeker.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the Title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/') + self.assertIn('1998', results[0]['content']) + self.assertIn('1039 Subs', results[0]['content']) + + html = """ + <div class="boxRows"> + <div class="boxRowsInner" style="width:600px;"> + <img src="http://static.subtitleseeker.com/images/movie.gif" + style="width:16px; height:16px;" class="icon"> + <a href="http://this.is.the.url/" + class="blue" title="Title subtitle" > + This is the Title + </a> + </div> + <div class="boxRowsInner f12b red" style="width:70px;"> + 1998 + </div> + <div class="boxRowsInner grey-web f12" style="width:120px;"> + <img src="http://static.subtitleseeker.com/images/basket_put.png" + style="width:16px; height:16px;" class="icon"> + 1039 Subs + </div> + <div class="boxRowsInner grey-web f10" style="width:130px;"> + <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" + style="width:16px; height:16px;" class="icon"> + 1 hours ago + </div> + <div class="clear"></div> + </div> + """ + subtitleseeker.language = 'English' + response = mock.Mock(text=html, search_params=dicto) + results = subtitleseeker.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the Title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/English/') + self.assertIn('1998', results[0]['content']) + self.assertIn('1039 Subs', results[0]['content']) + + html = """ + <div class="boxRowsInner" style="width:600px;"> + <img src="http://static.subtitleseeker.com/images/movie.gif" + style="width:16px; height:16px;" class="icon"> + <a href="http://this.is.the.url/" + class="blue" title="Title subtitle" > + This is the Title + </a> + </div> + <div class="boxRowsInner f12b red" style="width:70px;"> + 1998 + </div> + <div class="boxRowsInner grey-web f12" style="width:120px;"> + <img src="http://static.subtitleseeker.com/images/basket_put.png" + style="width:16px; height:16px;" class="icon"> + 1039 Subs + </div> + <div class="boxRowsInner grey-web f10" style="width:130px;"> + <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" + style="width:16px; height:16px;" class="icon"> + 1 hours ago + </div> + """ + response = mock.Mock(text=html, search_params=dicto) + results = subtitleseeker.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_twitter.py b/searx/tests/engines/test_twitter.py @@ -0,0 +1,502 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import twitter +from searx.testing import SearxTestCase + + +class TestTwitterEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = twitter.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('twitter.com', params['url']) + self.assertIn('cookies', params) + self.assertIn('lang', params['cookies']) + self.assertIn('fr', params['cookies']['lang']) + + dicto['language'] = 'all' + params = twitter.request(query, dicto) + self.assertIn('cookies', params) + self.assertIn('lang', params['cookies']) + self.assertIn('en', params['cookies']['lang']) + + def test_response(self): + self.assertRaises(AttributeError, twitter.response, None) + self.assertRaises(AttributeError, twitter.response, []) + self.assertRaises(AttributeError, twitter.response, '') + self.assertRaises(AttributeError, twitter.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(twitter.response(response), []) + + html = """ + <li class="js-stream-item stream-item stream-item expanding-stream-item" data-item-id="563005573290287105" + id="stream-item-tweet-563005573290287105" data-item-type="tweet"> + <div class="tweet original-tweet js-stream-tweet js-actionable-tweet js-profile-popup-actionable + js-original-tweet has-cards has-native-media" data-tweet-id="563005573290287105" data-disclosure-type="" + data-item-id="563005573290287105" data-screen-name="Jalopnik" data-name="Jalopnik" + data-user-id="3060631" data-has-native-media="true" data-has-cards="true" data-card-type="photo" + data-expanded-footer="&lt;div class=&quot;js-tweet-details-fixer + tweet-details-fixer&quot;&gt;&#10;&#10;&#10; + &lt;div class=&quot;cards-media-container js-media-container&quot;&gt;&lt;div + data-card-url=&quot;//twitter.com/Jalopnik/status/563005573290287105/photo/1&quot; data-card-type=&quot; + photo&quot; class=&quot;cards-base cards-multimedia&quot; data-element-context=&quot;platform_photo_card + &quot;&gt;&#10;&#10;&#10; &lt;a class=&quot;media media-thumbnail twitter-timeline-link is-preview + &quot; data-url=&quot;https://pbs.twimg.com/media/B9Aylf5IMAAuziP.jpg:large&quot; + data-resolved-url-large=&quot;https://pbs.twimg.com/media/B9Aylf5IMAAuziP.jpg:large&quot; + href=&quot;//twitter.com/Jalopnik/status/563005573290287105/photo/1&quot;&gt;&#10; + &lt;div class=&quot;&quot;&gt;&#10; &lt;img src=&quot; + https://pbs.twimg.com/media/B9Aylf5IMAAuziP.jpg&quot; + alt=&quot;Embedded image permalink&quot; width=&quot;636&quot; height=&quot;309&quot;&gt;&#10; + &lt;/div&gt;&#10;&#10; &lt;/a&gt;&#10;&#10; &lt;div class=&quot;cards-content&quot;&gt;&#10; + &lt;div class=&quot;byline&quot;&gt;&#10; &#10; &lt;/div&gt;&#10; &#10; &lt;/div&gt;&#10; + &#10;&lt;/div&gt;&#10;&#10;&#10;&#10;&#10;&lt;/div&gt;&#10;&#10;&#10;&#10; &lt;div + class=&quot;js-machine-translated-tweet-container&quot;&gt;&lt;/div&gt;&#10; &lt;div + class=&quot;js-tweet-stats-container tweet-stats-container &quot;&gt;&#10; &lt;/div&gt;&#10;&#10; + &lt;div class=&quot;client-and-actions&quot;&gt;&#10; &lt;span class=&quot;metadata&quot;&gt;&#10; + &lt;span&gt;5:06 PM - 4 Feb 2015&lt;/span&gt;&#10;&#10; &amp;middot; &lt;a + class=&quot;permalink-link js-permalink js-nav&quot; href=&quot;/Jalopnik/status/563005573290287105 + &quot;tabindex=&quot;-1&quot;&gt;Details&lt;/a&gt;&#10; &#10;&#10; &#10; &#10; + &#10;&#10; &lt;/span&gt;&#10;&lt;/div&gt;&#10;&#10;&#10;&lt;/div&gt;&#10;" data-you-follow="false" + data-you-block="false"> + <div class="context"> + </div> + <div class="content"> + <div class="stream-item-header"> + <a class="account-group js-account-group js-action-profile js-user-profile-link js-nav" + href="/Jalopnik" data-user-id="3060631"> + <img class="avatar js-action-profile-avatar" + src="https://pbs.twimg.com/profile_images/2976430168/5cd4a59_bigger.jpeg" alt=""> + <strong class="fullname js-action-profile-name show-popup-with-id" data-aria-label-part> + Jalopnik + </strong> + <span>&rlm;</span> + <span class="username js-action-profile-name" data-aria-label-part> + <s>@</s><b>TitleName</b> + </span> + </a> + <small class="time"> + <a href="/this.is.the.url" + class="tweet-timestamp js-permalink js-nav js-tooltip" title="5:06 PM - 4 Feb 2015" > + <span class="u-hiddenVisually" data-aria-label-part="last">17 minutes ago</span> + </a> + </small> + </div> + <p class="js-tweet-text tweet-text" lang="en" data-aria-label-part="0"> + This is the content étude à€ + <a href="http://t.co/nRWsqQAwBL" rel="nofollow" dir="ltr" + data-expanded-url="http://jalo.ps/ReMENu4" class="twitter-timeline-link" + target="_blank" title="http://jalo.ps/ReMENu4" > + <span class="tco-ellipsis"> + </span> + <span class="invisible">http://</span><span class="js-display-url">link.in.tweet</span> + <span class="invisible"></span> + <span class="tco-ellipsis"> + <span class="invisible">&nbsp;</span> + </span> + </a> + <a href="http://t.co/rbFsfeE0l3" class="twitter-timeline-link u-hidden" + data-pre-embedded="true" dir="ltr"> + pic.twitter.com/rbFsfeE0l3 + </a> + </p> + <div class="expanded-content js-tweet-details-dropdown"> + </div> + <div class="stream-item-footer"> + <a class="details with-icn js-details" href="/Jalopnik/status/563005573290287105"> + <span class="Icon Icon--photo"> + </span> + <b> + <span class="expand-stream-item js-view-details"> + View photo + </span> + <span class="collapse-stream-item js-hide-details"> + Hide photo + </span> + </b> + </a> + <span class="ProfileTweet-action--reply u-hiddenVisually"> + <span class="ProfileTweet-actionCount" aria-hidden="true" data-tweet-stat-count="0"> + <span class="ProfileTweet-actionCountForAria" >0 replies</span> + </span> + </span> + <span class="ProfileTweet-action--retweet u-hiddenVisually"> + <span class="ProfileTweet-actionCount" data-tweet-stat-count="8"> + <span class="ProfileTweet-actionCountForAria" data-aria-label-part>8 retweets</span> + </span> + </span> + <span class="ProfileTweet-action--favorite u-hiddenVisually"> + <span class="ProfileTweet-actionCount" data-tweet-stat-count="14"> + <span class="ProfileTweet-actionCountForAria" data-aria-label-part>14 favorites</span> + </span> + </span> + <div role="group" aria-label="Tweet actions" class="ProfileTweet-actionList u-cf js-actions"> + <div class="ProfileTweet-action ProfileTweet-action--reply"> + <button class="ProfileTweet-actionButton u-textUserColorHover js-actionButton + js-actionReply" data-modal="ProfileTweet-reply" type="button" title="Reply"> + <span class="Icon Icon--reply"> + </span> + <span class="u-hiddenVisually">Reply</span> + <span class="ProfileTweet-actionCount u-textUserColorHover + ProfileTweet-actionCount--isZero"> + <span class="ProfileTweet-actionCountForPresentation" aria-hidden="true"> + </span> + </span> + </button> + </div> + <div class="ProfileTweet-action ProfileTweet-action--retweet js-toggleState js-toggleRt"> + <button class="ProfileTweet-actionButton js-actionButton js-actionRetweet js-tooltip" + title="Retweet" data-modal="ProfileTweet-retweet" type="button"> + <span class="Icon Icon--retweet"> + </span> + <span class="u-hiddenVisually">Retweet</span> + <span class="ProfileTweet-actionCount"> + <span class="ProfileTweet-actionCountForPresentation">8</span> + </span> + </button> + <button class="ProfileTweet-actionButtonUndo js-actionButton js-actionRetweet" + data-modal="ProfileTweet-retweet" title="Undo retweet" type="button"> + <span class="Icon Icon--retweet"> + </span> + <span class="u-hiddenVisually">Retweeted</span> + <span class="ProfileTweet-actionCount"> + <span class="ProfileTweet-actionCountForPresentation">8</span> + </span> + </button> + </div> + <div class="ProfileTweet-action ProfileTweet-action--favorite js-toggleState"> + <button class="ProfileTweet-actionButton js-actionButton js-actionFavorite js-tooltip" + title="Favorite" type="button"> + <span class="Icon Icon--favorite"> + </span> + <span class="u-hiddenVisually">Favorite</span> + <span class="ProfileTweet-actionCount"> + <span class="ProfileTweet-actionCountForPresentation">14</span> + </span> + </button> + <button class="ProfileTweet-actionButtonUndo u-linkClean js-actionButton + js-actionFavorite" title="Undo favorite" type="button"> + <span class="Icon Icon--favorite"> + </span> + <span class="u-hiddenVisually">Favorited</span> + <span class="ProfileTweet-actionCount"> + <span class="ProfileTweet-actionCountForPresentation"> + 14 + </span> + </span> + </button> + </div> + <div class="ProfileTweet-action ProfileTweet-action--more js-more-ProfileTweet-actions"> + <div class="dropdown"> + <button class="ProfileTweet-actionButton u-textUserColorHover dropdown-toggle + js-tooltip js-dropdown-toggle" type="button" title="More"> + <span class="Icon Icon--dots"> + </span> + <span class="u-hiddenVisually">More</span> + </button> + <div class="dropdown-menu"> + <div class="dropdown-caret"> + <div class="caret-outer"> + </div> + <div class="caret-inner"> + </div> + </div> + <ul> + <li class="share-via-dm js-actionShareViaDM" data-nav="share_tweet_dm"> + <button type="button" class="dropdown-link"> + Share via Direct Message + </button> + </li> + <li class="embed-link js-actionEmbedTweet" data-nav="embed_tweet"> + <button type="button" class="dropdown-link"> + Embed Tweet + </button> + </li> + <li class="mute-user-item pretty-link"> + <button type="button" class="dropdown-link"> + Mute + </button> + </li> + <li class="unmute-user-item pretty-link"> + <button type="button" class="dropdown-link"> + Unmute + </button> + </li> + <li class="block-or-report-link js-actionBlockOrReport" + data-nav="block_or_report"> + <button type="button" class="dropdown-link"> + Block or report + </button> + </li> + </ul> + </div> + </div> + </div> + </div> + </div> + </div> + </div> + </li> + """ + response = mock.Mock(text=html) + results = twitter.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], '@TitleName') + self.assertEqual(results[0]['url'], 'https://twitter.com/this.is.the.url') + self.assertIn(u'This is the content', results[0]['content']) + # self.assertIn(u'This is the content étude à€', results[0]['content']) + + html = """ + <li class="js-stream-item stream-item stream-item expanding-stream-item" data-item-id="563005573290287105" + id="stream-item-tweet-563005573290287105" data-item-type="tweet"> + <div class="tweet original-tweet js-stream-tweet js-actionable-tweet js-profile-popup-actionable + js-original-tweet has-cards has-native-media" data-tweet-id="563005573290287105" data-disclosure-type="" + data-item-id="563005573290287105" data-screen-name="Jalopnik" data-name="Jalopnik" + data-user-id="3060631" data-has-native-media="true" data-has-cards="true" data-card-type="photo" + data-expanded-footer="&lt;div class=&quot;js-tweet-details-fixer + tweet-details-fixer&quot;&gt;&#10;&#10;&#10; + &lt;div class=&quot;cards-media-container js-media-container&quot;&gt;&lt;div + data-card-url=&quot;//twitter.com/Jalopnik/status/563005573290287105/photo/1&quot; data-card-type=&quot; + photo&quot; class=&quot;cards-base cards-multimedia&quot; data-element-context=&quot;platform_photo_card + &quot;&gt;&#10;&#10;&#10; &lt;a class=&quot;media media-thumbnail twitter-timeline-link is-preview + &quot; data-url=&quot;https://pbs.twimg.com/media/B9Aylf5IMAAuziP.jpg:large&quot; + data-resolved-url-large=&quot;https://pbs.twimg.com/media/B9Aylf5IMAAuziP.jpg:large&quot; + href=&quot;//twitter.com/Jalopnik/status/563005573290287105/photo/1&quot;&gt;&#10; + &lt;div class=&quot;&quot;&gt;&#10; &lt;img src=&quot; + https://pbs.twimg.com/media/B9Aylf5IMAAuziP.jpg&quot; + alt=&quot;Embedded image permalink&quot; width=&quot;636&quot; height=&quot;309&quot;&gt;&#10; + &lt;/div&gt;&#10;&#10; &lt;/a&gt;&#10;&#10; &lt;div class=&quot;cards-content&quot;&gt;&#10; + &lt;div class=&quot;byline&quot;&gt;&#10; &#10; &lt;/div&gt;&#10; &#10; &lt;/div&gt;&#10; + &#10;&lt;/div&gt;&#10;&#10;&#10;&#10;&#10;&lt;/div&gt;&#10;&#10;&#10;&#10; &lt;div + class=&quot;js-machine-translated-tweet-container&quot;&gt;&lt;/div&gt;&#10; &lt;div + class=&quot;js-tweet-stats-container tweet-stats-container &quot;&gt;&#10; &lt;/div&gt;&#10;&#10; + &lt;div class=&quot;client-and-actions&quot;&gt;&#10; &lt;span class=&quot;metadata&quot;&gt;&#10; + &lt;span&gt;5:06 PM - 4 Feb 2015&lt;/span&gt;&#10;&#10; &amp;middot; &lt;a + class=&quot;permalink-link js-permalink js-nav&quot; href=&quot;/Jalopnik/status/563005573290287105 + &quot;tabindex=&quot;-1&quot;&gt;Details&lt;/a&gt;&#10; &#10;&#10; &#10; &#10; + &#10;&#10; &lt;/span&gt;&#10;&lt;/div&gt;&#10;&#10;&#10;&lt;/div&gt;&#10;" data-you-follow="false" + data-you-block="false"> + <div class="context"> + </div> + <div class="content"> + <div class="stream-item-header"> + <a class="account-group js-account-group js-action-profile js-user-profile-link js-nav" + href="/Jalopnik" data-user-id="3060631"> + <img class="avatar js-action-profile-avatar" + src="https://pbs.twimg.com/profile_images/2976430168/5cd4a59_bigger.jpeg" alt=""> + <strong class="fullname js-action-profile-name show-popup-with-id" data-aria-label-part> + Jalopnik + </strong> + <span>&rlm;</span> + <span class="username js-action-profile-name" data-aria-label-part> + <s>@</s><b>TitleName</b> + </span> + </a> + <small class="time"> + <a href="/this.is.the.url" + class="tweet-timestamp js-permalink js-nav js-tooltip" title="5:06 PM - 4 Feb 2015" > + <span class="_timestamp js-short-timestamp js-relative-timestamp" data-time="1423065963" + data-time-ms="1423065963000" data-long-form="true" aria-hidden="true"> + 17m + </span> + <span class="u-hiddenVisually" data-aria-label-part="last">17 minutes ago</span> + </a> + </small> + </div> + <p class="js-tweet-text tweet-text" lang="en" data-aria-label-part="0"> + This is the content étude à€ + <a href="http://t.co/nRWsqQAwBL" rel="nofollow" dir="ltr" + data-expanded-url="http://jalo.ps/ReMENu4" class="twitter-timeline-link" + target="_blank" title="http://jalo.ps/ReMENu4" > + <span class="tco-ellipsis"> + </span> + <span class="invisible">http://</span><span class="js-display-url">link.in.tweet</span> + <span class="invisible"></span> + <span class="tco-ellipsis"> + <span class="invisible">&nbsp;</span> + </span> + </a> + <a href="http://t.co/rbFsfeE0l3" class="twitter-timeline-link u-hidden" + data-pre-embedded="true" dir="ltr"> + pic.twitter.com/rbFsfeE0l3 + </a> + </p> + <div class="expanded-content js-tweet-details-dropdown"> + </div> + <div class="stream-item-footer"> + <a class="details with-icn js-details" href="/Jalopnik/status/563005573290287105"> + <span class="Icon Icon--photo"> + </span> + <b> + <span class="expand-stream-item js-view-details"> + View photo + </span> + <span class="collapse-stream-item js-hide-details"> + Hide photo + </span> + </b> + </a> + <span class="ProfileTweet-action--reply u-hiddenVisually"> + <span class="ProfileTweet-actionCount" aria-hidden="true" data-tweet-stat-count="0"> + <span class="ProfileTweet-actionCountForAria" >0 replies</span> + </span> + </span> + <span class="ProfileTweet-action--retweet u-hiddenVisually"> + <span class="ProfileTweet-actionCount" data-tweet-stat-count="8"> + <span class="ProfileTweet-actionCountForAria" data-aria-label-part>8 retweets</span> + </span> + </span> + <span class="ProfileTweet-action--favorite u-hiddenVisually"> + <span class="ProfileTweet-actionCount" data-tweet-stat-count="14"> + <span class="ProfileTweet-actionCountForAria" data-aria-label-part>14 favorites</span> + </span> + </span> + <div role="group" aria-label="Tweet actions" class="ProfileTweet-actionList u-cf js-actions"> + <div class="ProfileTweet-action ProfileTweet-action--reply"> + <button class="ProfileTweet-actionButton u-textUserColorHover js-actionButton + js-actionReply" data-modal="ProfileTweet-reply" type="button" title="Reply"> + <span class="Icon Icon--reply"> + </span> + <span class="u-hiddenVisually">Reply</span> + <span class="ProfileTweet-actionCount u-textUserColorHover + ProfileTweet-actionCount--isZero"> + <span class="ProfileTweet-actionCountForPresentation" aria-hidden="true"> + </span> + </span> + </button> + </div> + <div class="ProfileTweet-action ProfileTweet-action--retweet js-toggleState js-toggleRt"> + <button class="ProfileTweet-actionButton js-actionButton js-actionRetweet js-tooltip" + title="Retweet" data-modal="ProfileTweet-retweet" type="button"> + <span class="Icon Icon--retweet"> + </span> + <span class="u-hiddenVisually">Retweet</span> + <span class="ProfileTweet-actionCount"> + <span class="ProfileTweet-actionCountForPresentation">8</span> + </span> + </button> + <button class="ProfileTweet-actionButtonUndo js-actionButton js-actionRetweet" + data-modal="ProfileTweet-retweet" title="Undo retweet" type="button"> + <span class="Icon Icon--retweet"> + </span> + <span class="u-hiddenVisually">Retweeted</span> + <span class="ProfileTweet-actionCount"> + <span class="ProfileTweet-actionCountForPresentation">8</span> + </span> + </button> + </div> + <div class="ProfileTweet-action ProfileTweet-action--favorite js-toggleState"> + <button class="ProfileTweet-actionButton js-actionButton js-actionFavorite js-tooltip" + title="Favorite" type="button"> + <span class="Icon Icon--favorite"> + </span> + <span class="u-hiddenVisually">Favorite</span> + <span class="ProfileTweet-actionCount"> + <span class="ProfileTweet-actionCountForPresentation">14</span> + </span> + </button> + <button class="ProfileTweet-actionButtonUndo u-linkClean js-actionButton + js-actionFavorite" title="Undo favorite" type="button"> + <span class="Icon Icon--favorite"> + </span> + <span class="u-hiddenVisually">Favorited</span> + <span class="ProfileTweet-actionCount"> + <span class="ProfileTweet-actionCountForPresentation"> + 14 + </span> + </span> + </button> + </div> + <div class="ProfileTweet-action ProfileTweet-action--more js-more-ProfileTweet-actions"> + <div class="dropdown"> + <button class="ProfileTweet-actionButton u-textUserColorHover dropdown-toggle + js-tooltip js-dropdown-toggle" type="button" title="More"> + <span class="Icon Icon--dots"> + </span> + <span class="u-hiddenVisually">More</span> + </button> + <div class="dropdown-menu"> + <div class="dropdown-caret"> + <div class="caret-outer"> + </div> + <div class="caret-inner"> + </div> + </div> + <ul> + <li class="share-via-dm js-actionShareViaDM" data-nav="share_tweet_dm"> + <button type="button" class="dropdown-link"> + Share via Direct Message + </button> + </li> + <li class="embed-link js-actionEmbedTweet" data-nav="embed_tweet"> + <button type="button" class="dropdown-link"> + Embed Tweet + </button> + </li> + <li class="mute-user-item pretty-link"> + <button type="button" class="dropdown-link"> + Mute + </button> + </li> + <li class="unmute-user-item pretty-link"> + <button type="button" class="dropdown-link"> + Unmute + </button> + </li> + <li class="block-or-report-link js-actionBlockOrReport" + data-nav="block_or_report"> + <button type="button" class="dropdown-link"> + Block or report + </button> + </li> + </ul> + </div> + </div> + </div> + </div> + </div> + </div> + </div> + </li> + """ + response = mock.Mock(text=html) + results = twitter.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], '@TitleName') + self.assertEqual(results[0]['url'], 'https://twitter.com/this.is.the.url') + self.assertIn(u'This is the content', results[0]['content']) + + html = """ + <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO"> + <div Class="sa_mc"> + <div class="sb_tlst"> + <h2> + <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1"> + <strong>This</strong> should be the title</a> + </h2> + </div> + <div class="sb_meta"> + <cite> + <strong>this</strong>.meta.com</cite> + <span class="c_tlbxTrg"> + <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1"> + </span> + </span> + </div> + <p> + <strong>This</strong> should be the content.</p> + </div> + </li> + """ + response = mock.Mock(text=html) + results = twitter.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_yacy.py b/searx/tests/engines/test_yacy.py @@ -0,0 +1,96 @@ +from collections import defaultdict +import mock +from searx.engines import yacy +from searx.testing import SearxTestCase + + +class TestYacyEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = yacy.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('localhost', params['url']) + self.assertIn('fr', params['url']) + + dicto['language'] = 'all' + params = yacy.request(query, dicto) + self.assertIn('url', params) + self.assertNotIn('lr=lang_', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, yacy.response, None) + self.assertRaises(AttributeError, yacy.response, []) + self.assertRaises(AttributeError, yacy.response, '') + self.assertRaises(AttributeError, yacy.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(yacy.response(response), []) + + response = mock.Mock(text='{"data": []}') + self.assertEqual(yacy.response(response), []) + + json = """ + { + "channels": [ + { + "title": "YaCy P2P-Search for test", + "description": "Search for test", + "link": "http://search.yacy.de:7001/yacysearch.html?query=test&amp;resource=global&amp;contentdom=0", + "image": { + "url": "http://search.yacy.de:7001/env/grafics/yacy.png", + "title": "Search for test", + "link": "http://search.yacy.de:7001/yacysearch.html?query=test&amp;resource=global&amp;contentdom=0" + }, + "totalResults": "249", + "startIndex": "0", + "itemsPerPage": "5", + "searchTerms": "test", + "items": [ + { + "title": "This is the title", + "link": "http://this.is.the.url", + "code": "", + "description": "This should be the content", + "pubDate": "Sat, 08 Jun 2013 02:00:00 +0200", + "size": "44213", + "sizename": "43 kbyte", + "guid": "lzh_1T_5FP-A", + "faviconCode": "XTS4uQ_5FP-A", + "host": "www.gamestar.de", + "path": "/spiele/city-of-heroes-freedom/47019.html", + "file": "47019.html", + "urlhash": "lzh_1T_5FP-A", + "ranking": "0.20106804" + }, + { + "title": "This is the title2", + "icon": "/ViewImage.png?maxwidth=96&amp;maxheight=96&amp;code=7EbAbW6BpPOA", + "image": "http://image.url/image.png", + "cache": "/ViewImage.png?quadratic=&amp;url=http://golem.ivwbox.de/cgi-bin/ivw/CP/G_INET?d=14071378", + "url": "http://this.is.the.url", + "urlhash": "7EbAbW6BpPOA", + "host": "www.golem.de", + "width": "-1", + "height": "-1" + } + ] + } + ] + } + """ + response = mock.Mock(text=json) + results = yacy.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url') + self.assertEqual(results[0]['content'], 'This should be the content') + self.assertEqual(results[1]['img_src'], 'http://image.url/image.png') + self.assertEqual(results[1]['content'], '') + self.assertEqual(results[1]['url'], 'http://this.is.the.url') + self.assertEqual(results[1]['title'], 'This is the title2') diff --git a/searx/tests/engines/test_yahoo.py b/searx/tests/engines/test_yahoo.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import yahoo +from searx.testing import SearxTestCase + + +class TestYahooEngine(SearxTestCase): + + def test_parse_url(self): + test_url = 'http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb' +\ + '2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=' +\ + 'dtcJsfP4mEeBOjnVfUQ-' + url = yahoo.parse_url(test_url) + self.assertEqual('https://this.is.the.url/', url) + + test_url = 'http://r.search.yahoo.com/_ylt=A0LElb9JUSKcAEGRXNyoA;_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb' +\ + '2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10/RU=https%3a%2f%2fthis.is.the.url%2f/RS=' +\ + 'dtcJsfP4mEeBOjnVfUQ-' + url = yahoo.parse_url(test_url) + self.assertEqual('https://this.is.the.url/', url) + + test_url = 'https://this.is.the.url/' + url = yahoo.parse_url(test_url) + self.assertEqual('https://this.is.the.url/', url) + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = yahoo.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('search.yahoo.com', params['url']) + self.assertIn('fr', params['url']) + self.assertIn('cookies', params) + self.assertIn('sB', params['cookies']) + self.assertIn('fr', params['cookies']['sB']) + + dicto['language'] = 'all' + params = yahoo.request(query, dicto) + self.assertIn('cookies', params) + self.assertIn('sB', params['cookies']) + self.assertIn('en', params['cookies']['sB']) + self.assertIn('en', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, yahoo.response, None) + self.assertRaises(AttributeError, yahoo.response, []) + self.assertRaises(AttributeError, yahoo.response, '') + self.assertRaises(AttributeError, yahoo.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(yahoo.response(response), []) + + html = """ + <div class="res"> + <div> + <h3> + <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA; + _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 + /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1"> + <b>This</b> is the title + </a> + </h3> + </div> + <span class="url" dir="ltr">www.<b>test</b>.com</span> + <div class="abstr"> + <b>This</b> is the content + </div> + </div> + <div id="satat" data-bns="Yahoo" data-bk="124.1"> + <h2>Also Try</h2> + <table> + <tbody> + <tr> + <td> + <a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" > + <span> + <b></b>This is <b>the suggestion</b> + </span> + </a> + </td> + </tr> + </tbody> + </table> + </div> + """ + response = mock.Mock(text=html) + results = yahoo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://this.is.the.url/') + self.assertEqual(results[0]['content'], 'This is the content') + self.assertEqual(results[1]['suggestion'], 'This is the suggestion') + + html = """ + <div class="res"> + <div> + <h3> + <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA; + _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 + /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1"> + <b>This</b> is the title + </a> + </h3> + </div> + <span class="url" dir="ltr">www.<b>test</b>.com</span> + <div class="abstr"> + <b>This</b> is the content + </div> + </div> + <div class="res"> + <div> + <h3> + <a id="link-1" class="yschttl spt"> + <b>This</b> is the title + </a> + </h3> + </div> + <span class="url" dir="ltr">www.<b>test</b>.com</span> + <div class="abstr"> + <b>This</b> is the content + </div> + </div> + <div class="res"> + <div> + <h3> + </h3> + </div> + <span class="url" dir="ltr">www.<b>test</b>.com</span> + <div class="abstr"> + <b>This</b> is the content + </div> + </div> + """ + response = mock.Mock(text=html) + results = yahoo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'https://this.is.the.url/') + self.assertEqual(results[0]['content'], 'This is the content') + + html = """ + <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO"> + </li> + """ + response = mock.Mock(text=html) + results = yahoo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_yahoo_news.py b/searx/tests/engines/test_yahoo_news.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +from datetime import datetime +import mock +from searx.engines import yahoo_news +from searx.testing import SearxTestCase + + +class TestYahooNewsEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = yahoo_news.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('news.search.yahoo.com', params['url']) + self.assertIn('fr', params['url']) + self.assertIn('cookies', params) + self.assertIn('sB', params['cookies']) + self.assertIn('fr', params['cookies']['sB']) + + dicto['language'] = 'all' + params = yahoo_news.request(query, dicto) + self.assertIn('cookies', params) + self.assertIn('sB', params['cookies']) + self.assertIn('en', params['cookies']['sB']) + self.assertIn('en', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, yahoo_news.response, None) + self.assertRaises(AttributeError, yahoo_news.response, []) + self.assertRaises(AttributeError, yahoo_news.response, '') + self.assertRaises(AttributeError, yahoo_news.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(yahoo_news.response(response), []) + + html = """ + <div class="res"> + <div> + <h3> + <a class="yschttl spt" href="http://this.is.the.url" target="_blank"> + This is + the <b>title</b>... + </a> + </h3> + </div> + <span class="url">Business via Yahoo! Finance</span> &nbsp; <span class="timestamp">Feb 03 09:45am</span> + <div class="abstr"> + This is the content + </div> + </div> + """ + response = mock.Mock(text=html) + results = yahoo_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title...') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/') + self.assertEqual(results[0]['content'], 'This is the content') + + html = """ + <div class="res"> + <div> + <h3> + <a class="yschttl spt" href="http://this.is.the.url" target="_blank"> + This is + the <b>title</b>... + </a> + </h3> + </div> + <span class="url">Business via Yahoo!</span> &nbsp; <span class="timestamp">2 hours, 22 minutes ago</span> + <div class="abstr"> + This is the content + </div> + </div> + <div class="res"> + <div> + <h3> + <a class="yschttl spt" href="http://this.is.the.url" target="_blank"> + This is + the <b>title</b>... + </a> + </h3> + </div> + <span class="url">Business via Yahoo!</span> &nbsp; <span class="timestamp">22 minutes ago</span> + <div class="abstr"> + This is the content + </div> + </div> + <div class="res"> + <div> + <h3> + <a class="yschttl spt" href="http://this.is.the.url" target="_blank"> + This is + the <b>title</b>... + </a> + </h3> + </div> + <span class="url">Business via Yahoo!</span> &nbsp; <span class="timestamp">Feb 03 09:45am 1900</span> + <div class="abstr"> + This is the content + </div> + </div> + """ + response = mock.Mock(text=html) + results = yahoo_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 3) + self.assertEqual(results[0]['title'], 'This is the title...') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/') + self.assertEqual(results[0]['content'], 'This is the content') + self.assertEqual(results[2]['publishedDate'].year, datetime.now().year) + + html = """ + <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO"> + <div Class="sa_mc"> + <div class="sb_tlst"> + <h2> + <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1"> + <strong>This</strong> should be the title</a> + </h2> + </div> + <div class="sb_meta"> + <cite> + <strong>this</strong>.meta.com</cite> + <span class="c_tlbxTrg"> + <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1"> + </span> + </span> + </div> + <p> + <strong>This</strong> should be the content.</p> + </div> + </li> + """ + response = mock.Mock(text=html) + results = yahoo_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py @@ -3,25 +3,39 @@ from searx.tests.engines.test_bing_images import * # noqa from searx.tests.engines.test_bing_news import * # noqa from searx.tests.engines.test_blekko_images import * # noqa from searx.tests.engines.test_btdigg import * # noqa +from searx.tests.engines.test_currency_convert import * # noqa from searx.tests.engines.test_dailymotion import * # noqa from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_deviantart import * # noqa from searx.tests.engines.test_digg import * # noqa +from searx.tests.engines.test_duckduckgo import * # noqa +from searx.tests.engines.test_duckduckgo_definitions import * # noqa from searx.tests.engines.test_dummy import * # noqa +from searx.tests.engines.test_faroo import * # noqa from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_flickr_noapi import * # noqa from searx.tests.engines.test_gigablast import * # noqa from searx.tests.engines.test_github import * # noqa -from searx.tests.engines.test_www1x import * # noqa +from searx.tests.engines.test_google import * # noqa from searx.tests.engines.test_google_images import * # noqa from searx.tests.engines.test_google_news import * # noqa from searx.tests.engines.test_kickass import * # noqa +from searx.tests.engines.test_mediawiki import * # noqa from searx.tests.engines.test_mixcloud import * # noqa +from searx.tests.engines.test_openstreetmap import * # noqa +from searx.tests.engines.test_photon import * # noqa from searx.tests.engines.test_piratebay import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa +from searx.tests.engines.test_startpage import * # noqa +from searx.tests.engines.test_subtitleseeker import * # noqa +from searx.tests.engines.test_twitter import * # noqa from searx.tests.engines.test_vimeo import * # noqa +from searx.tests.engines.test_www1x import * # noqa from searx.tests.engines.test_www500px import * # noqa +from searx.tests.engines.test_yacy import * # noqa +from searx.tests.engines.test_yahoo import * # noqa from searx.tests.engines.test_youtube import * # noqa +from searx.tests.engines.test_yahoo_news import * # noqa