commit: f5128c7cb96d7564cc7bebeae13a319557b84aaa
parent: 4cffd78650c3f1dfce413ae0a1cd0453ebe6f277
Author: Alexandre Flament <alex@al-f.net>
Date: Sun, 12 Feb 2017 14:58:49 +0100
[mod] add/modify image fetching for bing_news, qwant and twitter engines
Diffstat:
4 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
@@ -112,12 +112,11 @@ def response(resp):
# append result
if thumbnail is not None:
- results.append({'template': 'videos.html',
- 'url': url,
+ results.append({'url': url,
'title': title,
'publishedDate': publishedDate,
'content': content,
- 'thumbnail': thumbnail})
+ 'img_src': thumbnail})
else:
results.append({'url': url,
'title': title,
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
@@ -96,14 +96,27 @@ def response(resp):
'thumbnail_src': thumbnail_src,
'img_src': img_src})
- elif (category_to_keyword.get(categories[0], '') == 'news' or
- category_to_keyword.get(categories[0], '') == 'social'):
+ elif category_to_keyword.get(categories[0], '') == 'social':
published_date = datetime.fromtimestamp(result['date'], None)
+ img_src = result.get('img', None)
+ results.append({'url': res_url,
+ 'title': title,
+ 'publishedDate': published_date,
+ 'content': content,
+ 'img_src': img_src})
+ elif category_to_keyword.get(categories[0], '') == 'news':
+ published_date = datetime.fromtimestamp(result['date'], None)
+ media = result.get('media', [])
+ if len(media) > 0:
+ img_src = media[0].get('pict', {}).get('url', None)
+ else:
+ img_src = None
results.append({'url': res_url,
'title': title,
'publishedDate': published_date,
- 'content': content})
+ 'content': content,
+ 'img_src': img_src})
return results
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
@@ -27,6 +27,7 @@ search_url = base_url + 'search?'
# specific xpath variables
results_xpath = '//li[@data-item-type="tweet"]'
+avatar_xpath = './/img[contains(@class, "avatar")]/@src'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/span[contains(@class, "username")]'
content_xpath = './/p[contains(@class, "tweet-text")]'
@@ -57,6 +58,8 @@ def response(resp):
try:
link = tweet.xpath(link_xpath)[0]
content = extract_text(tweet.xpath(content_xpath)[0])
+ img_src = tweet.xpath(avatar_xpath)[0]
+ img_src = img_src.replace('_bigger', '_normal')
except Exception:
continue
@@ -71,12 +74,14 @@ def response(resp):
results.append({'url': url,
'title': title,
'content': content,
+ 'img_src': img_src,
'publishedDate': publishedDate})
else:
# append result
results.append({'url': url,
'title': title,
- 'content': content})
+ 'content': content,
+ 'img_src': img_src})
# return results
return results
diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py
@@ -81,11 +81,11 @@ class TestBingNewsEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'Title')
self.assertEqual(results[0]['url'], 'http://url.of.article/')
self.assertEqual(results[0]['content'], 'Article Content')
- self.assertEqual(results[0]['thumbnail'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
+ self.assertEqual(results[0]['img_src'], 'https://www.bing.com/th?id=ON.13371337133713371337133713371337')
self.assertEqual(results[1]['title'], 'Another Title')
self.assertEqual(results[1]['url'], 'http://another.url.of.article/')
self.assertEqual(results[1]['content'], 'Another Article Content')
- self.assertNotIn('thumbnail', results[1])
+ self.assertNotIn('img_src', results[1])
html = """<?xml version="1.0" encoding="utf-8" ?>
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">
@@ -120,7 +120,7 @@ class TestBingNewsEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'Title')
self.assertEqual(results[0]['url'], 'http://another.url.of.article/')
self.assertEqual(results[0]['content'], 'Article Content')
- self.assertEqual(results[0]['thumbnail'], 'http://another.bing.com/image')
+ self.assertEqual(results[0]['img_src'], 'http://another.bing.com/image')
html = """<?xml version="1.0" encoding="utf-8" ?>
<rss version="2.0" xmlns:News="https://www.bing.com:443/news/search?q=python&setmkt=en-US&first=1&format=RSS">