commit: cf26aba93b96bb1171feb60fefb232a9113b85b0
parent cee15f03755c8e360883918b38e6080c0dce800e
Author: Venca24 <Vaclav.Zouzalik@seznam.cz>
Date: Fri, 4 Jan 2019 15:48:22 +0100
[FIX] google videos thumbnails
Diffstat:
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
@@ -7,15 +7,16 @@
@using-api no
@results HTML
@stable no
- @parse url, title, content
+ @parse url, title, content, thumbnail
"""
from datetime import date, timedelta
from json import loads
from lxml import html
+from searx.engines import logger
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
-
+import re
# engine dependent config
categories = ['videos']
@@ -73,11 +74,24 @@ def response(resp):
url = result.xpath('.//div[@class="r"]/a/@href')[0]
content = extract_text(result.xpath('.//span[@class="st"]'))
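+ # get thumbnails: they are embedded as base64 JPEG data URIs in the "_setImagesSrc" script and looked up by the result's <img> id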
+ script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
+ id = result.xpath('.//div[@class="s"]//img/@id')[0]
+ thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
+ script)
+ logger.debug('google video engine: ' + id + ' matched ' + str(len(thumbnails_data)) + ' times (thumbnail)')
+ tmp = []
+ if len(thumbnails_data) != 0:
+ tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+ thumbnail = ''
+ if len(tmp) != 0:
+ thumbnail = tmp[-1]
+
# append result
results.append({'url': url,
'title': title,
'content': content,
- 'thumbnail': '',
+ 'thumbnail': thumbnail,
'template': 'videos.html'})
return results