[FIX] google videos thumbnails - searx - My custom branch(es) on searx, a meta-search engine

commit: cf26aba93b96bb1171feb60fefb232a9113b85b0
parent cee15f03755c8e360883918b38e6080c0dce800e
Author: Venca24 <Vaclav.Zouzalik@seznam.cz>
Date:   Fri,  4 Jan 2019 15:48:22 +0100

[FIX] google videos thumbnails

Diffstat:
M searx/engines/google_videos.py 20 +++++++++++++++++---

1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
@@ -7,15 +7,16 @@
  @using-api   no
  @results     HTML
  @stable      no
- @parse       url, title, content
+ @parse       url, title, content, thumbnail
 """
 
 from datetime import date, timedelta
 from json import loads
 from lxml import html
+from searx.engines import logger
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
-
+import re
 
 # engine dependent config
 categories = ['videos']
@@ -73,11 +74,24 @@ def response(resp):
         url = result.xpath('.//div[@class="r"]/a/@href')[0]
         content = extract_text(result.xpath('.//span[@class="st"]'))
 
+        # get thumbnails
+        script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
+        id = result.xpath('.//div[@class="s"]//img/@id')[0]
+        thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
+                                     script)
+        logger.debug('google video engine: ' + id + ' matched ' + str(len(thumbnails_data)) + ' times (thumbnail)')
+        tmp = []
+        if len(thumbnails_data) != 0:
+            tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+        thumbnail = ''
+        if len(tmp) != 0:
+            thumbnail = tmp[-1]
+
         # append result
         results.append({'url': url,
                         'title': title,
                         'content': content,
-                        'thumbnail': '',
+                        'thumbnail': thumbnail,
                         'template': 'videos.html'})
 
     return results