logo

searx

My custom branch(es) of searx, a meta-search engine
git clone https://hacktivis.me/git/searx.git
commit: cb4a3fe598707fc42f86ea3f7bcf517dcd4db660
parent edd9d311809d8f6eab5109f9cd899e7989bb42d5
Author: Cqoicebordel <Cqoicebordel@users.noreply.github.com>
Date:   Sat, 17 Jan 2015 19:21:09 +0100

Add thumbnails in images results
- Modify engines to create/fetch a URL for the thumbnails
- Modify themes to show thumbnails instead of full images.

In Courgette, the result does not look very good. Should we change it?

Diffstat:

M searx/engines/500px.py | 8 +++++++-
M searx/engines/bing_images.py | 4 ++++
M searx/engines/deviantart.py | 9 +++++++--
M searx/engines/flickr-noapi.py | 9 +++++++++
M searx/engines/flickr.py | 11 ++++++++++-
M searx/engines/google_images.py | 3 +++
M searx/templates/courgette/result_templates/images.html | 2 +-
M searx/templates/default/result_templates/images.html | 2 +-
M searx/templates/oscar/result_templates/images.html | 4 ++--
9 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/searx/engines/500px.py b/searx/engines/500px.py @@ -14,6 +14,7 @@ from urllib import urlencode from urlparse import urljoin from lxml import html +import re # engine dependent config categories = ['images'] @@ -37,20 +38,25 @@ def response(resp): results = [] dom = html.fromstring(resp.text) + regex = re.compile('3\.jpg.*$') # parse results for result in dom.xpath('//div[@class="photo"]'): link = result.xpath('.//a')[0] url = urljoin(base_url, link.attrib.get('href')) title = result.xpath('.//div[@class="title"]//text()')[0] - img_src = link.xpath('.//img')[0].attrib['src'] + thumbnail_src = link.xpath('.//img')[0].attrib['src'] + # To have a bigger thumbnail, uncomment the next line + #thumbnail_src = regex.sub('4.jpg', thumbnail_src) content = result.xpath('.//div[@class="info"]//text()')[0] + img_src = regex.sub('2048.jpg', thumbnail_src) # append result results.append({'url': url, 'title': title, 'img_src': img_src, 'content': content, + 'thumbnail_src': thumbnail_src, 'template': 'images.html'}) # return results diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py @@ -25,6 +25,7 @@ paging = True # search-url base_url = 'https://www.bing.com/' search_string = 'images/search?{query}&count=10&first={offset}' +thumb_url = "http://ts1.mm.bing.net/th?id={ihk}" # do search-request @@ -63,6 +64,8 @@ def response(resp): yaml_data = load(p.sub(r'\1\2: \3', link.attrib.get('m'))) title = link.attrib.get('t1') + ihk = link.attrib.get('ihk') + #url = 'http://' + link.attrib.get('t3') url = yaml_data.get('surl') img_src = yaml_data.get('imgurl') @@ -72,6 +75,7 @@ def response(resp): 'url': url, 'title': title, 'content': '', + 'thumbnail_src': thumb_url.format(ihk=ihk), 'img_src': img_src}) # TODO stop parsing if 10 images are found diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py @@ -6,13 +6,14 @@ # @using-api no (TODO, rewrite to api) # @results HTML # @stable no (HTML can change) -# @parse url, title, thumbnail, 
img_src +# @parse url, title, thumbnail_src, img_src # # @todo rewrite to api from urllib import urlencode from urlparse import urljoin from lxml import html +import re # engine dependent config categories = ['images'] @@ -42,6 +43,8 @@ def response(resp): return [] dom = html.fromstring(resp.text) + + regex = re.compile('\/200H\/') # parse results for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'): @@ -49,12 +52,14 @@ def response(resp): url = urljoin(base_url, link.attrib.get('href')) title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') # noqa title = ''.join(title_links[0].xpath('.//text()')) - img_src = link.xpath('.//img')[0].attrib['src'] + thumbnail_src = link.xpath('.//img')[0].attrib['src'] + img_src = regex.sub('/', thumbnail_src) # append result results.append({'url': url, 'title': title, 'img_src': img_src, + 'thumbnail_src': thumbnail_src, 'template': 'images.html'}) # return results diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py @@ -71,6 +71,14 @@ def response(resp): if 'id' not in photo['owner']: continue +# For a bigger thumbnail, keep only the url_z, not the url_n + if 'n' in photo['sizes']: + thumbnail_src = photo['sizes']['n']['displayUrl'] + elif 'z' in photo['sizes']: + thumbnail_src = photo['sizes']['z']['displayUrl'] + else: + thumbnail_src = img_src + url = build_flickr_url(photo['owner']['id'], photo['id']) title = photo.get('title', '') @@ -89,6 +97,7 @@ def response(resp): results.append({'url': url, 'title': title, 'img_src': img_src, + 'thumbnail_src': thumbnail_src, 'content': content, 'template': 'images.html'}) diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py @@ -23,7 +23,7 @@ api_key = None url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\ '&api_key={api_key}&{text}&sort=relevance' +\ - '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\ + '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' +\ 
'&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' @@ -65,6 +65,14 @@ def response(resp): else: continue +# For a bigger thumbnail, keep only the url_z, not the url_n + if 'url_n' in photo: + thumbnail_src = photo['url_n'] + elif 'url_z' in photo: + thumbnail_src = photo['url_z'] + else: + thumbnail_src = img_src + url = build_flickr_url(photo['owner'], photo['id']) title = photo['title'] @@ -80,6 +88,7 @@ def response(resp): results.append({'url': url, 'title': title, 'img_src': img_src, + 'thumbnail_src': thumbnail_src, 'content': content, 'template': 'images.html'}) diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py @@ -43,15 +43,18 @@ def response(resp): # parse results for result in search_res['responseData']['results']: + print result href = result['originalContextUrl'] title = result['title'] if not result['url']: continue + thumbnail_src = result['tbUrl'] # append result results.append({'url': href, 'title': title, 'content': '', + 'thumbnail_src': thumbnail_src, 'img_src': unquote(result['url']), 'template': 'images.html'}) diff --git a/searx/templates/courgette/result_templates/images.html b/searx/templates/courgette/result_templates/images.html @@ -1,6 +1,6 @@ <div class="image_result"> <p> - <a href="{{ result.img_src }}"><img src="{{ image_proxify(result.img_src) }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> + <a href="{{ result.img_src }}"><img src="{{ image_proxify(result.thumbnail_src) }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}"/></a> <span class="url"><a href="{{ result.url }}" class="small_font">{{ _('original context') }}</a></span> </p> </div> diff --git a/searx/templates/default/result_templates/images.html b/searx/templates/default/result_templates/images.html @@ -1,6 +1,6 @@ <div class="image_result"> <p> - <a href="{{ result.img_src }}"><img src="{{ 
image_proxify(result.img_src) }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}" /></a> + <a href="{{ result.img_src }}"><img src="{{ image_proxify(result.thumbnail_src) }}" title="{{ result.title|striptags }}" alt="{{ result.title|striptags }}" /></a> <span class="url"><a href="{{ result.url }}" class="small_font">{{ _('original context') }}</a></span> </p> </div> diff --git a/searx/templates/oscar/result_templates/images.html b/searx/templates/oscar/result_templates/images.html @@ -1,7 +1,7 @@ {% from 'oscar/macros.html' import draw_favicon %} <a href="{{ result.img_src }}" data-toggle="modal" data-target="#modal-{{ index }}"> - <img src="{{ image_proxify(result.img_src) }}" alt="{{ result.title|striptags }}" title="{{ result.title|striptags }}" class="img-thumbnail"> + <img src="{{ image_proxify(result.thumbnail_src) }}" alt="{{ result.title|striptags }}" title="{{ result.title|striptags }}" class="img-thumbnail"> </a> <div class="modal fade" id="modal-{{ index }}" tabindex="-1" role="dialog" aria-hidden="true"> @@ -12,7 +12,7 @@ <h4 class="modal-title">{% if result.engine~".png" in favicons %}{{ draw_favicon(result.engine) }} {% endif %}{{ result.title|striptags }}</h4> </div> <div class="modal-body"> - <img class="img-responsive center-block" src="{{ result.img_src }}" alt="{{ result.title }}"> + <img class="img-responsive center-block" src="{{ result.thumbnail_src }}" alt="{{ result.title|striptags }}"> {% if result.content %}<p class="result-content">{{ result.content|safe }}</p>{% endif %} </div> <div class="modal-footer">