logo

searx

Unnamed repository; edit this file 'description' to name the repository.
commit: bfd321a7a9cfc6d643fdebe5e7a5824fe70b9aa9
parent: 8de97dac03fc97a9705c8d3cd3163330a6f08375
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sat, 31 Jan 2015 22:05:13 +0100

[mod] python importable engine names

Diffstat:

searx/engines/500px.py | 63 ---------------------------------------------------------------
searx/engines/flickr-noapi.py | 109 -------------------------------------------------------------------------------
searx/engines/flickr_noapi.py | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/engines/www500px.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
searx/settings.yml | 4 ++--
5 files changed, 174 insertions(+), 174 deletions(-)

diff --git a/searx/engines/500px.py b/searx/engines/500px.py
@@ -1,63 +0,0 @@
-## 500px (Images)
-#
-# @website https://500px.com
-# @provide-api yes (https://developers.500px.com/)
-#
-# @using-api no
-# @results HTML
-# @stable no (HTML can change)
-# @parse url, title, thumbnail, img_src, content
-#
-# @todo rewrite to api
-
-
-from urllib import urlencode
-from urlparse import urljoin
-from lxml import html
-import re
-
-# engine dependent config
-categories = ['images']
-paging = True
-
-# search-url
-base_url = 'https://500px.com'
-search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
-
-
-# do search-request
-def request(query, params):
-    params['url'] = search_url.format(pageno=params['pageno'],
-                                      query=urlencode({'q': query}))
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    results = []
-
-    dom = html.fromstring(resp.text)
-    regex = re.compile('3\.jpg.*$')
-
-    # parse results
-    for result in dom.xpath('//div[@class="photo"]'):
-        link = result.xpath('.//a')[0]
-        url = urljoin(base_url, link.attrib.get('href'))
-        title = result.xpath('.//div[@class="title"]//text()')[0]
-        thumbnail_src = link.xpath('.//img')[0].attrib['src']
-        # To have a bigger thumbnail, uncomment the next line
-        #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
-        content = result.xpath('.//div[@class="info"]//text()')[0]
-        img_src = regex.sub('2048.jpg', thumbnail_src)
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'img_src': img_src,
-                        'content': content,
-                        'thumbnail_src': thumbnail_src,
-                        'template': 'images.html'})
-
-    # return results
-    return results
diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py
@@ -1,109 +0,0 @@
-#!/usr/bin/env python
-
-# Flickr (Images)
-#
-# @website https://www.flickr.com
-# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-#
-# @using-api no
-# @results HTML
-# @stable no
-# @parse url, title, thumbnail, img_src
-
-from urllib import urlencode
-from json import loads
-import re
-from searx.engines import logger
-
-
-logger = logger.getChild('flickr-noapi')
-
-categories = ['images']
-
-url = 'https://secure.flickr.com/'
-search_url = url+'search/?{query}&page={page}'
-photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
-regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
-image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
-
-paging = True
-
-
-def build_flickr_url(user_id, photo_id):
-    return photo_url.format(userid=user_id, photoid=photo_id)
-
-
-def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'text': query}),
-                                      page=params['pageno'])
-    return params
-
-
-def response(resp):
-    results = []
-
-    matches = regex.search(resp.text)
-
-    if matches is None:
-        return results
-
-    match = matches.group(1)
-    search_results = loads(match)
-
-    if '_data' not in search_results:
-        return []
-
-    photos = search_results['_data']
-
-    for photo in photos:
-
-        # In paged configuration, the first pages' photos
-        # are represented by a None object
-        if photo is None:
-            continue
-
-        img_src = None
-        # From the biggest to the lowest format
-        for image_size in image_sizes:
-            if image_size in photo['sizes']:
-                img_src = photo['sizes'][image_size]['url']
-                break
-
-        if not img_src:
-            logger.debug('cannot find valid image size: {0}'.format(repr(photo)))
-            continue
-
-        if 'id' not in photo['owner']:
-            continue
-
-# For a bigger thumbnail, keep only the url_z, not the url_n
-        if 'n' in photo['sizes']:
-            thumbnail_src = photo['sizes']['n']['url']
-        elif 'z' in photo['sizes']:
-            thumbnail_src = photo['sizes']['z']['url']
-        else:
-            thumbnail_src = img_src
-
-        url = build_flickr_url(photo['owner']['id'], photo['id'])
-
-        title = photo.get('title', '')
-
-        content = '<span class="photo-author">' +\
-                  photo['owner']['username'] +\
-                  '</span><br />'
-
-        if 'description' in photo:
-            content = content +\
-                      '<span class="description">' +\
-                      photo['description'] +\
-                      '</span>'
-
-        # append result
-        results.append({'url': url,
-                        'title': title,
-                        'img_src': img_src,
-                        'thumbnail_src': thumbnail_src,
-                        'content': content,
-                        'template': 'images.html'})
-
-    return results
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+# Flickr (Images)
+#
+# @website https://www.flickr.com
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
+#
+# @using-api no
+# @results HTML
+# @stable no
+# @parse url, title, thumbnail, img_src
+
+from urllib import urlencode
+from json import loads
+import re
+from searx.engines import logger
+
+
+logger = logger.getChild('flickr-noapi')
+
+categories = ['images']
+
+url = 'https://secure.flickr.com/'
+search_url = url+'search/?{query}&page={page}'
+photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
+regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
+image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
+
+paging = True
+
+
+def build_flickr_url(user_id, photo_id):
+    return photo_url.format(userid=user_id, photoid=photo_id)
+
+
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'text': query}),
+                                      page=params['pageno'])
+    return params
+
+
+def response(resp):
+    results = []
+
+    matches = regex.search(resp.text)
+
+    if matches is None:
+        return results
+
+    match = matches.group(1)
+    search_results = loads(match)
+
+    if '_data' not in search_results:
+        return []
+
+    photos = search_results['_data']
+
+    for photo in photos:
+
+        # In paged configuration, the first pages' photos
+        # are represented by a None object
+        if photo is None:
+            continue
+
+        img_src = None
+        # From the biggest to the lowest format
+        for image_size in image_sizes:
+            if image_size in photo['sizes']:
+                img_src = photo['sizes'][image_size]['url']
+                break
+
+        if not img_src:
+            logger.debug('cannot find valid image size: {0}'.format(repr(photo)))
+            continue
+
+        if 'id' not in photo['owner']:
+            continue
+
+# For a bigger thumbnail, keep only the url_z, not the url_n
+        if 'n' in photo['sizes']:
+            thumbnail_src = photo['sizes']['n']['url']
+        elif 'z' in photo['sizes']:
+            thumbnail_src = photo['sizes']['z']['url']
+        else:
+            thumbnail_src = img_src
+
+        url = build_flickr_url(photo['owner']['id'], photo['id'])
+
+        title = photo.get('title', '')
+
+        content = '<span class="photo-author">' +\
+                  photo['owner']['username'] +\
+                  '</span><br />'
+
+        if 'description' in photo:
+            content = content +\
+                      '<span class="description">' +\
+                      photo['description'] +\
+                      '</span>'
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'thumbnail_src': thumbnail_src,
+                        'content': content,
+                        'template': 'images.html'})
+
+    return results
diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py
@@ -0,0 +1,63 @@
+## 500px (Images)
+#
+# @website https://500px.com
+# @provide-api yes (https://developers.500px.com/)
+#
+# @using-api no
+# @results HTML
+# @stable no (HTML can change)
+# @parse url, title, thumbnail, img_src, content
+#
+# @todo rewrite to api
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+import re
+
+# engine dependent config
+categories = ['images']
+paging = True
+
+# search-url
+base_url = 'https://500px.com'
+search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(pageno=params['pageno'],
+                                      query=urlencode({'q': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    dom = html.fromstring(resp.text)
+    regex = re.compile('3\.jpg.*$')
+
+    # parse results
+    for result in dom.xpath('//div[@class="photo"]'):
+        link = result.xpath('.//a')[0]
+        url = urljoin(base_url, link.attrib.get('href'))
+        title = result.xpath('.//div[@class="title"]//text()')[0]
+        thumbnail_src = link.xpath('.//img')[0].attrib['src']
+        # To have a bigger thumbnail, uncomment the next line
+        #thumbnail_src = regex.sub('4.jpg', thumbnail_src)
+        content = result.xpath('.//div[@class="info"]//text()')[0]
+        img_src = regex.sub('2048.jpg', thumbnail_src)
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'content': content,
+                        'thumbnail_src': thumbnail_src,
+                        'template': 'images.html'})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -80,7 +80,7 @@ engines:
 # shortcut : fc
 
   - name : 500px
-    engine : 500px
+    engine : www500px
     shortcut : px
 
   - name : flickr
@@ -91,7 +91,7 @@ engines:
 # engine : flickr
 # api_key: 'apikey' # required!
 # Or you can use the html non-stable engine, activated by default
-    engine : flickr-noapi
+    engine : flickr_noapi
 
   - name : general-file
     engine : generalfile