logo

searx

My custom branch(es) of searx, a meta-search engine
commit: af416074102d20f3cfd7cd1cfb5cd0e26c8b1f9c
parent: 2e41bfcbdbb762be610ff379380ea2201f544edd
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Mon, 22 Dec 2014 13:33:32 +0100

Merge pull request #149 from Cqoicebordel/Flickr-engine

Rework Flickr Engine

Diffstat:

A searx/engines/flickr-noapi.py | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/engines/flickr.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
M searx/settings.yml | 8 ++++++--
3 files changed, 163 insertions(+), 31 deletions(-)

diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +## Flickr (Images) +# +# @website https://www.flickr.com +# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) +# +# @using-api no +# @results HTML +# @stable no +# @parse url, title, thumbnail, img_src + +from urllib import urlencode +from json import loads +from urlparse import urljoin +from lxml import html +import re + +categories = ['images'] + +url = 'https://secure.flickr.com/' +search_url = url+'search/?{query}&page={page}' +photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' +regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) + +paging = True + +def build_flickr_url(user_id, photo_id): + return photo_url.format(userid=user_id,photoid=photo_id) + + +def request(query, params): + params['url'] = search_url.format(query=urlencode({'text': query}), + page=params['pageno']) + return params + + +def response(resp): + results = [] + + matches = regex.search(resp.text) + + if matches == None: + return results + + match = matches.group(1) + search_results = loads(match) + + if not '_data' in search_results: + return [] + + photos = search_results['_data'] + + for photo in photos: + + # In paged configuration, the first pages' photos are represented by a None object + if photo == None: + continue + + # From the biggest to the lowest format + if 'o' in photo['sizes']: + img_src = photo['sizes']['o']['displayUrl'] + elif 'k' in photo['sizes']: + img_src = photo['sizes']['k']['displayUrl'] + elif 'h' in photo['sizes']: + img_src = photo['sizes']['h']['displayUrl'] + elif 'b' in photo['sizes']: + img_src = photo['sizes']['b']['displayUrl'] + elif 'c' in photo['sizes']: + img_src = photo['sizes']['c']['displayUrl'] + elif 'z' in photo['sizes']: + img_src = photo['sizes']['z']['displayUrl'] + elif 'n' in photo['sizes']: + img_src = photo['sizes']['n']['displayUrl'] + elif 
'm' in photo['sizes']: + img_src = photo['sizes']['m']['displayUrl'] + elif 't' in photo['sizes']: + img_src = photo['sizes']['to']['displayUrl'] + elif 'q' in photo['sizes']: + img_src = photo['sizes']['q']['displayUrl'] + elif 's' in photo['sizes']: + img_src = photo['sizes']['s']['displayUrl'] + else: + continue + + url = build_flickr_url(photo['owner']['id'], photo['id']) + + title = photo['title'] + + content = '<span class="photo-author">'+ photo['owner']['username'] +'</span><br />' + + if 'description' in photo: + content = content + '<span class="description">' + photo['description'] + '</span>' + + # append result + results.append({'url': url, + 'title': title, + 'img_src': img_src, + 'content': content, + 'template': 'images.html'}) + + return results diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py @@ -1,54 +1,80 @@ #!/usr/bin/env python +## Flickr (Images) +# +# @website https://www.flickr.com +# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) +# +# @using-api yes +# @results JSON +# @stable yes +# @parse url, title, thumbnail, img_src +#More info on api-key : https://www.flickr.com/services/apps/create/ + from urllib import urlencode -#from json import loads -from urlparse import urljoin -from lxml import html -from time import time +from json import loads categories = ['images'] -url = 'https://secure.flickr.com/' -search_url = url+'search/?{query}&page={page}' -results_xpath = '//div[@class="view display-item-tile"]/figure/div' +nb_per_page = 15 +paging = True +api_key= None + + +url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' paging = True +def build_flickr_url(user_id, photo_id): + return photo_url.format(userid=user_id,photoid=photo_id) + def request(query, 
params): - params['url'] = search_url.format(query=urlencode({'text': query}), - page=params['pageno']) - time_string = str(int(time())-3) - params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh' - params['cookies']['xb'] = '421409' - params['cookies']['localization'] = 'en-us' - params['cookies']['flrbp'] = time_string +\ - '-3a8cdb85a427a33efda421fbda347b2eaf765a54' - params['cookies']['flrbs'] = time_string +\ - '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776' - params['cookies']['flrb'] = '9' + params['url'] = url.format(text=urlencode({'text': query}), + api_key=api_key, + nb_per_page=nb_per_page, + page=params['pageno']) return params def response(resp): results = [] - dom = html.fromstring(resp.text) - for result in dom.xpath(results_xpath): - img = result.xpath('.//img') + + search_results = loads(resp.text) - if not img: - continue + # return empty array if there are no results + if not 'photos' in search_results: + return [] + + if not 'photo' in search_results['photos']: + return [] - img = img[0] - img_src = 'https:'+img.attrib.get('src') + photos = search_results['photos']['photo'] - if not img_src: + # parse results + for photo in photos: + if 'url_o' in photo: + img_src = photo['url_o'] + elif 'url_z' in photo: + img_src = photo['url_z'] + else: continue - href = urljoin(url, result.xpath('.//a')[0].attrib.get('href')) - title = img.attrib.get('alt', '') - results.append({'url': href, + url = build_flickr_url(photo['owner'], photo['id']) + + title = photo['title'] + + content = '<span class="photo-author">'+ photo['ownername'] +'</span><br />' + + content = content + '<span class="description">' + photo['description']['_content'] + '</span>' + + # append result + results.append({'url': url, 'title': title, 'img_src': img_src, + 'content': content, 'template': 'images.html'}) + + # return results return results diff --git a/searx/settings.yml b/searx/settings.yml @@ -70,10 +70,14 @@ engines: shortcut : px - name : flickr - engine : flickr categories : images 
shortcut : fl - timeout: 3.0 +# You can use the engine using the official stable API, but you need an API key +# See : https://www.flickr.com/services/apps/create/ +# engine : flickr +# api_key: 'apikey' # required! +# Or you can use the html non-stable engine, activated by default + engine : flickr-noapi - name : general-file engine : generalfile