logo

searx

My custom branch(es) of searx, a meta-search engine
git clone https://hacktivis.me/git/searx.git
commit: 90dcfc1dddbab74de64fc733802cce071540254d
parent 144f89bf785408a193d09f659a5442032c06de74
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Mon,  1 Sep 2014 15:36:53 +0200

fix dailymotion engine and add comments to it

Diffstat:

M searx/engines/dailymotion.py | 54 +++++++++++++++++++++++++++++++++++-------------------
1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
@@ -1,45 +1,61 @@
+## Dailymotion (Videos)
+#
+# @website https://www.dailymotion.com
+# @provide-api yes (http://www.dailymotion.com/developer)
+#
+# @using-api yes
+# @results JSON
+# @stable yes
+# @parse url, title, thumbnail
+#
+# @todo set content-parameter with correct data
+
 from urllib import urlencode
 from json import loads
 from lxml import html
 
+# engine dependent config
 categories = ['videos']
 locale = 'en_US'
+paging = True
 
+# search-url
 # see http://www.dailymotion.com/doc/api/obj-video.html
-search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page={pageno}&{query}' # noqa
-
-# TODO use video result template
-content_tpl = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'
-
-paging = True
+search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=5&page={pageno}&{query}' # noqa
 
 
+# do search-request
 def request(query, params):
     params['url'] = search_url.format(
         query=urlencode({'search': query, 'localization': locale}),
         pageno=params['pageno'])
+
     return params
 
 
+# get response from search-request
 def response(resp):
     results = []
+
     search_res = loads(resp.text)
+
+    # return empty array if there are no results
     if not 'list' in search_res:
-        return results
+        return []
+
+    # parse results
     for res in search_res['list']:
         title = res['title']
         url = res['url']
-        if res['thumbnail_360_url']:
-            content = content_tpl.format(url, res['thumbnail_360_url'])
-        else:
-            content = ''
-        if res['description']:
-            description = text_content_from_html(res['description'])
-            content += description[:500]
-        results.append({'url': url, 'title': title, 'content': content})
-    return results
+        #content = res['description']
+        content = ''
+        thumbnail = res['thumbnail_360_url']
 
+        results.append({'template': 'videos.html',
+                        'url': url,
+                        'title': title,
+                        'content': content,
+                        'thumbnail': thumbnail})
 
-def text_content_from_html(html_string):
-    desc_html = html.fragment_fromstring(html_string, create_parent=True)
-    return desc_html.text_content()
+    # return results
+    return results