commit: 993271bed30e24c7ae1e0f63b64e030829206f27
parent 337bd6d907503176eb94290c3f386ce88167dea8
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date: Tue, 18 Mar 2014 15:56:22 +0100
extract publishDate from vimeo
Diffstat:
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
@@ -2,6 +2,8 @@ from urllib import urlencode
from HTMLParser import HTMLParser
from lxml import html
from xpath import extract_text
+from datetime import datetime
+from dateutil import parser
base_url = 'http://vimeo.com'
search_url = base_url + '/search?{query}'
@@ -10,6 +12,7 @@ content_xpath = None
title_xpath = None
results_xpath = ''
content_tpl = '<a href="{0}"> <img src="{2}"/> </a>'
+publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
# the cookie set by vimeo contains all the following values,
# but only __utma seems to be required
@@ -40,9 +43,12 @@ def response(resp):
url = base_url + result.xpath(url_xpath)[0]
title = p.unescape(extract_text(result.xpath(title_xpath)))
thumbnail = extract_text(result.xpath(content_xpath)[0])
+ publishedDate = parser.parse(extract_text(result.xpath(publishedDate_xpath)[0]))
+
results.append({'url': url,
'title': title,
'content': content_tpl.format(url, title, thumbnail),
'template': 'videos.html',
+ 'publishedDate': publishedDate,
'thumbnail': thumbnail})
return results
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
@@ -53,7 +53,7 @@ def response(resp):
- timedelta(hours=int(timeNumbers[0]))\
- timedelta(minutes=int(timeNumbers[1]))
else:
- publishedDate =parser.parse(publishedDate)
+ publishedDate = parser.parse(publishedDate)
if publishedDate.year == 1900:
publishedDate = publishedDate.replace(year=datetime.now().year)