logo

searx

My custom branch(es) on searx, a meta-search engine
commit: 46a2c63f8e1c3819cceff2d61fe9106051e8ecee
parent: 12d91c1d67aa4c05ecead7ff16041fb2b9ca366d
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sat,  8 Apr 2017 19:42:50 +0200

[fix] yahoo news date parsing

Diffstat:

M searx/engines/yahoo_news.py | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
@@ -80,16 +80,19 @@ def response(resp):
 
         # still useful ?
         if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
-            publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group()))  # noqa
+            publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group()))
+        elif re.match("^[0-9]+ days? ago$", publishedDate):
+            publishedDate = datetime.now() - timedelta(days=int(re.match(r'\d+', publishedDate).group()))
+        elif re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate):
+            timeNumbers = re.findall(r'\d+', publishedDate)
+            publishedDate = datetime.now()\
+                - timedelta(hours=int(timeNumbers[0]))\
+                - timedelta(minutes=int(timeNumbers[1]))
         else:
-            if re.match("^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$",
-                        publishedDate):
-                timeNumbers = re.findall(r'\d+', publishedDate)
-                publishedDate = datetime.now()\
-                    - timedelta(hours=int(timeNumbers[0]))\
-                    - timedelta(minutes=int(timeNumbers[1]))
-            else:
+            try:
                 publishedDate = parser.parse(publishedDate)
+            except:
+                publishedDate = datetime.now()
 
         if publishedDate.year == 1900:
             publishedDate = publishedDate.replace(year=datetime.now().year)