logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 362c849797e2e6f0e232642c23744c47a75cdfd4
parent: 7580852bda660471f8968b4f14cdf44dad73249f
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Mon,  7 Sep 2015 22:39:33 +0200

[fix][mod] wikidata date handling refactor - fixes #387

Diffstat:

M searx/engines/wikidata.py | 34 ++++++++++++++++++++++++++--------
M searx/utils.py | 28 ++++++++++------------------
2 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py @@ -1,8 +1,15 @@ import json -from urllib import urlencode + +from searx import logger from searx.poolrequests import get from searx.utils import format_date_by_locale +from datetime import datetime +from dateutil.parser import parse as dateutil_parse +from urllib import urlencode + + +logger = logger.getChild('wikidata') result_count = 1 wikidata_host = 'https://www.wikidata.org' wikidata_api = wikidata_host + '/w/api.php' @@ -164,14 +171,12 @@ def getDetail(jsonresponse, wikidata_id, language, locale): if postal_code is not None: attributes.append({'label': 'Postal code(s)', 'value': postal_code}) - date_of_birth = get_time(claims, 'P569', None) + date_of_birth = get_time(claims, 'P569', locale, None) if date_of_birth is not None: - date_of_birth = format_date_by_locale(date_of_birth[8:], locale) attributes.append({'label': 'Date of birth', 'value': date_of_birth}) - date_of_death = get_time(claims, 'P570', None) + date_of_death = get_time(claims, 'P570', locale, None) if date_of_death is not None: - date_of_death = format_date_by_locale(date_of_death[8:], locale) attributes.append({'label': 'Date of death', 'value': date_of_death}) if len(attributes) == 0 and len(urls) == 2 and len(description) == 0: @@ -229,7 +234,7 @@ def get_string(claims, propertyName, defaultValue=None): return result[0] -def get_time(claims, propertyName, defaultValue=None): +def get_time(claims, propertyName, locale, defaultValue=None): propValue = claims.get(propertyName, {}) if len(propValue) == 0: return defaultValue @@ -244,9 +249,22 @@ def get_time(claims, propertyName, defaultValue=None): result.append(value.get('time', '')) if len(result) == 0: - return defaultValue + date_string = defaultValue else: - return ', '.join(result) + date_string = ', '.join(result) + + try: + parsed_date = datetime.strptime(date_string, "+%Y-%m-%dT%H:%M:%SZ") + except: + if date_string.startswith('-'): + return date_string.split('T')[0] 
+ try: + parsed_date = dateutil_parse(date_string, fuzzy=False, default=False) + except: + logger.debug('could not parse date %s', date_string) + return date_string.split('T')[0] + + return format_date_by_locale(parsed_date, locale) def get_geolink(claims, propertyName, defaultValue=''): diff --git a/searx/utils.py b/searx/utils.py @@ -1,11 +1,10 @@ # import htmlentitydefs -import locale -import dateutil.parser import cStringIO import csv import os import re +from babel.dates import format_date from codecs import getincrementalencoder from HTMLParser import HTMLParser from random import choice @@ -195,23 +194,16 @@ def get_result_templates(base_path): return result_templates -def format_date_by_locale(date_string, locale_string): +def format_date_by_locale(date, locale_string): # strftime works only on dates after 1900 - parsed_date = dateutil.parser.parse(date_string) - if parsed_date.year <= 1900: - return parsed_date.isoformat().split('T')[0] - - orig_locale = locale.getlocale()[0] - try: - locale.setlocale(locale.LC_ALL, locale_string) - except: - logger.warning('cannot set locale: {0}'.format(locale_string)) - formatted_date = parsed_date.strftime(locale.nl_langinfo(locale.D_FMT)) - try: - locale.setlocale(locale.LC_ALL, orig_locale) - except: - logger.warning('cannot set original locale: {0}'.format(orig_locale)) - return formatted_date + + if date.year <= 1900: + return date.isoformat().split('T')[0] + + if locale_string == 'all': + locale_string = settings['ui']['default_locale'] or 'en_US' + + return format_date(date, locale=locale_string) def dict_subset(d, properties):