logo

searx

My custom branch(es) on searx, a meta-search engine
commit: 1e99cf2a0e541a3d2df0104d64fadf955bbccc20
parent: c23db1b2bfb2c9233816fc378927c49b67eeffaf
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sun,  7 Sep 2014 19:12:05 +0200

Merge pull request #93 from dalf/master

yahoo, bing_news and dailymotion fixes

Diffstat:

M searx/engines/bing_news.py   | 23 ++++++++++++++++++-----
M searx/engines/dailymotion.py |  7 ++++++-
M searx/engines/yahoo.py       |  8 +++++---
3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py @@ -56,10 +56,14 @@ def response(resp): link = result.xpath('.//div[@class="newstitle"]/a')[0] url = link.attrib.get('href') title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()'))) - + contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()') + if contentXPath != None: + content = escape(' '.join(contentXPath)) + # parse publishedDate - publishedDate = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_ST"]//span[@class="sn_tm"]//text()'))) + publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()') + if publishedDateXPath != None: + publishedDate = escape(' '.join(publishedDateXPath)) if re.match("^[0-9]+ minute(s|) ago$", publishedDate): timeNumbers = re.findall(r'\d+', publishedDate) @@ -74,9 +78,18 @@ def response(resp): publishedDate = datetime.now()\ - timedelta(hours=int(timeNumbers[0]))\ - timedelta(minutes=int(timeNumbers[1])) + elif re.match("^[0-9]+ day(s|) ago$", publishedDate): + timeNumbers = re.findall(r'\d+', publishedDate) + publishedDate = datetime.now()\ + - timedelta(days=int(timeNumbers[0])) else: - publishedDate = parser.parse(publishedDate) - + try: + # FIXME use params['language'] to parse either mm/dd or dd/mm + publishedDate = parser.parse(publishedDate, dayfirst=False) + except TypeError: + # FIXME + publishedDate = datetime.now() + # append result results.append({'url': url, 'title': title, diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py @@ -16,8 +16,8 @@ from lxml import html # engine dependent config categories = ['videos'] -locale = 'en_US' paging = True +language_support = True # search-url # see http://www.dailymotion.com/doc/api/obj-video.html @@ -26,6 +26,11 @@ search_url = 
'https://api.dailymotion.com/videos?fields=title,description,durati # do search-request def request(query, params): + if params['language'] == 'all': + locale = 'en-US' + else: + locale = params['language'] + params['url'] = search_url.format( query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno']) diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py @@ -40,9 +40,11 @@ def parse_url(url_string): if endpos > -1: endpositions.append(endpos) - end = min(endpositions) - - return unquote(url_string[start:end]) + if start==0 or len(endpositions) == 0: + return url_string + else: + end = min(endpositions) + return unquote(url_string[start:end]) # do search-request