logo

searx

My custom branch(es) on searx, a meta-search engine
commit: 108392f8daf70fbfcd43e9d691d665aad1e15994
parent: 94327d67fcc8b7891556bee030432677a4692f45
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Tue, 10 Jan 2017 11:03:05 +0100

[fix] skip non-complete google news results

Diffstat:

M searx/engines/google_news.py | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
@@ -66,11 +66,14 @@ def response(resp):

     # parse results
     for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
-        r = {
-            'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
-            'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
-            'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
-        }
+        try:
+            r = {
+                'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
+                'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
+                'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
+            }
+        except:
+            continue

         imgs = result.xpath('.//img/@src')
         if len(imgs) and not imgs[0].startswith('data'):