logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: d20bba6dc74ded16556acf2a404d01ec47455ca6
parent df0d915806b6e4488099130cd1d7fb1775fe475c
Author: Noémi Ványi <sitbackandwait@gmail.com>
Date:   Wed,  1 Nov 2017 14:20:47 +0100

minor fixes of pubmed engine

Closes #1045

Diffstat:

M searx/engines/pubmed.py | 17 +++++++----------
M searx/settings.yml      |  2 +-
M searx/url_utils.py      |  2 --
3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py @@ -11,9 +11,11 @@ More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/ """ +from flask_babel import gettext from lxml import etree from datetime import datetime -from searx.url_utils import urlencode, urlopen +from searx.url_utils import urlencode +from searx.poolrequests import get categories = ['science'] @@ -46,12 +48,7 @@ def response(resp): pubmed_retrieve_api_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'\ + 'db=pubmed&retmode=xml&id={pmids_string}' - # handle Python2 vs Python3 management of bytes and strings - try: - pmids_results = etree.XML(resp.text.encode('utf-8')) - except AttributeError: - pmids_results = etree.XML(resp.text) - + pmids_results = etree.XML(resp.content) pmids = pmids_results.xpath('//eSearchResult/IdList/Id') pmids_string = '' @@ -62,7 +59,7 @@ def response(resp): retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args) - search_results_xml = urlopen(retrieve_url_encoded).read() + search_results_xml = get(retrieve_url_encoded).content search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation') for entry in search_results: @@ -74,12 +71,12 @@ def response(resp): try: content = entry.xpath('.//Abstract/AbstractText')[0].text except: - content = 'No abstract is available for this publication.' 
+ content = gettext('No abstract is available for this publication.') # If a doi is available, add it to the snipppet try: doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text - content = 'DOI: ' + doi + ' Abstract: ' + content + content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content) except: pass diff --git a/searx/settings.yml b/searx/settings.yml @@ -464,7 +464,7 @@ engines: engine : pubmed shortcut : pub categories: science - oa_first : false + timeout : 3.0 - name : qwant engine : qwant diff --git a/searx/url_utils.py b/searx/url_utils.py @@ -3,7 +3,6 @@ from sys import version_info if version_info[0] == 2: from urllib import quote, quote_plus, unquote, urlencode from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult - from urllib2 import urlopen else: from urllib.parse import ( parse_qs, @@ -17,7 +16,6 @@ else: urlunparse, ParseResult ) - from urllib.request import urlopen __export__ = (parse_qs,