logo

searx

My custom branch(es) of searx, a meta-search engine
commit: e2245611d78614555f59d0fe2cd4b94ce0b39b12
parent: 8b10eb6fe197cf136fa26f86e17dee1ffb851773
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Thu, 13 Oct 2016 11:19:11 +0200

Merge pull request #724 from Athemis/master

[engine] PDBe (Protein Data Bank Europe)

Diffstat:

M AUTHORS.rst | 1 +
A searx/engines/pdbe.py | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/settings.yml | 7 +++++++
A tests/unit/engines/test_pdbe.py | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 226 insertions(+), 0 deletions(-)

diff --git a/AUTHORS.rst b/AUTHORS.rst @@ -59,3 +59,4 @@ generally made searx better: - Harry Wood @harry-wood - Thomas Renard @threnard - Pydo `<https://github.com/pydo>`_ +- Athemis `<https://github.com/Athemis>`_ diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py @@ -0,0 +1,109 @@ +""" + PDBe (Protein Data Bank in Europe) + + @website https://www.ebi.ac.uk/pdbe + @provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html), + unlimited + @using-api yes + @results python dictionary (from json) + @stable yes + @parse url, title, content, img_src +""" + +from json import loads +from flask_babel import gettext + +categories = ['science'] + +hide_obsolete = False + +# status codes of unpublished entries +pdb_unpublished_codes = ['HPUB', 'HOLD', 'PROC', 'WAIT', 'AUTH', 'AUCO', 'REPL', 'POLC', 'REFI', 'TRSF', 'WDRN'] +# url for api query +pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?' +# base url for results +pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}' +# link to preview image of structure +pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png' + + +def request(query, params): + + params['url'] = pdbe_solr_url + params['method'] = 'POST' + params['data'] = { + 'q': query, + 'wt': "json" # request response in parsable format + } + return params + + +def construct_body(result): + # set title + title = result['title'] + + # construct content body + content = """{title}<br />{authors} {journal} <strong>{volume}</strong>&nbsp;{page} ({year})""" + + # replace placeholders with actual content + try: + if result['journal']: + content = content.format( + title=result['citation_title'], + authors=result['entry_author_list'][0], journal=result['journal'], volume=result['journal_volume'], + page=result['journal_page'], year=result['citation_year']) + else: + content = content.format( + title=result['citation_title'], + authors=result['entry_author_list'][0], journal='', 
volume='', page='', year=result['release_year']) + img_src = pdbe_preview_url.format(pdb_id=result['pdb_id']) + except (KeyError): + content = None + img_src = None + + # construct url for preview image + try: + img_src = pdbe_preview_url.format(pdb_id=result['pdb_id']) + except (KeyError): + img_src = None + + return [title, content, img_src] + + +def response(resp): + + results = [] + json = loads(resp.text)['response']['docs'] + + # parse results + for result in json: + # catch obsolete entries and mark them accordingly + if result['status'] in pdb_unpublished_codes: + continue + if hide_obsolete: + continue + if result['status'] == 'OBS': + # expand title to add some sort of warning message + title = gettext('{title}&nbsp;(OBSOLETE)').format(title=result['title']) + superseded_url = pdbe_entry_url.format(pdb_id=result['superseded_by']) + + # since we can't construct a proper body from the response, we'll make up our own + msg_superseded = gettext("This entry has been superseded by") + content = '<em>{msg_superseded} \<a href="{url}">{pdb_id}</a></em>'.format( + msg_superseded=msg_superseded, + url=superseded_url, + pdb_id=result['superseded_by'], ) + + # obsoleted entries don't have preview images + img_src = None + else: + title, content, img_src = construct_body(result) + + results.append({ + 'url': pdbe_entry_url.format(pdb_id=result['pdb_id']), + 'title': title, + 'content': content, + 'img_src': img_src + }) + + return results diff --git a/searx/settings.yml b/searx/settings.yml @@ -339,6 +339,13 @@ engines: disabled : True shortcut : or + - name : pdbe + engine : pdbe + shortcut : pdb +# Hide obsolete PDB entries. 
+# Default is not to hide obsolete structures +# hide_obsolete : False + - name : photon engine : photon shortcut : ph diff --git a/tests/unit/engines/test_pdbe.py b/tests/unit/engines/test_pdbe.py @@ -0,0 +1,109 @@ +import mock +from collections import defaultdict +from searx.engines import pdbe +from searx.testing import SearxTestCase + + +class TestPdbeEngine(SearxTestCase): + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + params = pdbe.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue('ebi.ac.uk' in params['url']) + self.assertTrue('data' in params) + self.assertTrue('q' in params['data']) + self.assertTrue(query in params['data']['q']) + self.assertTrue('wt' in params['data']) + self.assertTrue('json' in params['data']['wt']) + self.assertTrue('method' in params) + self.assertTrue(params['method'] == 'POST') + + def test_response(self): + self.assertRaises(AttributeError, pdbe.response, None) + self.assertRaises(AttributeError, pdbe.response, []) + self.assertRaises(AttributeError, pdbe.response, '') + self.assertRaises(AttributeError, pdbe.response, '[]') + + json = """ +{ + "response": { + "docs": [ + { + "citation_title": "X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.", + "citation_year": 1993, + "entry_author_list": [ + "Conti E, Moser C, Rizzi M, Mattevi A, Lionetti C, Coda A, Ascenzi P, Brunori M, Bolognesi M" + ], + "journal": "J. Mol. 
Biol.", + "journal_page": "498-508", + "journal_volume": "233", + "pdb_id": "2fal", + "status": "REL", + "title": "X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES" + } + ], + "numFound": 1, + "start": 0 + }, + "responseHeader": { + "QTime": 0, + "params": { + "q": "2fal", + "wt": "json" + }, + "status": 0 + } +} +""" + + response = mock.Mock(text=json) + results = pdbe.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], + 'X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES') + self.assertEqual(results[0]['url'], pdbe.pdbe_entry_url.format(pdb_id='2fal')) + self.assertEqual(results[0]['img_src'], pdbe.pdbe_preview_url.format(pdb_id='2fal')) + self.assertTrue('Conti E' in results[0]['content']) + self.assertTrue('X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.' in + results[0]['content']) + self.assertTrue('1993' in results[0]['content']) + + # Testing proper handling of PDB entries marked as obsolete + json = """ +{ + "response": { + "docs": [ + { + "citation_title": "Obsolete entry test", + "citation_year": 2016, + "entry_author_list": ["Doe J"], + "journal": "J. Obs.", + "journal_page": "1-2", + "journal_volume": "1", + "pdb_id": "xxxx", + "status": "OBS", + "title": "OBSOLETE ENTRY TEST", + "superseded_by": "yyyy" + } + ], + "numFound": 1, + "start": 0 + }, + "responseHeader": { + "QTime": 0, + "params": { + "q": "xxxx", + "wt": "json" + }, + "status": 0 + } +} +""" + response = mock.Mock(text=json) + results = pdbe.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'OBSOLETE ENTRY TEST&nbsp;(OBSOLETE)') + self.assertTrue(results[0]['content'].startswith('<em>This entry has been superseded by'))