commit: e2245611d78614555f59d0fe2cd4b94ce0b39b12
parent: 8b10eb6fe197cf136fa26f86e17dee1ffb851773
Author: Adam Tauber <asciimoo@gmail.com>
Date: Thu, 13 Oct 2016 11:19:11 +0200
Merge pull request #724 from Athemis/master
[engine] PDBe (Protein Data Bank Europe)
Diffstat:
4 files changed, 226 insertions(+), 0 deletions(-)
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -59,3 +59,4 @@ generally made searx better:
- Harry Wood @harry-wood
- Thomas Renard @threnard
- Pydo `<https://github.com/pydo>`_
+- Athemis `<https://github.com/Athemis>`_
diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py
@@ -0,0 +1,109 @@
+"""
+ PDBe (Protein Data Bank in Europe)
+
+ @website https://www.ebi.ac.uk/pdbe
+ @provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
+ unlimited
+ @using-api yes
+ @results python dictionary (from json)
+ @stable yes
+ @parse url, title, content, img_src
+"""
+
+from json import loads
+from flask_babel import gettext
+
+# searx category this engine is listed under
+categories = ['science']
+
+# set to True to hide obsolete entries (default: show them)
+hide_obsolete = False
+
+# status codes that mark an entry as unpublished
+pdb_unpublished_codes = [
+    'HPUB', 'HOLD', 'PROC', 'WAIT', 'AUTH', 'AUCO',
+    'REPL', 'POLC', 'REFI', 'TRSF', 'WDRN',
+]
+# endpoint the search query is sent to
+pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?'
+# template for the page of a single entry
+pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}'
+# template for the preview image of a structure
+pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png'
+
+
+def request(query, params):
+    """Fill ``params`` with a POST request against the PDBe search endpoint.
+
+    The search terms travel in the ``q`` form field and ``wt`` asks for a
+    JSON response. The very same ``params`` object is handed back.
+    """
+    form_fields = {
+        'q': query,
+        'wt': "json"  # request response in parsable format
+    }
+
+    params['url'] = pdbe_solr_url
+    params['method'] = 'POST'
+    params['data'] = form_fields
+    return params
+
+
+def construct_body(result):
+    """Build title, content and preview image URL for a regular entry.
+
+    ``result`` is one document of the search response.
+
+    Returns the list ``[title, content, img_src]``. ``content`` is ``None``
+    when a citation field is missing or the author list is empty;
+    ``img_src`` is ``None`` when the document carries no ``pdb_id``.
+
+    Raises ``KeyError`` if the document has no ``title``.
+    """
+    # set title
+    title = result['title']
+
+    # construct content body
+    template = """{title}<br />{authors} {journal} <strong>{volume}</strong> {page} ({year})"""
+
+    # replace placeholders with actual content
+    try:
+        # .get(): a document without a 'journal' field is handled like one
+        # whose journal is empty, so the fallback branch below is reachable
+        if result.get('journal'):
+            content = template.format(
+                title=result['citation_title'],
+                authors=result['entry_author_list'][0],
+                journal=result['journal'],
+                volume=result['journal_volume'],
+                page=result['journal_page'],
+                year=result['citation_year'])
+        else:
+            content = template.format(
+                title=result['citation_title'],
+                authors=result['entry_author_list'][0],
+                journal='', volume='', page='',
+                year=result['release_year'])
+    except (KeyError, IndexError):
+        # a needed field is absent or the author list is empty; the entry
+        # is still returned, just without a content body
+        content = None
+
+    # construct url for preview image (independent of the citation fields)
+    try:
+        img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
+    except KeyError:
+        img_src = None
+
+    return [title, content, img_src]
+
+
+def response(resp):
+    """Turn the JSON answer of the PDBe search API into a list of results.
+
+    Entries whose status is one of ``pdb_unpublished_codes`` are dropped.
+    Entries with status ``OBS`` are dropped when ``hide_obsolete`` is set;
+    otherwise they get an "(OBSOLETE)" title and a content line linking to
+    the superseding entry. Every other entry is rendered by
+    ``construct_body``.
+
+    Returns a list of dicts with the keys ``url``, ``title``, ``content``
+    and ``img_src``.
+    """
+    results = []
+    docs = loads(resp.text)['response']['docs']
+
+    # parse results
+    for result in docs:
+        # unpublished entries are never shown
+        if result['status'] in pdb_unpublished_codes:
+            continue
+
+        if result['status'] == 'OBS':
+            # hide_obsolete must only affect obsolete entries, not all of them
+            if hide_obsolete:
+                continue
+
+            # expand title to add some sort of warning message
+            title = gettext('{title} (OBSOLETE)').format(title=result['title'])
+            superseded_url = pdbe_entry_url.format(pdb_id=result['superseded_by'])
+
+            # since we can't construct a proper body from the response, we'll make up our own
+            msg_superseded = gettext("This entry has been superseded by")
+            content = '<em>{msg_superseded} <a href="{url}">{pdb_id}</a></em>'.format(
+                msg_superseded=msg_superseded,
+                url=superseded_url,
+                pdb_id=result['superseded_by'])
+
+            # obsoleted entries don't have preview images
+            img_src = None
+        else:
+            title, content, img_src = construct_body(result)
+
+        results.append({
+            'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
+            'title': title,
+            'content': content,
+            'img_src': img_src
+        })
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -339,6 +339,13 @@ engines:
disabled : True
shortcut : or
+ - name : pdbe
+ engine : pdbe
+ shortcut : pdb
+# Hide obsolete PDB entries.
+# Default is not to hide obsolete structures
+# hide_obsolete : False
+
- name : photon
engine : photon
shortcut : ph
diff --git a/tests/unit/engines/test_pdbe.py b/tests/unit/engines/test_pdbe.py
@@ -0,0 +1,109 @@
+import mock
+from collections import defaultdict
+from searx.engines import pdbe
+from searx.testing import SearxTestCase
+
+
+class TestPdbeEngine(SearxTestCase):
+    """Unit tests for the request() and response() functions of the PDBe engine."""
+
+    def test_request(self):
+        """request() must produce a POST to ebi.ac.uk carrying the query and wt=json."""
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        params = pdbe.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertIn('ebi.ac.uk', params['url'])
+        self.assertIn('data', params)
+        self.assertIn('q', params['data'])
+        self.assertIn(query, params['data']['q'])
+        self.assertIn('wt', params['data'])
+        self.assertIn('json', params['data']['wt'])
+        self.assertIn('method', params)
+        self.assertEqual(params['method'], 'POST')
+
+    def test_response(self):
+        """response() must reject non-response objects and parse released and obsolete entries."""
+        # objects without a .text attribute cannot be parsed
+        self.assertRaises(AttributeError, pdbe.response, None)
+        self.assertRaises(AttributeError, pdbe.response, [])
+        self.assertRaises(AttributeError, pdbe.response, '')
+        self.assertRaises(AttributeError, pdbe.response, '[]')
+
+        json = """
+{
+ "response": {
+ "docs": [
+ {
+ "citation_title": "X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.",
+ "citation_year": 1993,
+ "entry_author_list": [
+ "Conti E, Moser C, Rizzi M, Mattevi A, Lionetti C, Coda A, Ascenzi P, Brunori M, Bolognesi M"
+ ],
+ "journal": "J. Mol. Biol.",
+ "journal_page": "498-508",
+ "journal_volume": "233",
+ "pdb_id": "2fal",
+ "status": "REL",
+ "title": "X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES"
+ }
+ ],
+ "numFound": 1,
+ "start": 0
+ },
+ "responseHeader": {
+ "QTime": 0,
+ "params": {
+ "q": "2fal",
+ "wt": "json"
+ },
+ "status": 0
+ }
+}
+"""
+
+        response = mock.Mock(text=json)
+        results = pdbe.response(response)
+        self.assertIsInstance(results, list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'],
+                         'X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES')
+        self.assertEqual(results[0]['url'], pdbe.pdbe_entry_url.format(pdb_id='2fal'))
+        self.assertEqual(results[0]['img_src'], pdbe.pdbe_preview_url.format(pdb_id='2fal'))
+        self.assertIn('Conti E', results[0]['content'])
+        self.assertIn('X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.',
+                      results[0]['content'])
+        self.assertIn('1993', results[0]['content'])
+
+        # Testing proper handling of PDB entries marked as obsolete
+        json = """
+{
+ "response": {
+ "docs": [
+ {
+ "citation_title": "Obsolete entry test",
+ "citation_year": 2016,
+ "entry_author_list": ["Doe J"],
+ "journal": "J. Obs.",
+ "journal_page": "1-2",
+ "journal_volume": "1",
+ "pdb_id": "xxxx",
+ "status": "OBS",
+ "title": "OBSOLETE ENTRY TEST",
+ "superseded_by": "yyyy"
+ }
+ ],
+ "numFound": 1,
+ "start": 0
+ },
+ "responseHeader": {
+ "QTime": 0,
+ "params": {
+ "q": "xxxx",
+ "wt": "json"
+ },
+ "status": 0
+ }
+}
+"""
+        response = mock.Mock(text=json)
+        results = pdbe.response(response)
+        self.assertIsInstance(results, list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'OBSOLETE ENTRY TEST (OBSOLETE)')
+        self.assertTrue(results[0]['content'].startswith('<em>This entry has been superseded by'))