Browse Source

initial commit of pdbe engine

Adds support for queries to the Protein Data Bank in Europe (PDBe).
Alexander Minges 8 years ago
parent
commit
3c5883408c
4 changed files with 226 additions and 0 deletions
  1. 1 0
      AUTHORS.rst
  2. 109 0
      searx/engines/pdbe.py
  3. 7 0
      searx/settings.yml
  4. 109 0
      tests/unit/engines/test_pdbe.py

+ 1 - 0
AUTHORS.rst

@@ -59,3 +59,4 @@ generally made searx better:
 - Harry Wood @harry-wood
 - Thomas Renard @threnard
 - Pydo `<https://github.com/pydo>`_
+- Athemis `<https://github.com/Athemis>`_

+ 109 - 0
searx/engines/pdbe.py

@@ -0,0 +1,109 @@
+"""
+ PDBe (Protein Data Bank in Europe)
+
+ @website       https://www.ebi.ac.uk/pdbe
+ @provide-api   yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
+                unlimited
+ @using-api     yes
+ @results       python dictionary (from json)
+ @stable        yes
+ @parse         url, title, content, img_src
+"""
+
+from json import loads
+from flask_babel import gettext
+
+# categories this engine is listed under
+categories = ['science']
+
+# whether obsolete entries should be hidden (off by default)
+hide_obsolete = False
+
+# entry status codes that mark a structure as not (yet) published
+pdb_unpublished_codes = [
+    'HPUB', 'HOLD', 'PROC', 'WAIT', 'AUTH', 'AUCO',
+    'REPL', 'POLC', 'REFI', 'TRSF', 'WDRN',
+]
+# endpoint the search query is sent to
+pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?'
+# template for the page of a single entry; ``pdb_id`` is filled in per result
+pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}'
+# template for the 200x200 preview image of a structure
+pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png'
+
+
+def request(query, params):
+    """Fill ``params`` with the POST request for a PDBe search.
+
+    Sets the ``url``, ``method`` and ``data`` entries of ``params`` and
+    returns the same ``params`` object.
+    """
+    # ask for JSON ('wt') so the reply can be parsed in response()
+    payload = {'q': query, 'wt': "json"}
+
+    params['url'] = pdbe_solr_url
+    params['method'] = 'POST'
+    params['data'] = payload
+    return params
+
+
+def construct_body(result):
+    """Build title, content and preview image URL for one PDBe entry.
+
+    ``result`` is a single document (dict) from the search response.
+
+    Returns the list ``[title, content, img_src]``. ``content`` is ``None``
+    when a field needed for the citation line is missing, ``img_src`` is
+    ``None`` when the entry has no ``pdb_id``.
+
+    Raises ``KeyError`` if ``result`` has no ``title``.
+    """
+    # set title
+    title = result['title']
+
+    # construct content body
+    content = """{title}<br />{authors} {journal} <strong>{volume}</strong>&nbsp;{page} ({year})"""
+
+    # replace placeholders with actual content
+    try:
+        # use .get(): an absent 'journal' field must take the fallback branch
+        # below instead of raising KeyError and discarding the whole content
+        if result.get('journal'):
+            content = content.format(
+                title=result['citation_title'],
+                authors=result['entry_author_list'][0], journal=result['journal'], volume=result['journal_volume'],
+                page=result['journal_page'], year=result['citation_year'])
+        else:
+            # no journal information: fall back to the release year
+            content = content.format(
+                title=result['citation_title'],
+                authors=result['entry_author_list'][0], journal='', volume='', page='', year=result['release_year'])
+    except (KeyError, IndexError):
+        # a required field is missing or the author list is empty
+        content = None
+
+    # construct url for preview image
+    try:
+        img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
+    except KeyError:
+        img_src = None
+
+    return [title, content, img_src]
+
+
+def response(resp):
+    """Convert a PDBe search reply into a list of result dicts.
+
+    ``resp.text`` must hold the JSON reply; the documents are read from
+    ``response.docs``. Each returned dict has the keys ``url``, ``title``,
+    ``content`` and ``img_src``.
+
+    Entries whose status is listed in ``pdb_unpublished_codes`` are skipped.
+    Obsolete entries (status ``OBS``) are skipped when ``hide_obsolete`` is
+    set; otherwise they get a marked title and a link to the superseding
+    entry.
+
+    Raises ``KeyError`` if a document lacks ``status`` or ``pdb_id``, or if
+    an obsolete document lacks ``superseded_by``.
+    """
+    results = []
+    docs = loads(resp.text)['response']['docs']
+
+    # parse results
+    for result in docs:
+        status = result['status']
+
+        # skip entries whose status marks them as unpublished
+        if status in pdb_unpublished_codes:
+            continue
+
+        if status == 'OBS':
+            # hide_obsolete must only affect obsolete entries, not every result
+            if hide_obsolete:
+                continue
+
+            # expand title to add some sort of warning message
+            title = gettext('{title}&nbsp;(OBSOLETE)').format(title=result['title'])
+            superseded_by = result['superseded_by']
+            superseded_url = pdbe_entry_url.format(pdb_id=superseded_by)
+
+            # since we can't construct a proper body from the response, we'll make up our own
+            msg_superseded = gettext("This entry has been superseded by")
+            content = '<em>{msg_superseded} <a href="{url}">{pdb_id}</a></em>'.format(
+                msg_superseded=msg_superseded,
+                url=superseded_url,
+                pdb_id=superseded_by)
+
+            # obsoleted entries don't have preview images
+            img_src = None
+        else:
+            title, content, img_src = construct_body(result)
+
+        results.append({
+            'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
+            'title': title,
+            'content': content,
+            'img_src': img_src
+        })
+
+    return results

+ 7 - 0
searx/settings.yml

@@ -339,6 +339,13 @@ engines:
     disabled : True
     shortcut : or
 
+  - name : pdbe
+    engine : pdbe
+    shortcut : pdb
+# Set hide_obsolete to True to hide obsolete PDB entries.
+# By default, obsolete structures are shown and marked as obsolete.
+#    hide_obsolete : False
+
   - name : photon
     engine : photon
     shortcut : ph

+ 109 - 0
tests/unit/engines/test_pdbe.py

@@ -0,0 +1,109 @@
+import mock
+from collections import defaultdict
+from searx.engines import pdbe
+from searx.testing import SearxTestCase
+
+
+# Unit tests for the request/response functions of the pdbe engine.
+class TestPdbeEngine(SearxTestCase):
+    # request() must fill in url, method and the POST data (query + JSON format)
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        params = pdbe.request(query, dicto)
+        self.assertTrue('url' in params)
+        self.assertTrue('ebi.ac.uk' in params['url'])
+        self.assertTrue('data' in params)
+        self.assertTrue('q' in params['data'])
+        self.assertTrue(query in params['data']['q'])
+        self.assertTrue('wt' in params['data'])
+        self.assertTrue('json' in params['data']['wt'])
+        self.assertTrue('method' in params)
+        self.assertTrue(params['method'] == 'POST')
+
+    # response() must parse a released entry and an obsolete entry
+    def test_response(self):
+        # arguments without a ``text`` attribute are expected to raise AttributeError
+        self.assertRaises(AttributeError, pdbe.response, None)
+        self.assertRaises(AttributeError, pdbe.response, [])
+        self.assertRaises(AttributeError, pdbe.response, '')
+        self.assertRaises(AttributeError, pdbe.response, '[]')
+
+        # reply containing a single released ("REL") entry with full citation data
+        json = """
+{
+  "response": {
+    "docs": [
+      {
+        "citation_title": "X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.",
+        "citation_year": 1993,
+        "entry_author_list": [
+          "Conti E, Moser C, Rizzi M, Mattevi A, Lionetti C, Coda A, Ascenzi P, Brunori M, Bolognesi M"
+        ],
+        "journal": "J. Mol. Biol.",
+        "journal_page": "498-508",
+        "journal_volume": "233",
+        "pdb_id": "2fal",
+        "status": "REL",
+        "title": "X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES"
+      }
+    ],
+    "numFound": 1,
+    "start": 0
+  },
+  "responseHeader": {
+    "QTime": 0,
+    "params": {
+      "q": "2fal",
+      "wt": "json"
+    },
+    "status": 0
+  }
+}
+"""
+
+        # only ``text`` is provided on the mocked response object
+        response = mock.Mock(text=json)
+        results = pdbe.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'],
+                         'X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES')
+        self.assertEqual(results[0]['url'], pdbe.pdbe_entry_url.format(pdb_id='2fal'))
+        self.assertEqual(results[0]['img_src'], pdbe.pdbe_preview_url.format(pdb_id='2fal'))
+        # content must carry first author entry, citation title and year
+        self.assertTrue('Conti E' in results[0]['content'])
+        self.assertTrue('X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.' in
+                        results[0]['content'])
+        self.assertTrue('1993' in results[0]['content'])
+
+        # Testing proper handling of PDB entries marked as obsolete
+        json = """
+{
+  "response": {
+    "docs": [
+      {
+        "citation_title": "Obsolete entry test",
+        "citation_year": 2016,
+        "entry_author_list": ["Doe J"],
+        "journal": "J. Obs.",
+        "journal_page": "1-2",
+        "journal_volume": "1",
+        "pdb_id": "xxxx",
+        "status": "OBS",
+        "title": "OBSOLETE ENTRY TEST",
+        "superseded_by": "yyyy"
+      }
+    ],
+    "numFound": 1,
+    "start": 0
+  },
+  "responseHeader": {
+    "QTime": 0,
+    "params": {
+      "q": "xxxx",
+      "wt": "json"
+    },
+    "status": 0
+  }
+}
+"""
+        response = mock.Mock(text=json)
+        results = pdbe.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        # obsolete entries get a marked title and a "superseded by" notice as content
+        self.assertEqual(results[0]['title'], 'OBSOLETE ENTRY TEST&nbsp;(OBSOLETE)')
+        self.assertTrue(results[0]['content'].startswith('<em>This entry has been superseded by'))