diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/ModelAnnotator.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/ModelAnnotator.java
index fb21e1e31b1a74f5dfe67cd9121e222938112d03..b82f08768c430eb57a03f6a152bc2e8c9fa391bc 100644
--- a/annotation/src/main/java/lcsb/mapviewer/annotation/services/ModelAnnotator.java
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/ModelAnnotator.java
@@ -23,6 +23,7 @@ import lcsb.mapviewer.annotation.services.annotators.EnsemblAnnotator;
 import lcsb.mapviewer.annotation.services.annotators.EntrezAnnotator;
 import lcsb.mapviewer.annotation.services.annotators.GoAnnotator;
 import lcsb.mapviewer.annotation.services.annotators.HgncAnnotator;
+import lcsb.mapviewer.annotation.services.annotators.PdbAnnotator;
 import lcsb.mapviewer.annotation.services.annotators.ReconAnnotator;
 import lcsb.mapviewer.annotation.services.annotators.UniprotAnnotator;
 import lcsb.mapviewer.common.IProgressUpdater;
@@ -85,6 +86,12 @@ public class ModelAnnotator {
 	 */
 	@Autowired
 	private UniprotAnnotator uniprotAnnotator;
+
+	/**
+	 * PDB annotator.
+	 */
+	@Autowired
+	private PdbAnnotator pdbAnnotator;
 
 	/**
 	 * Recon annotator.
@@ -142,6 +149,7 @@ public class ModelAnnotator {
 		addAnnotator(uniprotAnnotator);
 		addAnnotator(goAnnotator);
 		addAnnotator(hgncAnnotator);
+		addAnnotator(pdbAnnotator);
 		addAnnotator(reconAnnotator);
 		addAnnotator(entrezAnnotator);
 		addAnnotator(ensemblAnnotator);
diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/ElementAnnotator.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/ElementAnnotator.java
index dde647a3bf294e8e2aaeeb1e3744ae451ee458c8..7c28ab4f17f65dc43031067f609509afd2d17ed2 100644
--- a/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/ElementAnnotator.java
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/ElementAnnotator.java
@@ -40,7 +40,7 @@ public abstract class ElementAnnotator extends CachableInterface {
 	private final List<Class<? extends BioEntity>> validClasses = new ArrayList<>();
 
 	/**
-	 * Should be this annotator used as a default annotatior.
+	 * Should be this annotator used as a default annotator.
 	 */
 	private boolean isDefault = false;
 
diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/PdbAnnotator.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/PdbAnnotator.java
new file mode 100644
index 0000000000000000000000000000000000000000..4c2d0110f86ab6b7db470889fc34cba371caff6c
--- /dev/null
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/PdbAnnotator.java
@@ -0,0 +1,275 @@
+package lcsb.mapviewer.annotation.services.annotators;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonParseException;
+import com.google.gson.reflect.TypeToken;
+
+import lcsb.mapviewer.annotation.cache.GeneralCacheInterface;
+import lcsb.mapviewer.annotation.cache.SourceNotAvailable;
+import lcsb.mapviewer.annotation.cache.WebPageDownloader;
+import lcsb.mapviewer.annotation.services.ExternalServiceStatus;
+import lcsb.mapviewer.annotation.services.ExternalServiceStatusType;
+import lcsb.mapviewer.annotation.services.IExternalService;
+import lcsb.mapviewer.annotation.services.WrongResponseCodeIOException;
+import lcsb.mapviewer.common.exception.InvalidArgumentException;
+import lcsb.mapviewer.model.map.BioEntity;
+import lcsb.mapviewer.model.map.MiriamData;
+import lcsb.mapviewer.model.map.MiriamType;
+import lcsb.mapviewer.model.map.species.Protein;
+import lcsb.mapviewer.model.map.species.Rna;
+import lcsb.mapviewer.modelutils.map.ElementUtils;
+
+/**
+ * Annotator that maps {@link MiriamType#UNIPROT UniProt} identifiers to
+ * {@link MiriamType#PDB PDB} entries using the "best structures" mapping of
+ * the EBI PDBe REST API.
+ *
+ * @author David Hoksza
+ * @author hellb
+ *
+ */
+public class PdbAnnotator extends ElementAnnotator implements IExternalService {
+
+	/**
+	 * Default class logger.
+	 */
+	private static Logger logger = Logger.getLogger(PdbAnnotator.class);
+
+	/**
+	 * Class used for some simple operations on {@link BioEntity} elements.
+	 */
+	private ElementUtils elementUtils = new ElementUtils();
+
+	/**
+	 * Service used for annotation of proteins using {@link MiriamType#UNIPROT
+	 * uniprot}.
+	 */
+	@Autowired
+	private UniprotAnnotator uniprotAnnotator;
+
+	/**
+	 * Default constructor.
+	 */
+	public PdbAnnotator() {
+		super(PdbAnnotator.class, new Class[] { Protein.class, Rna.class }, false);
+	}
+
+	@Override
+	public ExternalServiceStatus getServiceStatus() {
+		ExternalServiceStatus status = new ExternalServiceStatus(getCommonName(), getUrl());
+
+		// Query with the cache detached; it is put back in the finally block.
+		GeneralCacheInterface cacheCopy = getCache();
+		this.setCache(null);
+
+		try {
+			Collection<MiriamData> pdbMiriamData = uniProtToPdb(new MiriamData(MiriamType.UNIPROT, "P29373"));
+
+			if (pdbMiriamData.isEmpty()) {
+				status.setStatus(ExternalServiceStatusType.DOWN);
+			} else if (pdbMiriamData.iterator().next().getResource() != null) {
+				status.setStatus(ExternalServiceStatusType.OK);
+			} else {
+				// Entries were returned, but without the "pdb_id" field.
+				status.setStatus(ExternalServiceStatusType.CHANGED);
+			}
+		} catch (Exception e) {
+			logger.error(status.getName() + " is down", e);
+			status.setStatus(ExternalServiceStatusType.DOWN);
+		} finally {
+			this.setCache(cacheCopy);
+		}
+		return status;
+	}
+
+	/**
+	 * Returns the first {@link MiriamType#UNIPROT} annotation of the element.
+	 *
+	 * @param bioEntity
+	 *          element whose annotations are searched
+	 * @return first UniProt annotation, or <code>null</code> if there is none
+	 */
+	public MiriamData getUnitProt(BioEntity bioEntity) {
+		for (MiriamData md : bioEntity.getMiriamData()) {
+			if (MiriamType.UNIPROT.equals(md.getDataType())) {
+				return md;
+			}
+		}
+		return null;
+	}
+
+	@Override
+	public void annotateElement(BioEntity bioEntity) throws AnnotatorException {
+		if (!isAnnotatable(bioEntity)) {
+			return;
+		}
+
+		MiriamData md = getUnitProt(bioEntity);
+		if (md == null) {
+			// No UniProt identifier yet: let the UniProt annotator try to add one.
+			uniprotAnnotator.annotateElement(bioEntity);
+			md = getUnitProt(bioEntity);
+		}
+		if (md == null) {
+			return;
+		}
+
+		try {
+			Set<MiriamData> annotations = new HashSet<MiriamData>(uniProtToPdb(md));
+			if (annotations.isEmpty()) {
+				logger.warn(elementUtils.getElementTag(bioEntity) + " No PDB mapping for UniProt ID: " + md.getResource());
+			} else {
+				bioEntity.addMiriamData(annotations);
+			}
+		} catch (WrongResponseCodeIOException exception) {
+			logger.warn("Response error when trying to find PDB mapping for UniProt ID: " + md.getResource());
+		} catch (IOException exception) {
+			throw new AnnotatorException(exception);
+		}
+	}
+
+	/**
+	 * Returns url to JSON with best mapping PDB entries given the UniProt entry.
+	 *
+	 * @param uniprotId
+	 *          uniprot identifier
+	 * @return url with best mapping PDB entries to the UniProt entry
+	 */
+	private String getPdbMappingUrl(String uniprotId) {
+		return "https://www.ebi.ac.uk/pdbe/api/mappings/best_structures/" + uniprotId;
+	}
+
+	/**
+	 * Parses the UniProt-to-PDB mapping JSON and returns the
+	 * {@link MiriamType#PDB} entries it lists.
+	 *
+	 * @param pageContentJson
+	 *          JSON document with the UniProt to PDB mapping
+	 * @return PDB identifiers found in the document; empty (never
+	 *         <code>null</code>) when the document is empty or does not have the
+	 *         expected structure
+	 */
+	private Collection<MiriamData> processMappingData(String pageContentJson) {
+		Collection<MiriamData> result = new HashSet<MiriamData>();
+		java.lang.reflect.Type mappingType = new TypeToken<Map<String, List<PdbBestMappingEntry>>>(){}.getType();
+
+		Map<String, List<PdbBestMappingEntry>> mapping;
+		try {
+			mapping = new Gson().fromJson(pageContentJson, mappingType);
+		} catch (JsonParseException e) {
+			// Well-formed JSON that is not a map of entry lists (e.g. an array).
+			logger.debug("Unexpected structure of PDB mapping data", e);
+			return result;
+		}
+		if (mapping == null) {
+			return result;
+		}
+
+		for (List<PdbBestMappingEntry> entries : mapping.values()) {
+			if (entries == null) {
+				continue;
+			}
+			for (PdbBestMappingEntry entry : entries) {
+				if (entry != null) {
+					result.add(new MiriamData(MiriamType.PDB, entry.pdb_id));
+				}
+			}
+		}
+		return result;
+	}
+
+	@Override
+	public Object refreshCacheQuery(Object query) throws SourceNotAvailable {
+		if (!(query instanceof String)) {
+			throw new InvalidArgumentException("Don't know what to do with class: " + query.getClass());
+		}
+		String name = (String) query;
+		if (!name.startsWith("http")) {
+			throw new InvalidArgumentException("Don't know what to do with query: " + query);
+		}
+		try {
+			return getWebPageContent(name);
+		} catch (IOException e) {
+			throw new SourceNotAvailable(e);
+		}
+	}
+
+	/**
+	 * Tests if given input string is a valid JSON document.
+	 *
+	 * @param json
+	 *          Input document as a string.
+	 * @return <code>true</code> if the input parses as JSON; <code>false</code>
+	 *         if it is <code>null</code>, blank or malformed
+	 */
+	public static boolean isJson(String json) {
+		if (json == null || json.trim().isEmpty()) {
+			return false;
+		}
+		try {
+			new Gson().fromJson(json, Object.class);
+			return true;
+		} catch (JsonParseException ex) {
+			// Common parent of Gson's JsonSyntaxException and JsonIOException.
+			return false;
+		}
+	}
+
+	/**
+	 * Transform UniProt identifier into PDB IDs.
+	 *
+	 * @param uniprot
+	 *          {@link MiriamData} with UniProt identifier
+	 * @return PDB entries mapped to the UniProt identifier; empty (never
+	 *         <code>null</code>) when the argument is <code>null</code> or the
+	 *         service response is not a usable JSON document
+	 * @throws IOException
+	 *           thrown when there is a problem with accessing external database
+	 */
+	public Collection<MiriamData> uniProtToPdb(MiriamData uniprot) throws IOException {
+		if (uniprot == null) {
+			return new HashSet<MiriamData>();
+		}
+
+		if (!MiriamType.UNIPROT.equals(uniprot.getDataType())) {
+			throw new InvalidArgumentException(MiriamType.UNIPROT + " expected.");
+		}
+
+		String json = getWebPageContent(getPdbMappingUrl(uniprot.getResource()));
+		if (!isJson(json)) {
+			return new HashSet<MiriamData>();
+		}
+		return processMappingData(json);
+	}
+
+	@Override
+	public String getCommonName() {
+		return MiriamType.PDB.getCommonName();
+	}
+
+	@Override
+	public String getUrl() {
+		return MiriamType.PDB.getDbHomepage();
+	}
+
+	@Override
+	protected WebPageDownloader getWebPageDownloader() {
+		return super.getWebPageDownloader();
+	}
+
+	@Override
+	protected void setWebPageDownloader(WebPageDownloader webPageDownloader) {
+		super.setWebPageDownloader(webPageDownloader);
+	}
+
+}
diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/PdbBestMappingEntry.java
b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/PdbBestMappingEntry.java
new file mode 100644
index 0000000000000000000000000000000000000000..00f63e8ddd84ed391415351cf9cbd8126ee9b691
--- /dev/null
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/PdbBestMappingEntry.java
@@ -0,0 +1,23 @@
+package lcsb.mapviewer.annotation.services.annotators;
+
+/**
+ * Structure of the PDB entries returned by the PDBe REST API "Best Structures".
+ * Gson fills the fields by name, so the field names must match the JSON keys.
+ *
+ * @author David Hoksza
+ *
+ */
+public class PdbBestMappingEntry {
+	public String end;
+	public String chain_id;
+	public String experimental_method;
+	/** Identifier of the PDB entry. */
+	public String pdb_id;
+	public int start;
+	public int unp_end;
+	public double coverage;
+	public int unp_start;
+	public double resolution;
+	public int tax_id;
+
+}
diff --git a/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/AllAnnotatorTests.java b/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/AllAnnotatorTests.java
index 71dae4246b80c0f32a5ea1543bb31b73120d149d..218bd397eec6b1d0c43b14046b61f4db209aa6ed 100644
--- a/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/AllAnnotatorTests.java
+++ b/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/AllAnnotatorTests.java
@@ -13,6 +13,7 @@ import org.junit.runners.Suite.SuiteClasses;
 		EntrezAnnotatorTest.class, //
 		GoAnnotatorTest.class, //
 		HgncAnnotatorTest.class, //
+		PdbAnnotatorTest.class, //
 		ReconAnnotatorTest.class, //
 		UniprotAnnotatorTest.class, //
 })
diff --git a/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/PdbAnnotatorTest.java b/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/PdbAnnotatorTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..f50382c6fb8e2512988e7bc90dbe1a3b964efece
--- /dev/null
+++ b/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/PdbAnnotatorTest.java
@@ -0,0 +1,146 @@
+package lcsb.mapviewer.annotation.services.annotators;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.when;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import lcsb.mapviewer.annotation.AnnotationTestFunctions;
+import lcsb.mapviewer.annotation.cache.WebPageDownloader;
+import lcsb.mapviewer.annotation.services.ExternalServiceStatusType;
+import lcsb.mapviewer.model.map.MiriamData;
+import lcsb.mapviewer.model.map.MiriamType;
+import lcsb.mapviewer.model.map.species.GenericProtein;
+import lcsb.mapviewer.model.map.species.Species;
+
+public class PdbAnnotatorTest extends AnnotationTestFunctions {
+
+	@Autowired
+	PdbAnnotator pdbAnnotator;
+
+	@Autowired
+	UniprotAnnotator uniprotAnnotator;
+
+	@Before
+	public void setUp() throws Exception {
+	}
+
+	@After
+	public void tearDown() throws Exception {
+	}
+
+	@Test
+	public void testAnnotate1() throws Exception {
+		Species protein = new GenericProtein("id");
+		protein.setName("P29373");
+		// First, annotate the protein with UniProt, which the PDB annotator builds on.
+		uniprotAnnotator.annotateElement(protein);
+		int cntAnnotations1 = protein.getMiriamData().size();
+		pdbAnnotator.annotateElement(protein);
+		int cntAnnotations2 = protein.getMiriamData().size();
+
+		assertTrue(cntAnnotations2 > cntAnnotations1);
+		assertTrue("No PDB annotation extracted from pdb annotator", hasPdbAnnotation(protein));
+	}
+
+	@Test
+	public void testAnnotateNotUniprotAnnodated() throws Exception {
+		Species protein = new GenericProtein("id");
+		protein.setName("P29373");
+		pdbAnnotator.annotateElement(protein);
+
+		assertTrue("UniProt annotation in PDB annotator failed", protein.getMiriamData().size() > 0);
+		assertTrue("No PDB annotation extracted from PDB annotator", hasPdbAnnotation(protein));
+	}
+
+	@Test
+	public void testAnnotateInvalidUniprot() throws Exception {
+		Species protein = new GenericProtein("id");
+		protein.setName("bla");
+		pdbAnnotator.annotateElement(protein);
+
+		assertEquals(0, protein.getMiriamData().size());
+	}
+
+	@Test
+	public void testAnnotateValidUniprotInexistingPdb() throws Exception {
+		Species protein = new GenericProtein("id");
+		protein.setName("Q88VP8");
+		pdbAnnotator.annotateElement(protein);
+
+		assertFalse("PDB mapping found for structure for which no should be available", hasPdbAnnotation(protein));
+		assertEquals(1, getWarnings().size());
+	}
+
+	@Test
+	public void testSimulateDownStatus() throws Exception {
+		WebPageDownloader downloader = pdbAnnotator.getWebPageDownloader();
+		try {
+			WebPageDownloader mockDownloader = Mockito.mock(WebPageDownloader.class);
+			when(mockDownloader.getFromNetwork(anyString())).thenReturn("");
+			pdbAnnotator.setWebPageDownloader(mockDownloader);
+			assertEquals(ExternalServiceStatusType.DOWN, pdbAnnotator.getServiceStatus().getStatus());
+		} finally {
+			pdbAnnotator.setWebPageDownloader(downloader);
+		}
+	}
+
+	@Test
+	public void testSimulateChangedStatus() throws Exception {
+		WebPageDownloader downloader = pdbAnnotator.getWebPageDownloader();
+		try {
+			WebPageDownloader mockDownloader = Mockito.mock(WebPageDownloader.class);
+			when(mockDownloader.getFromNetwork(anyString())).thenReturn("{\"P29373\": [{\"xxx\": 140}]}");
+			pdbAnnotator.setWebPageDownloader(mockDownloader);
+			assertEquals(ExternalServiceStatusType.CHANGED, pdbAnnotator.getServiceStatus().getStatus());
+		} finally {
+			pdbAnnotator.setWebPageDownloader(downloader);
+		}
+	}
+
+	@Test
+	public void testSimulateInvalidJson() throws Exception {
+		WebPageDownloader downloader = pdbAnnotator.getWebPageDownloader();
+		try {
+			Species protein = new GenericProtein("id");
+			protein.setName("Q88VP8");
+			uniprotAnnotator.annotateElement(protein);
+			int cntAnnotations1 = protein.getMiriamData().size();
+
+			WebPageDownloader mockDownloader = Mockito.mock(WebPageDownloader.class);
+			when(mockDownloader.getFromNetwork(anyString())).thenReturn("\"P29373\": [{\"xxx\": 140}]}");
+			pdbAnnotator.setWebPageDownloader(mockDownloader);
+
+			pdbAnnotator.annotateElement(protein);
+			int cntAnnotations2 = protein.getMiriamData().size();
+			assertEquals(cntAnnotations1, cntAnnotations2);
+		} finally {
+			pdbAnnotator.setWebPageDownloader(downloader);
+		}
+	}
+
+	/**
+	 * Checks whether the element carries at least one {@link MiriamType#PDB}
+	 * annotation.
+	 *
+	 * @param element
+	 *          element to inspect
+	 * @return <code>true</code> if a PDB annotation is present
+	 */
+	private boolean hasPdbAnnotation(Species element) {
+		for (MiriamData md : element.getMiriamData()) {
+			if (md.getDataType().equals(MiriamType.PDB)) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+}
diff --git a/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java b/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java
index cd8b930f1703058b08f2138e4688354fc45bfa4a..31fa7c1338738febf5798debcde11f33fafa604a 100644
---
a/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java
+++ b/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java
@@ -265,6 +265,14 @@ public enum MiriamType {
 			"http://www.pantherdb.org/", //
 			new String[] { "urn:miriam:panther.family", "urn:miriam:panther" }, //
 			new Class<?>[] { Protein.class, Gene.class, Rna.class }, "MIR:00000060"), //
+
+	/**
+	 * PDB: http://www.pdbe.org/.
+	 */
+	PDB("Protein Data Bank", //
+			"http://www.pdbe.org/", //
+			"urn:miriam:pdb", //
+			new Class<?>[] { Protein.class, Gene.class, Rna.class }, "MIR:00000020"),
 
 	/**
 	 * Protein Family Database: http://pfam.xfam.org/.
diff --git a/persist/src/db/11.0.1/fix_db_20171307.sql b/persist/src/db/11.0.1/fix_db_20171307.sql
new file mode 100644
index 0000000000000000000000000000000000000000..94523a2c4d08e87d3b5af2767a02ebd675043bea
--- /dev/null
+++ b/persist/src/db/11.0.1/fix_db_20171307.sql
@@ -0,0 +1,2 @@
+DELETE FROM cache_type WHERE classname = 'lcsb.mapviewer.annotation.services.annotators.PdbAnnotator';
+INSERT INTO cache_type(validity, classname) VALUES (365, 'lcsb.mapviewer.annotation.services.annotators.PdbAnnotator');