diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParser.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParser.java
index 1e8e8af9edf74001c7f1b01d813870b5ec6aa3a0..89338e984780ac2639d829a14756557d85e11706 100644
--- a/annotation/src/main/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParser.java
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParser.java
@@ -642,15 +642,7 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
           if (drug == null) {
             logger.warn("Cannot find drug that should be there: " + string);
           } else {
-            boolean targets = false;
-            for (Target target : drug.getTargets()) {
-              for (MiriamData gene : target.getGenes()) {
-                if (gene.equals(targetMiriamData)) {
-                  targets = true;
-                }
-              }
-            }
-            if (targets) {
+            if (isDrugContainTarget(targetMiriamData, drug)) {
               result.add(drug);
             } else {
               logger.debug("Skipping drug that doesn't target required target. Drug name: " + drug.getName()
@@ -665,6 +657,18 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
     }
   }
 
+  /** Tells whether any target of {@code drug} lists {@code targetMiriamData} among its genes. */
+  boolean isDrugContainTarget(MiriamData targetMiriamData, Drug drug) {
+    for (Target target : drug.getTargets()) {
+      for (MiriamData gene : target.getGenes()) {
+        if (gene.equals(targetMiriamData)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
   /**
    * Returns list of drugs that target element (target) identified be drugbank
    * identifier.
@@ -698,25 +702,13 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
         throw new DrugSearchException("Problematic web page for target: " + drugbankTarget + "(" + hgncTarget + ")");
       }
 
-      int protienLinkPosition = page.indexOf("/polypeptides/");
-      // sometimes there might not be an element
-      if (protienLinkPosition >= 0) {
-        protienLinkPosition = protienLinkPosition + "/polypeptides/".length(); // 20;
-        int j = page.indexOf('"', protienLinkPosition);
-        String uniprotId = page.substring(protienLinkPosition, j);
-        MiriamData uniprotMiriam = new MiriamData(MiriamType.UNIPROT, uniprotId);
-        MiriamData hgncMiriam = uniprotAnnotator.uniProtToHgnc(uniprotMiriam);
-        if (hgncMiriam != null) hgncMiriam.setAnnotator(null);
-        if (hgncMiriam == null || !hgncMiriam.equals(hgncTarget)) {
-          logger.debug("Invalid target found. Expected " + hgncTarget + ", but found: " + hgncMiriam + " ("
-              + uniprotMiriam + ")");
-          return drugNames;
-        }
-
-      } else {
+      if (!isValidTargetPage(page)) {
         logger.warn("Invalid target found. No protein data available.");
         return drugNames;
       }
+      if (!targetPageMatchTarget(page, hgncTarget)) {
+        return drugNames;
+      }
 
       int organismPosition = page.indexOf("Organism</dt><dd class=\"col-md-10 col-sm-8\">");
       if (organismPosition >= 0) {
@@ -743,6 +735,56 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
     }
   }
 
+  /**
+   * Checks if the content of the page matches our expectations about target.
+   * Sometimes when we search for targets drugbank returns something similar to
+   * the target and we need to filter it out (here is an example for TF targets:
+   * https://www.drugbank.ca/unearth/q?utf8=%E2%9C%93&searcher=targets&query=TF)
+   *
+   * @param page - target webpage content
+   * @param hgncTarget - HGNC identifier that should match the target
+   * @return {@code true} when the uniprot identifier linked from the page
+   *         translates to {@code hgncTarget}, {@code false} otherwise (also when
+   *         the page contains no parsable polypeptide link)
+   * @throws UniprotSearchException - when there is problem with translating HGNC<->uniprot
+   */
+  boolean targetPageMatchTarget(String page, MiriamData hgncTarget) throws UniprotSearchException {
+    String linkPrefix = "/polypeptides/";
+    int proteinLinkPosition = page.indexOf(linkPrefix);
+    if (proteinLinkPosition < 0) {
+      // callers may pass arbitrary page content, the link is not guaranteed
+      logger.debug("Invalid target found. No protein data available.");
+      return false;
+    }
+    proteinLinkPosition += linkPrefix.length();
+    int linkEnd = page.indexOf('"', proteinLinkPosition);
+    if (linkEnd < 0) {
+      // without the closing quote substring() would throw
+      logger.debug("Invalid target found. Malformed protein link.");
+      return false;
+    }
+    String uniprotId = page.substring(proteinLinkPosition, linkEnd);
+    MiriamData uniprotMiriam = new MiriamData(MiriamType.UNIPROT, uniprotId);
+    MiriamData hgncMiriam = uniprotAnnotator.uniProtToHgnc(uniprotMiriam);
+    if (hgncMiriam != null) {
+      hgncMiriam.setAnnotator(null);
+    }
+    if (hgncMiriam == null || !hgncMiriam.equals(hgncTarget)) {
+      logger.debug(
+          "Invalid target found. Expected " + hgncTarget + ", but found: " + hgncMiriam + " (" + uniprotMiriam + ")");
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Checks if the page contains a polypeptide link, which
+   * {@link #targetPageMatchTarget(String, MiriamData)} needs to identify the protein.
+   */
+  private boolean isValidTargetPage(String page) {
+    return page.indexOf("/polypeptides/") >= 0;
+  }
+
   /**
    * @return the uniprotAnnotator
    * @see #uniprotAnnotator
diff --git a/annotation/src/test/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParserTest.java b/annotation/src/test/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParserTest.java
index 56d4ff56bef1e7b78ffbfbd840569ab353b38160..62801c46e84176865ad929a68e1a8631dd5174f7 100644
--- a/annotation/src/test/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParserTest.java
+++ b/annotation/src/test/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParserTest.java
@@ -96,9 +96,9 @@ public class DrugbankHTMLParserTest extends AnnotationTestFunctions {
   public void testFindAfegostat() throws Exception {
     try {
       // finding synonym
-      Drug rapamycinDrug = drugBankHTMLParser.findDrug("Afegostat");
-      assertNotNull(rapamycinDrug);
-      assertFalse(rapamycinDrug.getApproved());
+      Drug afegostatDrug = drugBankHTMLParser.findDrug("Afegostat");
+      assertNotNull(afegostatDrug);
+      assertFalse(afegostatDrug.getApproved());
 
     } catch (Exception e) {
       e.printStackTrace();
@@ -269,13 +269,39 @@ public class DrugbankHTMLParserTest extends AnnotationTestFunctions {
   }
 
   @Test
-  public void testFindDrugByTFTarget() throws Exception {
+  public void testTargetPageMatchTargetForTFProtein() throws Exception {
     try {
-      List<Drug> drugs = drugBankHTMLParser.getDrugListByTarget(new MiriamData(MiriamType.HGNC_SYMBOL, "TF"));
-      assertNotNull(drugs);
-      for (Drug drug : drugs) {
-        assertFalse(drug.getName().equalsIgnoreCase("Iron saccharate"));
-      }
+      MiriamData tfMiriam = new MiriamData(MiriamType.HGNC_SYMBOL, "TF");
+      String content = drugBankHTMLParser.getWebPageDownloader().getFromNetwork("https://www.drugbank.ca/biodb/bio_entities/BE0000510");
+      assertTrue(drugBankHTMLParser.targetPageMatchTarget(content, tfMiriam));
+
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testTargetPageMatchTargetForNotMatchingTFProtein() throws Exception {
+    try {
+      MiriamData tfMiriam = new MiriamData(MiriamType.HGNC_SYMBOL, "TF");
+      String content = drugBankHTMLParser.getWebPageDownloader().getFromNetwork("https://www.drugbank.ca/biodb/bio_entities/BE0002803");
+      assertFalse(drugBankHTMLParser.targetPageMatchTarget(content, tfMiriam));
+
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testIsDrugContainTarget() throws Exception {
+    try {
+      Drug urokinase = drugBankHTMLParser.findDrug("Urokinase");
+      MiriamData plgMiriam = new MiriamData(MiriamType.HGNC_SYMBOL, "PLG");
+      MiriamData tfMiriam = new MiriamData(MiriamType.HGNC_SYMBOL, "TF");
+      assertTrue(drugBankHTMLParser.isDrugContainTarget(plgMiriam, urokinase));
+      assertFalse(drugBankHTMLParser.isDrugContainTarget(tfMiriam, urokinase));
 
     } catch (Exception e) {
       e.printStackTrace();