From 3eccf8c431d1f705bd441186ac331ad87ea69d11 Mon Sep 17 00:00:00 2001
From: Piotr Gawron <piotr.gawron@uni.lu>
Date: Mon, 10 Sep 2018 11:42:16 +0200
Subject: [PATCH] small refactor - methods extracted for better unit testing

---
 .../services/DrugbankHTMLParser.java          | 74 ++++++++++++-------
 .../services/DrugbankHTMLParserTest.java      | 44 ++++++++---
 2 files changed, 84 insertions(+), 34 deletions(-)

diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParser.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParser.java
index 1e8e8af9ed..89338e9847 100644
--- a/annotation/src/main/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParser.java
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParser.java
@@ -642,15 +642,7 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
         if (drug == null) {
           logger.warn("Cannot find drug that should be there: " + string);
         } else {
-          boolean targets = false;
-          for (Target target : drug.getTargets()) {
-            for (MiriamData gene : target.getGenes()) {
-              if (gene.equals(targetMiriamData)) {
-                targets = true;
-              }
-            }
-          }
-          if (targets) {
+          if (isDrugContainTarget(targetMiriamData, drug)) {
             result.add(drug);
           } else {
             logger.debug("Skipping drug that doesn't target required target. Drug name: " + drug.getName()
@@ -665,6 +657,18 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
     }
   }
 
+  /** Tells whether any target of {@code drug} lists {@code targetMiriamData} among its genes. */
+  boolean isDrugContainTarget(MiriamData targetMiriamData, Drug drug) {
+    for (Target target : drug.getTargets()) {
+      for (MiriamData gene : target.getGenes()) {
+        if (gene.equals(targetMiriamData)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
   /**
    * Returns list of drugs that target element (target) identified be drugbank
    * identifier.
@@ -698,25 +702,13 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
         throw new DrugSearchException("Problematic web page for target: " + drugbankTarget + "(" + hgncTarget + ")");
       }
 
-      int protienLinkPosition = page.indexOf("/polypeptides/");
-      // sometimes there might not be an element
-      if (protienLinkPosition >= 0) {
-        protienLinkPosition = protienLinkPosition + "/polypeptides/".length(); // 20;
-        int j = page.indexOf('"', protienLinkPosition);
-        String uniprotId = page.substring(protienLinkPosition, j);
-        MiriamData uniprotMiriam = new MiriamData(MiriamType.UNIPROT, uniprotId);
-        MiriamData hgncMiriam = uniprotAnnotator.uniProtToHgnc(uniprotMiriam);
-        if (hgncMiriam != null) hgncMiriam.setAnnotator(null);
-        if (hgncMiriam == null || !hgncMiriam.equals(hgncTarget)) {
-          logger.debug("Invalid target found. Expected " + hgncTarget + ", but found: " + hgncMiriam + " ("
-              + uniprotMiriam + ")");
-          return drugNames;
-        }
-
-      } else {
+      if (!isValidTargetPage(page)) {
         logger.warn("Invalid target found. No protein data available.");
         return drugNames;
       }
+      if (!targetPageMatchTarget(page, hgncTarget)) {
+        return drugNames;
+      }
 
       int organismPosition = page.indexOf("Organism</dt><dd class=\"col-md-10 col-sm-8\">");
       if (organismPosition >= 0) {
@@ -743,6 +735,38 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
     }
   }
 
+  /**
+   * Checks if the content of the page matches our expectations about target.
+   * Sometimes a target search in drugbank returns something only similar to the
+   * target and we need to filter it out (here is an example for TF targets:
+   * https://www.drugbank.ca/unearth/q?utf8=%E2%9C%93&searcher=targets&query=TF)
+   *
+   * @param page - target webpage content
+   * @param hgncTarget - HGNC identifier that should match the target
+   * @return true when the uniprot id linked on the page translates to hgncTarget, false otherwise (also when the link is missing)
+   * @throws UniprotSearchException - when there is problem with translating HGNC<->uniprot
+   */
+  boolean targetPageMatchTarget(String page, MiriamData hgncTarget) throws UniprotSearchException {
+    int idStart = page.indexOf("/polypeptides/") + "/polypeptides/".length();
+    int idEnd = page.indexOf('"', idStart);
+    if (!page.contains("/polypeptides/") || idEnd < 0) {
+      return false; // no well-formed polypeptide link, so there is no uniprot id to compare
+    }
+    MiriamData uniprotMiriam = new MiriamData(MiriamType.UNIPROT, page.substring(idStart, idEnd));
+    MiriamData hgncMiriam = uniprotAnnotator.uniProtToHgnc(uniprotMiriam);
+    if (hgncMiriam != null) {
+      hgncMiriam.setAnnotator(null);
+    }
+    if (hgncMiriam == null || !hgncMiriam.equals(hgncTarget)) {
+      logger.debug("Invalid target found. Expected " + hgncTarget + ", but found: " + hgncMiriam + " (" + uniprotMiriam + ")");
+      return false;
+    }
+    return true;
+  }
+
+  private boolean isValidTargetPage(String page) {
+    return page.contains("/polypeptides/"); // pages without a polypeptide link carry no protein data
+  }
+
   /**
    * @return the uniprotAnnotator
    * @see #uniprotAnnotator
diff --git a/annotation/src/test/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParserTest.java b/annotation/src/test/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParserTest.java
index 56d4ff56be..62801c46e8 100644
--- a/annotation/src/test/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParserTest.java
+++ b/annotation/src/test/java/lcsb/mapviewer/annotation/services/DrugbankHTMLParserTest.java
@@ -96,9 +96,9 @@ public class DrugbankHTMLParserTest extends AnnotationTestFunctions {
   public void testFindAfegostat() throws Exception {
     try {
       // finding synonym
-      Drug rapamycinDrug = drugBankHTMLParser.findDrug("Afegostat");
-      assertNotNull(rapamycinDrug);
-      assertFalse(rapamycinDrug.getApproved());
+      Drug afegostatDrug = drugBankHTMLParser.findDrug("Afegostat");
+      assertNotNull(afegostatDrug);
+      assertFalse(afegostatDrug.getApproved());
 
     } catch (Exception e) {
       e.printStackTrace();
@@ -269,13 +269,39 @@ public class DrugbankHTMLParserTest extends AnnotationTestFunctions {
   }
 
   @Test
-  public void testFindDrugByTFTarget() throws Exception {
+  public void testTargetPageMatchTargetForTFProtein() throws Exception {
     try {
-      List<Drug> drugs = drugBankHTMLParser.getDrugListByTarget(new MiriamData(MiriamType.HGNC_SYMBOL, "TF"));
-      assertNotNull(drugs);
-      for (Drug drug : drugs) {
-        assertFalse(drug.getName().equalsIgnoreCase("Iron saccharate"));
-      }
+      MiriamData tfMiriam = new MiriamData(MiriamType.HGNC_SYMBOL, "TF"); // HGNC symbol the fetched page must match
+      String content = drugBankHTMLParser.getWebPageDownloader().getFromNetwork("https://www.drugbank.ca/biodb/bio_entities/BE0000510");
+      assertTrue(drugBankHTMLParser.targetPageMatchTarget(content, tfMiriam)); // page BE0000510 must be accepted for TF
+
+    } catch (Exception e) {
+      e.printStackTrace(); // dump the stack trace, then rethrow so the test still fails
+      throw e;
+    }
+  }
+
+  @Test
+  public void testTargetPageMatchTargetForNotMatchingTFProtein() throws Exception {
+    try {
+      MiriamData tfMiriam = new MiriamData(MiriamType.HGNC_SYMBOL, "TF"); // HGNC symbol the fetched page is checked against
+      String content = drugBankHTMLParser.getWebPageDownloader().getFromNetwork("https://www.drugbank.ca/biodb/bio_entities/BE0002803");
+      assertFalse(drugBankHTMLParser.targetPageMatchTarget(content, tfMiriam)); // page BE0002803 must be rejected for TF
+
+    } catch (Exception e) {
+      e.printStackTrace(); // dump the stack trace, then rethrow so the test still fails
+      throw e;
+    }
+  }
+
+  @Test
+  public void testIsDrugContainTarget() throws Exception {
+    try {
+      Drug afegostat = drugBankHTMLParser.findDrug("Urokinase");
+      MiriamData plgMiriam = new MiriamData(MiriamType.HGNC_SYMBOL, "PLG");
+      MiriamData tfMiriam = new MiriamData(MiriamType.HGNC_SYMBOL, "TF");
+      assertTrue(drugBankHTMLParser.isDrugContainTarget(plgMiriam, afegostat));
+      assertFalse(drugBankHTMLParser.isDrugContainTarget(tfMiriam, afegostat));
 
     } catch (Exception e) {
       e.printStackTrace();
-- 
GitLab