From f16dc66d64522eafaeb90c63a0ad86012ff56482 Mon Sep 17 00:00:00 2001
From: Piotr Gawron <piotr.gawron@uni.lu>
Date: Fri, 7 Sep 2018 16:19:37 +0200
Subject: [PATCH] unknown entries are also cached

---
 .../annotation/services/TaxonomyBackend.java  | 370 +++++++++---------
 1 file changed, 188 insertions(+), 182 deletions(-)

diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/TaxonomyBackend.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/TaxonomyBackend.java
index 5ab139112c..e85f29d76d 100644
--- a/annotation/src/main/java/lcsb/mapviewer/annotation/services/TaxonomyBackend.java
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/TaxonomyBackend.java
@@ -23,187 +23,193 @@ import lcsb.mapviewer.model.map.MiriamType;
  */
 public class TaxonomyBackend extends CachableInterface implements IExternalService {
 
-	/**
-	 * Constant defining human taxonomy.
-	 */
-	public static final MiriamData HUMAN_TAXONOMY							= new MiriamData(MiriamType.TAXONOMY, "9606");
-
-	/**
-	 * Prefix string used for marking the query in database as query by taxonomy
-	 * term.
-	 */
-	static final String						 TAXONOMY_CACHE_PREFIX			= "TAXONOMY_TERM: ";
-
-	/**
-	 * Prefix string used for marking queries in database as queries returning
-	 * name by taxonomy id.
-	 */
-	static final String						 TAXONOMY_NAME_CACHE_PREFIX	= "TAXONOMY_NAME_BY_ID: ";
-
-	@Override
-	public String refreshCacheQuery(Object query) throws SourceNotAvailable {
-		String result = null;
-		try {
-			if (query instanceof String) {
-				String name = (String) query;
-				if (name.startsWith(TAXONOMY_CACHE_PREFIX)) {
-					String term = name.substring(TAXONOMY_CACHE_PREFIX.length());
-					MiriamData md = getByName(term);
-					if (md != null) {
-						result = md.getResource();
-					}
-				} else if (name.startsWith(TAXONOMY_NAME_CACHE_PREFIX)) {
-					String id = name.substring(TAXONOMY_NAME_CACHE_PREFIX.length());
-					result = getNameForTaxonomy(new MiriamData(MiriamType.TAXONOMY, id));
-				} else if (name.startsWith("http")) {
-					result = getWebPageContent(name);
-				} else {
-					throw new InvalidArgumentException("Don't know what to do with query: " + query);
-				}
-			} else {
-				throw new InvalidArgumentException("Don't know what to do with class: " + query.getClass());
-			}
-		} catch (TaxonomySearchException | IOException e) {
-			throw new SourceNotAvailable(e);
-		}
-		return result;
-	}
-
-	/**
-	 * Returns {@link MiriamData} representing taxonomy identified by name.
-	 * 
-	 * @param term
-	 *          name of the organism
-	 * @return {@link MiriamData} representing taxonomy
-	 * @throws TaxonomySearchException
-	 *           thrown when there is a problem with accessing taxonomy database
-	 */
-	public MiriamData getByName(String term) throws TaxonomySearchException {
-		if (term == null || "".equals(term.trim())) {
-			return null;
-		}
-		String res = getCacheValue(TAXONOMY_CACHE_PREFIX + term);
-		if (res != null) {
-			return new MiriamData(MiriamType.TAXONOMY, res);
-		}
-		try {
-
-			String queryTerm = term.trim();
-			if (queryTerm.indexOf("(") >= 0) {
-				queryTerm = queryTerm.substring(0, queryTerm.indexOf("("));
-			}
-			if (queryTerm.toUpperCase().startsWith("C.")) {
-				queryTerm = "Caenorhabditis" + queryTerm.substring(2);
-			} else if (queryTerm.toUpperCase().startsWith("D.")) {
-				queryTerm = "Drosophila" + queryTerm.substring(2);
-			} else if (queryTerm.toUpperCase().startsWith("P.")) {
-				queryTerm = "Pristionchus" + queryTerm.substring(2);
-			} else if (queryTerm.toUpperCase().startsWith("T.")) {
-				queryTerm = "Tribolium" + queryTerm.substring(2);
-			} else if (queryTerm.equalsIgnoreCase("Mouse")) {
-				queryTerm = "Mus musculus";
-			} else if (queryTerm.equalsIgnoreCase("Rat")) {
-				queryTerm = "Rattus norvegicus";
-			}
-			queryTerm = URLEncoder.encode(queryTerm, "UTF-8");
-			String url = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?lvl=0&name=" + queryTerm;
-			String page = getWebPageContent(url);
-			Pattern idPattern = Pattern.compile("Taxonomy ID: ([0-9]+)");
-			Matcher matcher = idPattern.matcher(page);
-			if (!matcher.find()) {
-				logger.warn("Unknown organism: " + term);
-				return null;
-			} else {
-				String name = matcher.group(1);
-				setCacheValue(TAXONOMY_CACHE_PREFIX + term, name);
-				return new MiriamData(MiriamType.TAXONOMY, name);
-			}
-		} catch (IOException e) {
-			throw new TaxonomySearchException("Problem with accessing taxonomy database", e);
-		}
-
-	}
-
-	/**
-	 * Default class logger.
-	 */
-	private Logger logger = Logger.getLogger(TaxonomyBackend.class);
-
-	/**
-	 * Default constructor.
-	 */
-	public TaxonomyBackend() {
-		super(TaxonomyBackend.class);
-	}
-
-	@Override
-	public ExternalServiceStatus getServiceStatus() {
-		ExternalServiceStatus status = new ExternalServiceStatus(MiriamType.TAXONOMY.getCommonName(), MiriamType.TAXONOMY.getDbHomepage());
-
-		GeneralCacheInterface cacheCopy = getCache();
-		this.setCache(null);
-
-		try {
-			MiriamData md = getByName("human");
-			if (md == null) {
-				status.setStatus(ExternalServiceStatusType.CHANGED);
-			} else if (md.equals(TaxonomyBackend.HUMAN_TAXONOMY)) {
-				status.setStatus(ExternalServiceStatusType.OK);
-			} else {
-				status.setStatus(ExternalServiceStatusType.CHANGED);
-			}
-		} catch (Exception e) {
-			logger.error(MiriamType.TAXONOMY.getCommonName() + " is down", e);
-			status.setStatus(ExternalServiceStatusType.DOWN);
-		}
-		this.setCache(cacheCopy);
-		return status;
-	}
-
-	/**
-	 * Returns name for given taxonomy id.
-	 * 
-	 * @param miriamData
-	 *          object representing taxonomy id
-	 * @return name for given taxonomy id
-	 * @throws TaxonomySearchException
-	 *           thrown when there is a problem with accessing external database
-	 */
-	public String getNameForTaxonomy(MiriamData miriamData) throws TaxonomySearchException {
-		if (miriamData == null) {
-			return null;
-		}
-		String res = getCacheValue(TAXONOMY_NAME_CACHE_PREFIX + miriamData.getResource());
-		if (res != null) {
-			return res;
-		}
-
-		try {
-			String url = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" + miriamData.getResource();
-			String page = getWebPageContent(url);
-			Pattern idPattern = Pattern.compile("<title>Taxonomy browser \\(([A-Za-z0-9\\ \\.]+)\\)</title>");
-			Matcher matcher = idPattern.matcher(page);
-			if (!matcher.find()) {
-				logger.warn("Unknown organism: " + miriamData);
-				return null;
-			} else {
-				String name = matcher.group(1);
-				setCacheValue(TAXONOMY_NAME_CACHE_PREFIX + miriamData.getResource(), name);
-				return name;
-			}
-		} catch (IOException e) {
-			throw new TaxonomySearchException("Problem with accessing taxonomy database", e);
-		}
-	}
-
-	@Override
-	protected WebPageDownloader getWebPageDownloader() {
-		return super.getWebPageDownloader();
-	}
-
-	@Override
-	protected void setWebPageDownloader(WebPageDownloader webPageDownloader) {
-		super.setWebPageDownloader(webPageDownloader);
-	}
+  /**
+   * Constant defining human taxonomy.
+   */
+  public static final MiriamData HUMAN_TAXONOMY = new MiriamData(MiriamType.TAXONOMY, "9606");
+
+  /**
+   * Prefix string used for marking the query in database as query by taxonomy
+   * term.
+   */
+  static final String TAXONOMY_CACHE_PREFIX = "TAXONOMY_TERM: ";
+
+  /**
+   * Prefix string used for marking queries in database as queries returning name
+   * by taxonomy id.
+   */
+  static final String TAXONOMY_NAME_CACHE_PREFIX = "TAXONOMY_NAME_BY_ID: ";
+
+  /**
+   * Recomputes the value for a cache key: a taxonomy term key, a taxonomy name
+   * key or a plain "http..." url. Any other query (including null) is rejected
+   * with {@link InvalidArgumentException}.
+   */
+  @Override
+  public String refreshCacheQuery(Object query) throws SourceNotAvailable {
+    if (!(query instanceof String)) {
+      // instanceof is false for null, so never call getClass() on it directly
+      Object type = query == null ? null : query.getClass();
+      throw new InvalidArgumentException("Don't know what to do with class: " + type);
+    }
+    String name = (String) query;
+    try {
+      if (name.startsWith(TAXONOMY_CACHE_PREFIX)) {
+        MiriamData md = getByName(name.substring(TAXONOMY_CACHE_PREFIX.length()));
+        return md == null ? null : md.getResource();
+      } else if (name.startsWith(TAXONOMY_NAME_CACHE_PREFIX)) {
+        String id = name.substring(TAXONOMY_NAME_CACHE_PREFIX.length());
+        return getNameForTaxonomy(new MiriamData(MiriamType.TAXONOMY, id));
+      } else if (name.startsWith("http")) {
+        return getWebPageContent(name);
+      }
+      throw new InvalidArgumentException("Don't know what to do with query: " + query);
+    } catch (TaxonomySearchException | IOException e) {
+      throw new SourceNotAvailable(e);
+    }
+  }
+
+  /**
+   * Resolves an organism name to its taxonomy identifier. A cached value is used
+   * when present (the literal "null" marks a name that was not found earlier);
+   * otherwise the NCBI taxonomy browser page is fetched and the outcome cached.
+   *
+   * @param term
+   *          name of the organism
+   * @return {@link MiriamData} representing taxonomy, or null when the name is
+   *         blank or no taxonomy id could be found for it
+   * @throws TaxonomySearchException
+   *           thrown when there is a problem with accessing taxonomy database
+   */
+  public MiriamData getByName(String term) throws TaxonomySearchException {
+    if (term == null || term.trim().isEmpty()) {
+      return null;
+    }
+    final String cacheKey = TAXONOMY_CACHE_PREFIX + term;
+    final String cached = getCacheValue(cacheKey);
+    if (cached != null) {
+      // "null" is the marker written below for organisms that were not found
+      return "null".equals(cached) ? null : new MiriamData(MiriamType.TAXONOMY, cached);
+    }
+    try {
+      String organism = term.trim();
+      final int bracket = organism.indexOf("(");
+      if (bracket >= 0) {
+        organism = organism.substring(0, bracket);
+      }
+      // expand the abbreviated genus names and common names that are recognised
+      final String upper = organism.toUpperCase();
+      if (upper.startsWith("C.")) {
+        organism = "Caenorhabditis" + organism.substring(2);
+      } else if (upper.startsWith("D.")) {
+        organism = "Drosophila" + organism.substring(2);
+      } else if (upper.startsWith("P.")) {
+        organism = "Pristionchus" + organism.substring(2);
+      } else if (upper.startsWith("T.")) {
+        organism = "Tribolium" + organism.substring(2);
+      } else if (organism.equalsIgnoreCase("Mouse")) {
+        organism = "Mus musculus";
+      } else if (organism.equalsIgnoreCase("Rat")) {
+        organism = "Rattus norvegicus";
+      }
+      String url = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?lvl=0&name="
+          + URLEncoder.encode(organism, "UTF-8");
+      String page = getWebPageContent(url);
+      Matcher matcher = Pattern.compile("Taxonomy ID: ([0-9]+)").matcher(page);
+      if (matcher.find()) {
+        String id = matcher.group(1);
+        setCacheValue(cacheKey, id);
+        return new MiriamData(MiriamType.TAXONOMY, id);
+      }
+      logger.warn("Unknown organism: " + term);
+      setCacheValue(cacheKey, "null");
+      return null;
+    } catch (IOException e) {
+      throw new TaxonomySearchException("Problem with accessing taxonomy database", e);
+    }
+  }
+
+  /**
+   * Logger used for the warnings and errors reported by this class.
+   */
+  private Logger logger = Logger.getLogger(TaxonomyBackend.class);
+
+  /**
+   * Creates the backend; its own class is handed to the superclass constructor.
+   */
+  public TaxonomyBackend() {
+    super(TaxonomyBackend.class);
+  }
+
+  /**
+   * Checks the service by resolving "human" with the cache detached and comparing
+   * the result with {@link #HUMAN_TAXONOMY}; the cache is re-attached afterwards.
+   */
+  @Override
+  public ExternalServiceStatus getServiceStatus() {
+    ExternalServiceStatus status = new ExternalServiceStatus(MiriamType.TAXONOMY.getCommonName(),
+        MiriamType.TAXONOMY.getDbHomepage());
+
+    GeneralCacheInterface cacheCopy = getCache();
+    this.setCache(null);
+    try {
+      MiriamData md = getByName("human");
+      boolean ok = md != null && md.equals(TaxonomyBackend.HUMAN_TAXONOMY);
+      status.setStatus(ok ? ExternalServiceStatusType.OK : ExternalServiceStatusType.CHANGED);
+    } catch (Exception e) {
+      logger.error(MiriamType.TAXONOMY.getCommonName() + " is down", e);
+      status.setStatus(ExternalServiceStatusType.DOWN);
+    } finally {
+      // restore the cache on every exit path, including Errors not caught above
+      this.setCache(cacheCopy);
+    }
+    return status;
+  }
+
+  /**
+   * Looks up the organism name shown by the NCBI taxonomy browser for an id. A
+   * value returned by the cache is used directly; a freshly found name is cached.
+   * NOTE(review): unlike {@link #getByName(String)}, a miss is not cached here.
+   *
+   * @param miriamData
+   *          object representing taxonomy id
+   * @return name for given taxonomy id, or null when it cannot be determined
+   * @throws TaxonomySearchException
+   *           thrown when there is a problem with accessing external database
+   */
+  public String getNameForTaxonomy(MiriamData miriamData) throws TaxonomySearchException {
+    if (miriamData == null) {
+      return null;
+    }
+    final String cacheKey = TAXONOMY_NAME_CACHE_PREFIX + miriamData.getResource();
+    final String cached = getCacheValue(cacheKey);
+    if (cached != null) {
+      return cached;
+    }
+    try {
+      String page = getWebPageContent(
+          "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" + miriamData.getResource());
+      Matcher title = Pattern.compile("<title>Taxonomy browser \\(([A-Za-z0-9\\ \\.]+)\\)</title>").matcher(page);
+      if (title.find()) {
+        String organism = title.group(1);
+        setCacheValue(cacheKey, organism);
+        return organism;
+      }
+      logger.warn("Unknown organism: " + miriamData);
+      return null;
+    } catch (IOException e) {
+      throw new TaxonomySearchException("Problem with accessing taxonomy database", e);
+    }
+  }
+
+  @Override // NOTE(review): presumably re-declared for same-package (test) access - confirm
+  protected WebPageDownloader getWebPageDownloader() {
+    return super.getWebPageDownloader(); // pure delegation to the superclass
+  }
+
+  @Override // NOTE(review): presumably re-declared for same-package (test) access - confirm
+  protected void setWebPageDownloader(WebPageDownloader webPageDownloader) {
+    super.setWebPageDownloader(webPageDownloader); // pure delegation to the superclass
+  }
 
 }
-- 
GitLab