From f16dc66d64522eafaeb90c63a0ad86012ff56482 Mon Sep 17 00:00:00 2001 From: Piotr Gawron <piotr.gawron@uni.lu> Date: Fri, 7 Sep 2018 16:19:37 +0200 Subject: [PATCH] unknown entries are also cached --- .../annotation/services/TaxonomyBackend.java | 370 +++++++++--------- 1 file changed, 188 insertions(+), 182 deletions(-) diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/TaxonomyBackend.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/TaxonomyBackend.java index 5ab139112c..e85f29d76d 100644 --- a/annotation/src/main/java/lcsb/mapviewer/annotation/services/TaxonomyBackend.java +++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/TaxonomyBackend.java @@ -23,187 +23,193 @@ import lcsb.mapviewer.model.map.MiriamType; */ public class TaxonomyBackend extends CachableInterface implements IExternalService { - /** - * Constant defining human taxonomy. - */ - public static final MiriamData HUMAN_TAXONOMY = new MiriamData(MiriamType.TAXONOMY, "9606"); - - /** - * Prefix string used for marking the query in database as query by taxonomy - * term. - */ - static final String TAXONOMY_CACHE_PREFIX = "TAXONOMY_TERM: "; - - /** - * Prefix string used for marking queries in database as queries returning - * name by taxonomy id. - */ - static final String TAXONOMY_NAME_CACHE_PREFIX = "TAXONOMY_NAME_BY_ID: "; - - @Override - public String refreshCacheQuery(Object query) throws SourceNotAvailable { - String result = null; - try { - if (query instanceof String) { - String name = (String) query; - if (name.startsWith(TAXONOMY_CACHE_PREFIX)) { - String term = name.substring(TAXONOMY_CACHE_PREFIX.length()); - MiriamData md = getByName(term); - if (md != null) { - result = md.getResource(); - } - } else if (name.startsWith(TAXONOMY_NAME_CACHE_PREFIX)) { - String id = name.substring(TAXONOMY_NAME_CACHE_PREFIX.length()); - result = getNameForTaxonomy(new MiriamData(MiriamType.TAXONOMY, id)); - } else if (name.startsWith("http")) { - result = getWebPageContent(name); - } else { - throw new InvalidArgumentException("Don't know what to do with query: " + query); - } - } else { - throw new InvalidArgumentException("Don't know what to do with class: " + query.getClass()); - } - } catch (TaxonomySearchException | IOException e) { - throw new SourceNotAvailable(e); - } - return result; - } - - /** - * Returns {@link MiriamData} representing taxonomy identified by name. - * - * @param term - * name of the organism - * @return {@link MiriamData} representing taxonomy - * @throws TaxonomySearchException - * thrown when there is a problem with accessing taxonomy database - */ - public MiriamData getByName(String term) throws TaxonomySearchException { - if (term == null || "".equals(term.trim())) { - return null; - } - String res = getCacheValue(TAXONOMY_CACHE_PREFIX + term); - if (res != null) { - return new MiriamData(MiriamType.TAXONOMY, res); - } - try { - - String queryTerm = term.trim(); - if (queryTerm.indexOf("(") >= 0) { - queryTerm = queryTerm.substring(0, queryTerm.indexOf("(")); - } - if (queryTerm.toUpperCase().startsWith("C.")) { - queryTerm = "Caenorhabditis" + queryTerm.substring(2); - } else if (queryTerm.toUpperCase().startsWith("D.")) { - queryTerm = "Drosophila" + queryTerm.substring(2); - } else if (queryTerm.toUpperCase().startsWith("P.")) { - queryTerm = "Pristionchus" + queryTerm.substring(2); - } else if (queryTerm.toUpperCase().startsWith("T.")) { - queryTerm = "Tribolium" + queryTerm.substring(2); - } else if (queryTerm.equalsIgnoreCase("Mouse")) { - queryTerm = "Mus musculus"; - } else if (queryTerm.equalsIgnoreCase("Rat")) { - queryTerm = "Rattus norvegicus"; - } - queryTerm = URLEncoder.encode(queryTerm, "UTF-8"); - String url = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?lvl=0&name=" + queryTerm; - String page = getWebPageContent(url); - Pattern idPattern = Pattern.compile("Taxonomy ID: ([0-9]+)"); - Matcher matcher = idPattern.matcher(page); - if (!matcher.find()) { - logger.warn("Unknown organism: " + term); - return null; - } else { - String name = matcher.group(1); - setCacheValue(TAXONOMY_CACHE_PREFIX + term, name); - return new MiriamData(MiriamType.TAXONOMY, name); - } - } catch (IOException e) { - throw new TaxonomySearchException("Problem with accessing taxonomy database", e); - } - - } - - /** - * Default class logger. - */ - private Logger logger = Logger.getLogger(TaxonomyBackend.class); - - /** - * Default constructor. - */ - public TaxonomyBackend() { - super(TaxonomyBackend.class); - } - - @Override - public ExternalServiceStatus getServiceStatus() { - ExternalServiceStatus status = new ExternalServiceStatus(MiriamType.TAXONOMY.getCommonName(), MiriamType.TAXONOMY.getDbHomepage()); - - GeneralCacheInterface cacheCopy = getCache(); - this.setCache(null); - - try { - MiriamData md = getByName("human"); - if (md == null) { - status.setStatus(ExternalServiceStatusType.CHANGED); - } else if (md.equals(TaxonomyBackend.HUMAN_TAXONOMY)) { - status.setStatus(ExternalServiceStatusType.OK); - } else { - status.setStatus(ExternalServiceStatusType.CHANGED); - } - } catch (Exception e) { - logger.error(MiriamType.TAXONOMY.getCommonName() + " is down", e); - status.setStatus(ExternalServiceStatusType.DOWN); - } - this.setCache(cacheCopy); - return status; - } - - /** - * Returns name for given taxonomy id. - * - * @param miriamData - * object representing taxonomy id - * @return name for given taxonomy id - * @throws TaxonomySearchException - * thrown when there is a problem with accessing external database - */ - public String getNameForTaxonomy(MiriamData miriamData) throws TaxonomySearchException { - if (miriamData == null) { - return null; - } - String res = getCacheValue(TAXONOMY_NAME_CACHE_PREFIX + miriamData.getResource()); - if (res != null) { - return res; - } - - try { - String url = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" + miriamData.getResource(); - String page = getWebPageContent(url); - Pattern idPattern = Pattern.compile("<title>Taxonomy browser \\(([A-Za-z0-9\\ \\.]+)\\)</title>"); - Matcher matcher = idPattern.matcher(page); - if (!matcher.find()) { - logger.warn("Unknown organism: " + miriamData); - return null; - } else { - String name = matcher.group(1); - setCacheValue(TAXONOMY_NAME_CACHE_PREFIX + miriamData.getResource(), name); - return name; - } - } catch (IOException e) { - throw new TaxonomySearchException("Problem with accessing taxonomy database", e); - } - } - - @Override - protected WebPageDownloader getWebPageDownloader() { - return super.getWebPageDownloader(); - } - - @Override - protected void setWebPageDownloader(WebPageDownloader webPageDownloader) { - super.setWebPageDownloader(webPageDownloader); - } + /** + * Constant defining human taxonomy. + */ + public static final MiriamData HUMAN_TAXONOMY = new MiriamData(MiriamType.TAXONOMY, "9606"); + + /** + * Prefix string used for marking the query in database as query by taxonomy + * term. + */ + static final String TAXONOMY_CACHE_PREFIX = "TAXONOMY_TERM: "; + + /** + * Prefix string used for marking queries in database as queries returning name + * by taxonomy id. + */ + static final String TAXONOMY_NAME_CACHE_PREFIX = "TAXONOMY_NAME_BY_ID: "; + + @Override + public String refreshCacheQuery(Object query) throws SourceNotAvailable { + String result = null; + try { + if (query instanceof String) { + String name = (String) query; + if (name.startsWith(TAXONOMY_CACHE_PREFIX)) { + String term = name.substring(TAXONOMY_CACHE_PREFIX.length()); + MiriamData md = getByName(term); + if (md != null) { + result = md.getResource(); + } + } else if (name.startsWith(TAXONOMY_NAME_CACHE_PREFIX)) { + String id = name.substring(TAXONOMY_NAME_CACHE_PREFIX.length()); + result = getNameForTaxonomy(new MiriamData(MiriamType.TAXONOMY, id)); + } else if (name.startsWith("http")) { + result = getWebPageContent(name); + } else { + throw new InvalidArgumentException("Don't know what to do with query: " + query); + } + } else { + throw new InvalidArgumentException("Don't know what to do with class: " + query.getClass()); + } + } catch (TaxonomySearchException | IOException e) { + throw new SourceNotAvailable(e); + } + return result; + } + + /** + * Returns {@link MiriamData} representing taxonomy identified by name. + * + * @param term + * name of the organism + * @return {@link MiriamData} representing taxonomy + * @throws TaxonomySearchException + * thrown when there is a problem with accessing taxonomy database + */ + public MiriamData getByName(String term) throws TaxonomySearchException { + if (term == null || "".equals(term.trim())) { + return null; + } + String res = getCacheValue(TAXONOMY_CACHE_PREFIX + term); + if (res != null) { + if (res.equals("null")) { + return null; + } else { + return new MiriamData(MiriamType.TAXONOMY, res); + } + } + try { + + String queryTerm = term.trim(); + if (queryTerm.indexOf("(") >= 0) { + queryTerm = queryTerm.substring(0, queryTerm.indexOf("(")); + } + if (queryTerm.toUpperCase().startsWith("C.")) { + queryTerm = "Caenorhabditis" + queryTerm.substring(2); + } else if (queryTerm.toUpperCase().startsWith("D.")) { + queryTerm = "Drosophila" + queryTerm.substring(2); + } else if (queryTerm.toUpperCase().startsWith("P.")) { + queryTerm = "Pristionchus" + queryTerm.substring(2); + } else if (queryTerm.toUpperCase().startsWith("T.")) { + queryTerm = "Tribolium" + queryTerm.substring(2); + } else if (queryTerm.equalsIgnoreCase("Mouse")) { + queryTerm = "Mus musculus"; + } else if (queryTerm.equalsIgnoreCase("Rat")) { + queryTerm = "Rattus norvegicus"; + } + queryTerm = URLEncoder.encode(queryTerm, "UTF-8"); + String url = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?lvl=0&name=" + queryTerm; + String page = getWebPageContent(url); + Pattern idPattern = Pattern.compile("Taxonomy ID: ([0-9]+)"); + Matcher matcher = idPattern.matcher(page); + if (!matcher.find()) { + logger.warn("Unknown organism: " + term); + setCacheValue(TAXONOMY_CACHE_PREFIX + term, "null"); + return null; + } else { + String name = matcher.group(1); + setCacheValue(TAXONOMY_CACHE_PREFIX + term, name); + return new MiriamData(MiriamType.TAXONOMY, name); + } + } catch (IOException e) { + throw new TaxonomySearchException("Problem with accessing taxonomy database", e); + } + + } + + /** + * Default class logger. + */ + private Logger logger = Logger.getLogger(TaxonomyBackend.class); + + /** + * Default constructor. + */ + public TaxonomyBackend() { + super(TaxonomyBackend.class); + } + + @Override + public ExternalServiceStatus getServiceStatus() { + ExternalServiceStatus status = new ExternalServiceStatus(MiriamType.TAXONOMY.getCommonName(), + MiriamType.TAXONOMY.getDbHomepage()); + + GeneralCacheInterface cacheCopy = getCache(); + this.setCache(null); + + try { + MiriamData md = getByName("human"); + if (md == null) { + status.setStatus(ExternalServiceStatusType.CHANGED); + } else if (md.equals(TaxonomyBackend.HUMAN_TAXONOMY)) { + status.setStatus(ExternalServiceStatusType.OK); + } else { + status.setStatus(ExternalServiceStatusType.CHANGED); + } + } catch (Exception e) { + logger.error(MiriamType.TAXONOMY.getCommonName() + " is down", e); + status.setStatus(ExternalServiceStatusType.DOWN); + } + this.setCache(cacheCopy); + return status; + } + + /** + * Returns name for given taxonomy id. + * + * @param miriamData + * object representing taxonomy id + * @return name for given taxonomy id + * @throws TaxonomySearchException + * thrown when there is a problem with accessing external database + */ + public String getNameForTaxonomy(MiriamData miriamData) throws TaxonomySearchException { + if (miriamData == null) { + return null; + } + String res = getCacheValue(TAXONOMY_NAME_CACHE_PREFIX + miriamData.getResource()); + if (res != null) { + return res; + } + + try { + String url = "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" + miriamData.getResource(); + String page = getWebPageContent(url); + Pattern idPattern = Pattern.compile("<title>Taxonomy browser \\(([A-Za-z0-9\\ \\.]+)\\)</title>"); + Matcher matcher = idPattern.matcher(page); + if (!matcher.find()) { + logger.warn("Unknown organism: " + miriamData); + return null; + } else { + String name = matcher.group(1); + setCacheValue(TAXONOMY_NAME_CACHE_PREFIX + miriamData.getResource(), name); + return name; + } + } catch (IOException e) { + throw new TaxonomySearchException("Problem with accessing taxonomy database", e); + } + } + + @Override + protected WebPageDownloader getWebPageDownloader() { + return super.getWebPageDownloader(); + } + + @Override + protected void setWebPageDownloader(WebPageDownloader webPageDownloader) { + super.setWebPageDownloader(webPageDownloader); + } } -- GitLab