From 55bae3ad8af2eb067f8b47526692003f8ae151d8 Mon Sep 17 00:00:00 2001
From: Marek Ostaszewski <marek.ostaszewski@uni.lu>
Date: Sun, 7 Jun 2020 20:08:10 +0200
Subject: [PATCH] Alias resolver

---
 Resources/Hipathia/resolve_aliases.R | 91 ++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 Resources/Hipathia/resolve_aliases.R

diff --git a/Resources/Hipathia/resolve_aliases.R b/Resources/Hipathia/resolve_aliases.R
new file mode 100644
index 0000000..1e26160
--- /dev/null
+++ b/Resources/Hipathia/resolve_aliases.R
@@ -0,0 +1,91 @@
+##################################################
+## Project: COVID-19 Disease Map
+## Script purpose: Translate raw CellDesigner SIF to Entrez identifiers using MINERVA 
+## Date: 05.06.2020
+## Author: Marek Ostaszewski
+##################################################
+
+library(httr)
+library(jsonlite)
+
+### A convenience function to handle API queries
+ask_GET <- function(furl, fask) {
+  resp <- httr::GET(url = paste0(furl, fask),
+                    httr::add_headers('Content-Type' = "application/x-www-form-urlencoded"),
+                    ### Currently ignoring SSL!
+                    httr::set_config(config(ssl_verifypeer = 0L)))
+  if(httr::status_code(resp) == 200) {
+    return(httr::content(resp, as = "text"))
+  }
+  return(NULL)
+}
+
+### Define the source file (GitLab, raw link)
+diagram <- "https://git-r3lab.uni.lu/covid/models/-/raw/master/Curation/Apoptosis/Apoptosis_03.06.2020.xml"
+
+### Read in the raw SIF version (here straight from the github of Aurelien)
+raw_sif <- read.table(url("https://raw.githubusercontent.com/aurelien-naldi/preliminary-covid-modeling/master/covid-models/Apoptosis_03.06.2020_raw.sif"),
+                      sep = " ", header = F, stringsAsFactors = F)
+
+### Read the list of resources to be integrated, from the MINERVA build scripts
+res <- read.csv(url("https://git-r3lab.uni.lu/covid/models/raw/master/Integration/MINERVA_build/resources.csv"),
+                header = T, stringsAsFactors = F)
+
+diag_name <- res[res$Resource == diagram, "Name"]
+
+### Get MINERVA elements
+### The address of the COVID-19 Disease Map in MINERVA
+map <- "https://covid19map.elixir-luxembourg.org/minerva/api/"
+### Get configuration of the COVID-19 Disease Map, to obtain the latest (default) version
+cfg <- fromJSON(ask_GET(map, "configuration/"))
+project_id <- cfg$options[cfg$options$type == "DEFAULT_MAP","value"]
+### The address of the latest (default) build 
+mnv_base <- paste0(map,"projects/",project_id,"/")
+
+message(paste0("Asking for diagrams in: ", mnv_base, "models/"))
+
+### Get diagrams
+models <- ask_GET(mnv_base, "models/")
+models <- fromJSON(models, flatten = F)
+
+this_refs <- models[models$name == diag_name]
+
+### Get elements of the chosen diagram
+model_elements <- fromJSON(ask_GET(paste0(mnv_base,"models/",models$idObject[models$name == diag_name],"/"), 
+                                   "bioEntities/elements/?columns=id,name,type,references,elementId,complexId"),
+                           flatten = F)
+
+message("Fetching entrez ids...")
+### Get information about Entrez identifiers from MINERVA elements
+entrez <- sapply(model_elements$references, function(x) ifelse(length(x) == 0, NA, x[x$type == "ENTREZ", "resource"]))
+names(entrez) <- model_elements$elementId
+
+### An utility function to retrieve Entrez based on the species id
+### if the id is a complex, the function goes recursively and fetches the ids of elements in this complex
+group_elements <- function(feid, felements, fentrez) {
+  pos <- which(felements$elementId == feid)
+  ### Any elements that may be nested in the 'feid' (CellDesigner alias)
+  incs <- felements$elementId[felements$complexId %in% felements$id[pos]]
+  if(length(incs) > 0) {
+    ### If nested elements found, run the function recursively for the contained elements
+    return(paste(unlist(sapply(incs, group_elements, felements, fentrez)), collapse = ";"))
+  } else {
+    ### If no nested elements, return Entrez
+    rid <- fentrez[[feid]]
+    if(is.na(rid)) {
+      ### If Entrez not available, return name
+      rid <- felements$name[pos]
+    }
+    return(rid)
+  }
+}
+
+message("Translating...")
+### Create a copy
+translated_sif <- raw_sif
+### Retrieve Entrez for the entire columns of sources and targets
+translated_sif[,1] <- sapply(raw_sif[,1], group_elements, model_elements, entrez)
+translated_sif[,3] <- sapply(raw_sif[,3], group_elements, model_elements, entrez)
+write.table(translated_sif, file = "translated_sif.txt",
+            sep = "\t", quote = F, col.names = F, row.names = F)
+message("Done.")
\ No newline at end of file
-- 
GitLab