Skip to content
Snippets Groups Projects

working test with pdmap submaps application to hipathia

Closed Devrim Gunyel requested to merge devrim_update into master
2 files
+ 230
2
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 225
0
##################################################
## Project: Disease Maps
## Script purpose: Translate the PD map pathways to HiPathia att/sif files
## Date: 27.11.2021
## Authors: Marek Ostaszewski, Devrim Gunyel
##################################################
# NOTE: Change Run casq part
# TODO:
### List of pathways for the main map(4895)
### Iterate the pathways and get the information of bounds: x, y, height, width
### Get the submap covering this rectangle: ....CellDesignerXmlParser&polygonString=x1,y1;x2,y1;x2,y2;x1,y2
options(stringsAsFactors = F)
### Helper functions to access and retrieve content from MINERVA-hosted diagrams
source("https://git-r3lab.uni.lu/covid/models/-/raw/master/Integration/MINERVA_access/minerva_access_functions.R")
library(dplyr) ### For some dplyr magic
### Download the CellDesigner file, run casq, return the raw SIF and tidy up
get_casq_sif <- function(map_address, project_id, this_model_id, x1, x2, y1, y2 ) {
call <- paste0(map_address,"projects/", project_id, "/models/", this_model_id,
":downloadModel?handlerClass=lcsb.mapviewer.converter.model.celldesigner.CellDesignerXmlParser&polygonString=",
x1, ",", y1, ";",
x2, ",", y2, ";",
x2, ",", y2, ";",
x1, ",", y1)
message("get_casq_sif - Asking for a CellDesigner content: ", call)
path <- ask_GET(call)
# To deal with models not downloaded in zip format
if (path != "./model.xml") { # if not zip
modelFile <- file("model.xml")
writeLines(path, modelFile)
close(modelFile)
}
### Change the default name of the downloaded file to the model_id
system(paste0("mv model.xml ", this_model_id, ".xml"))
### Run casq
message("get_casq_sif - call casq")
# system(paste0("casq -s ", this_model_id, ".xml"))
system(paste0("/Users/devrim.gunyel/anaconda3/bin/casq -s ", this_model_id, ".xml"))
### Tidy up
message("get_casq_sif - tidy up")
system(paste0("rm ", this_model_id, ".sif"))
system(paste0("mv ", this_model_id, "_raw.sif ", this_model_id, ".sif"))
### Return the raw sif
read.table(paste0(this_model_id, ".sif"))
}
### A function transforming element types to their HiPathia equivalents
get_role <- function(type, hypothetical) {
case_when(type == "Complex" & hypothetical == TRUE ~ "group",
type == "Complex" & hypothetical == FALSE ~ "complex",
type %in% c("Simple molecule", "Drug", "Ion") ~ "compound",
type %in% c("Gene", "RNA", "Protein") ~ "gene",
type %in% c("Phenotype") ~ "function",
TRUE ~ "other")
}
### An utility function to retrieve Entrez based on the species id
### if the id is a complex, the function goes recursively and fetches the ids of elements in this complex
group_elements <- function(feid, felements, fentrez) {
pos <- which(felements$elementId == feid)
### Any elements that may be nested in the 'feid' (CellDesigner alias)
incs <- felements$elementId[felements$complexId %in% felements$id[pos]]
if(length(incs) > 0) {
### If nested elements found, prune NAs and run the function recursively for the contained elements
collect <- unique(unlist(sapply(incs, group_elements, felements, fentrez)))
collect <- collect[!is.na(collect)]
if(length(collect) > 1) { return(paste(collect, collapse = ";")) }
else if (length(collect) == 1) { return(collect) }
return(NA)
} else {
### If no nested elements, return Entrez
rid <- fentrez[[feid]]
if(length(rid) == 0) { rid <- NA } ### If Entrez not available, return NA
return(rid)
}
}
get_hipathia_files <- function(raw_sif, node_prefix, model_elements) {
### Get information about Entrez identifiers from MINERVA elements
message("get_hipathia_files - fetching entrez ids...")
all_entrez <- sapply(model_elements$references,
function(x) ifelse(length(x) == 0, NA, x[x$type == "ENTREZ", "resource"]))
### Get all HiPathia types
all_types <- model_elements %>% mutate(role = get_role(type, hypothetical)) %>% pull(role, name = elementId)
names(all_entrez) <- model_elements$elementId
message("get_hipathia_files - creating att file...")
### Node attribute table
##### ID / label / X / Y / color / shape / type / label.cex / label.color / width / height / genesLis
att_nodes <- sort(unique(c(raw_sif[,1],raw_sif[,3])))
node_ID <- paste0("N-",node_prefix,"-", att_nodes)
### Retrieve Entrez and type for the entire list of unique nodes
entrez <- sapply(att_nodes, group_elements, model_elements, all_entrez)
type <- all_types[att_nodes]
### Collect x.y information
xy <- t(sapply(att_nodes, function(x) unlist(model_elements$bounds[model_elements$elementId == x, c(3,4)])))
colnames(xy) <- c("X", "Y")
### Combine into a single data frame
att_nodes_table <- data.frame(ID = node_ID, label = att_nodes,
xy,
color = "white", shape = "rectangle",
type = type,
label.cex = "0.5", label.color = "black",
width = "46", height = "17",
genesList = entrez,
tooltip = att_nodes)
### Postprocess the att file
### nodes that are "complex" have '/' as a connector
### nodes that are "group" have ',' as a connector
### both groups and nodes are to become "gene" afterwards
att_nodes_table <- att_nodes_table %>%
mutate(genesList = case_when(type == "complex" ~ gsub(";", "/", genesList),
type == "group" ~ gsub(";", ",", genesList),
TRUE ~ genesList))
att_nodes_table <- att_nodes_table %>%
mutate(type = case_when(type %in% c("complex", "group") ~ "gene",
TRUE ~ type))
message("get_hipathia_files - creating sif file...")
raw_sif[,1] <- paste0("N-",node_prefix,"-", raw_sif[,1])
raw_sif[,3] <- paste0("N-",node_prefix,"-", raw_sif[,3])
raw_sif[raw_sif[,2] == "POSITIVE",2] <- "activation"
raw_sif[raw_sif[,2] == "NEGATIVE",2] <- "inhibition"
### WARINING: For the moment I remove this step, much easier to process by type
### in the downstream graph reduction processes
# ### Hipathia defines functional nodes by appending "_func" to their ids
# ### This type-based change should happen simultaneously in the att_nodes and in the sif file
# ### in the att_nodes_table, nodes that are "function" get "_func" to their name and become "other"
# ### in the sif file, same change happens to the respecive ids
# for(f in which(att_nodes_table$type == "function")) {
# raw_sif[raw_sif == att_nodes_table$ID[f]] <- paste0(att_nodes_table$ID[f], "_func")
# att_nodes_table$ID[f] <- paste0(att_nodes_table$ID[f], "_func")
# att_nodes_table$type[f] <- "other"
# }
### Assert completeness
message("get_hipathia_files - all network nodes have attributes: ",
all(unique(c(raw_sif[,1],raw_sif[,3])) %in% att_nodes_table$ID))
message(setdiff(unique(c(raw_sif[,1],raw_sif[,3])), att_nodes_table$ID))
message("get_hipathia_files - all nodes participate in the network: ",
all(att_nodes_table$ID %in% unique(c(raw_sif[,1],raw_sif[,3]))))
message(setdiff(unique(c(raw_sif[,1],raw_sif[,3])), att_nodes_table$ID))
### Write to file
write.table(att_nodes_table, file = paste0(node_prefix, ".att"),
sep = "\t", quote = F, col.names = T, row.names = F)
message("get_hipathia_files - attribute table write: ", paste0(node_prefix, ".att"))
write.table(raw_sif, file = paste0(node_prefix, ".sif"),
sep = "\t", quote = F, col.names = T, row.names = F)
message("get_hipathia_files - sif file write: ", paste0(node_prefix, ".sif"))
}
### The address of the COVID-19 Disease Map in MINERVA
map <- "https://pdmap.uni.lu/minerva/api/"
components <- get_map_components(map)
default <- get_default_project(map)
### Main run
for(mid in components$models$idObject) {
if(mid != 4895){
message("Not main map...")
} else{
# bounds parameters
message("bounds ", components$map_elements$bounds[[which(components$models$idObject == mid)]])
### Case: Dopaminergic Transcription id: 523480
# x <- 6303.666666666664
# y <- 3140.750000000001
# width <- 808
# height <- 915
### Case: PI3K/AKT signaling id: 523484
x <- 11677.666666666662
y <- 724.2500000000007
width <- 1852
height <- 1860.0000000000002
# coordination for the pathway rectangle
x1 <- as.integer(x)
x2 <- as.integer(x + width)
y1 <- as.integer(y)
y2 <- as.integer(y + height)
message(paste0("Processing ", mid, "..."))
message(paste0("Creating casq sif file for ", mid, "..."))
raw_sif <- get_casq_sif(map, default, mid, x1, x2, y1, y2)
message(paste0("Creating HiPathia sif ..."))
get_hipathia_files(raw_sif = raw_sif,
node_prefix = mid,
model_elements = components$map_elements[[which(components$models$idObject == mid)]])
message("model_elements ", components$map_elements[[which(components$models$idObject == mid)]])
}
}
# Piotr - For pathways
# First you need a list all the pathways that you want to obtain, for example this query will give you such list:
# https://pdmap.uni.lu/minerva/api/projects/pd_map_autumn_19/models/4895/bioEntities/elements/?type=Pathway
# When you have this information now you can try to download the submap that will be limited to the area of the pathway. For example if you see that your pathway is located graphically using following coordinates:
# "height":965.0,"width":979.0,"x":14882.666666666664,"y":664.7500000000007
# You can download submap covering this rectangle with following query:
# https://pdmap.uni.lu/minerva/api/projects/pd_map_autumn_19/models/4895:downloadModel?handlerClass=lcsb.mapviewer.converter.model.celldesigner.CellDesignerXmlParser&polygonString=14882,664;15861,664;15861,1629;14882,1629
Loading