Commit 5eb13f2b authored by Dimitrios Kyriakis's avatar Dimitrios Kyriakis
Browse files

snakemake_docker_pipeline

parent dffe2ba0
# This file contains everything to configure the workflow on a global scale.
# The sample based data must be complemented by a samples.tsv file that contains
# one row per sample. It can be parsed easily via pandas.
samples: "config/samples.tsv"
params:
Preprocess:
tool: "seurat"
project: "IPSCs_pink1"
dataset: "IPSCs_pink1"
organism: "human"
imputation: FALSE
remove_mt: FALSE
remove_ribsomal: FALSE
n_cores: 1
elbow: TRUE
SCT: TRUE
criteria_pass: 3
min.cells: 10
min.features: 200
data_10x: FALSE
sample Condition date batch file
Control_IPSCs Control IPSCs D DADD5_S2_DGE.txt
Control_D06 Control D06 A DADA4_S4_DGE.txt
Control_D10 Control D10 A DADA3_S3_DGE.txt
Control_D15 Control D15 A DADA2_S2_DGE.txt
Control_D21 Control D21 A DADA1_S1_DGE.txt
PINK1_IPSCs PINK1 IPSCs D DADD6_S3_DGE.txt
PINK1_D06 PINK1 D06 A DADA8_S4_DGE.txt
PINK1_D15 PINK1 D15 A DADA6_S2_DGE.txt
PINK1_D21 PINK1 D21 A DADA5_S1_DGE.txt
\ No newline at end of file
File added
from snakemake.utils import min_version
from snakemake.utils import validate
import pandas as pd
import os
from smart_open import open
import yaml
min_version('6.4.0')
# The main entry point of workflow.
# After configuring, running snakemake -n in a clone of this repository should successfully execute a dry-run of the workflow.
configfile: 'config/config.yaml'
container: 'docker://kyriakds/ipscs:version4'
#singularity: 'docker://kyriakds/ipscs_pink1:firstimage'
samples = pd.read_csv(config['samples'], sep='\t', dtype = str).set_index('sample', drop=False)
samples.index.names = ['sample']
rule all:
input:
#expand('result/Preprocess/{sample}/{sample}_{condition}_Cells_Removed.rds',zip,sample=samples['sample'],date=samples['date'],condition=samples['Condition']),
#'result/Mapping/Merged_seurat.rds',
'result/Pairwise/Pink1_venn_diagramm.pdf',
'result/Paper_Figures/Figure4.pdf',
'result/DEG/DF_Group_B_Conserved_all.txt',
'result/DEG/DF_Group_C_Conserved_all.txt',
'result/DEG/DF_Group_D.txt',
'Correlation_Networks.rds',
'result/Network/Num_Nodes_Interactions.pdf'
include: 'rules/preprocess.smk'
include: 'rules/mapping.smk'
include: 'rules/deg_pairwise.smk'
include: 'rules/deg_group_bc.smk'
include: 'rules/deg_group_d.smk'
include: 'rules/paper_figures.smk'
include: 'rules/correlation_network.smk'
include: 'rules/network_validation.smk'
# rule report:
# module load tools/Singularity
# snakemake --use-singularity --singularity-args '-B /scratch/users/dkyriakis:/scratch/users/dkyriakis' -j 1
# snakemake --use-singularity -j 2
rule correlation_network:
input:
file = 'result/Mapping/Merged_seurat.rds',
csv1 = 'DATA/Networks/NODES_and_pathways_11.6.20_9.csv',
csv2 = 'DATA/Networks/RUN_12_EDGES_ST_GM_REMERGE_used_11_6_20.csv',
csv3 = 'DATA/Networks/NODES_May_29_Manual_Mito_Ubiq.csv',
csv4 = 'DATA/Networks/EDGES_part1_manual_May_29.csv'
output:
'Correlation_Networks.rds'
shell:
'Rscript workflow/scripts/4.Correlation_Network.R {input.file} {input.csv1} {input.csv2} {input.csv3} {input.csv4} {output}'
rule DEG_Group_BC:
input:
file = 'result/Mapping/Merged_seurat.rds'
params:
threads = 4
output:
groupB = "result/DEG/DF_Group_B_Conserved_all.txt",
groupC = "result/DEG/DF_Group_C_Conserved_all.txt"
shell:
"Rscript workflow/scripts/2.2.DEG_Group_BC.R {input.file} {params.threads} {output.groupB} {output.groupC}"
rule DEG_Group_D:
input:
file = 'result/Mapping/Merged_seurat.rds'
params:
threads = 4
output:
groupD = "result/DEG/DF_Group_D.txt"
shell:
"Rscript workflow/scripts/2.3.DEG_Group_D.R {input.file} {params.threads} {output.groupD}"
rule DEG_Pairwise:
input:
file = 'result/Mapping/Merged_seurat.rds'
params:
threads = 12
output:
txt = 'result/Pairwise/Pink1_venn_diagramm.pdf'
shell:
'Rscript workflow/scripts/2.1.DEG_Pairwise.R {input.file} {params.threads} {output.txt}'
def get_rds(wildcards):
files_list=expand('result/Preprocess/{sample}/{sample}_{condition}_Cells_Removed.rds',zip,sample=samples['sample'],condition=samples['Condition'])
return files_list
rule Mapping:
input:
file = get_rds,
params:
threads = 6
output:
txt = 'result/Mapping/Merged_seurat.rds'
shell:
'Rscript workflow/scripts/1.Mapping.R {input.file} {params.threads} {output.txt}'
# txt=expand("result/{sample}_Cells_Removed.tsv",sample=samples["sample"])
rule Network_Validation:
input:
file = 'result/Mapping/Merged_seurat.rds',
#groupA = "result/DEG/DF_Group_A_Conserved_all.txt",
groupB = "result/DEG/DF_Group_B_Conserved_all.txt",
groupC = "result/DEG/DF_Group_C_Conserved_all.txt",
groupD = "result/DEG/DF_Group_D.txt"
params:
threads = 4
output:
"result/Network/Num_Nodes_Interactions.pdf"
shell:
"Rscript workflow/scripts/4.Network_Validation.R {input.file} {input.groupB} {input.groupC} {input.groupD} {output}"
rule Paper_Figures:
input:
file = 'result/Mapping/Merged_seurat.rds'
params:
threads = 2
output:
txt = 'result/Paper_Figures/Figure4.pdf'
shell:
'Rscript workflow/scripts/3.Paper_Plots.R {input.file} {params.threads} {output.txt}'
# this container defines the underlying OS for each job when using the workflow
# with --use-conda --use-singularity
# container: "docker://continuumio/miniconda3"
##### load config and sample sheets #####
def get_dem(wildcards):
# no trimming, use raw reads
return samples.loc[(wildcards.sample), ["file"]]
# file=expand('DATA/{file}',file=FILES)
rule Preprocess:
input:
file=get_dem
params:
sample='{sample}'
output:
txt = 'result/Preprocess/{sample}/{sample}_{condition}_Cells_Removed.rds'
shell:
'Rscript workflow/scripts/0.Preprocessing.R {input.file} {params.sample} {output.txt}'
# txt=expand("result/{sample}_Cells_Removed.tsv",sample=samples["sample"])
# ================= LIBRARIES ===================
options(future.globals.maxSize= 2122317824)
library(sctransform)
library(Seurat)
library(RColorBrewer)
library(tictoc)
library(crayon)
library(stringr)
library(Routliers)
library(jcolors)
library(cluster)
library(NMF)
library(ggplot2)
library(ggpubr)
library(cowplot)
library(viridis)
set.seed(123)
# ================ READ ARGS =================
args = commandArgs(trailingOnly=TRUE)
print(args)
source("workflow/scripts/Functions.R")
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else {
print("Arguments Passed")
}
# ---------------------------------------------
file <- args[1]
sample <- args[2]
condition_name <- strsplit(args[2],"_")[[1]][1]
date <- strsplit(args[2],"_")[[1]][2]
output_file <- args[3]
print(condition_name)
# condition_name <- args[2]
# sample <- args[3]
# date <- args[4]
imputation = FALSE
remove_mt=FALSE
remove_ribsomal=FALSE
remove_rb = TRUE
n_cores=1
elbow = TRUE
SCT=TRUE
criteria_pass=3
min.cells <- 10
min.features <- 200
data_10x <- FALSE
vars.to.regress=c("nCount_RNA")
outlier_detector = "MAD"
# # ------------------------------------------------
colormap_d<- c('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928',"black","gray")
color_cond <- c(brewer.pal(8,"Dark2"),"black","gray","magenta4","seagreen4")[c(5,1,2,3,4,9,6,7,8)]
color_cond <- c(brewer.pal(5,"Dark2"),"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6])[c(5,1,2,3,4,9,6,7,8)]
color_clust <- c(brewer.pal(12,"Paired")[-11],"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6],brewer.pal(8,"Dark2"))
color_cells <- c(brewer.pal(9,"Set1")[-6],"goldenrod4","darkblue","seagreen4")
color_list <- list(condition=color_cond,Cluster=color_clust,Cell_Type=color_cells,State=color_clust)
object <- load_files(file = file, data_10x = data_10x)
if (elbow == TRUE) {
Before_col_names <- colnames(object)
object <- elbow_calc(object, sample, iter_qc)
After_col_names_Elbow <- colnames(object)
Rm_Cell_N_Elbow <- Before_col_names[!Before_col_names %in% After_col_names_Elbow]
} else {
After_col_names_Elbow <- colnames(object)
Rm_Cell_N_Elbow <- c()
}
Outlier_object <- Advanced_Outlier_detection(object,
condition = sample,
criteria_pass = criteria_pass)
#saveRDS(Outlier_object,file=output_file)
object <- Outlier_object$object_filtered
oultliers_index <- Outlier_object$oultliers_index
After_col_names_MAD <- colnames(object)
Rm_Cell_N_MAD <- After_col_names_Elbow[!After_col_names_Elbow %in%
After_col_names_MAD]
Rm_Cell_N_Elbow <- c(Rm_Cell_N_Elbow, Rm_Cell_N_MAD)
before_genes <- rownames(object)
metrics_output <- metrics_calc(object = object, remove_mt = remove_mt,
remove_rb = remove_rb)
object <- metrics_output$object
percent.mito <- metrics_output$percent.mito
percent.rb <- metrics_output$percent.rb
Seurat <- CreateSeuratObject(counts = object, project = sample,
min.cells = min.cells, min.features = min.features,
meta.data = data.frame(percent.mito = percent.mito,
percent.rb = percent.rb,
condition=condition_name,
date=date))
Seurat$stim <- sample
After_col_names_SEURAT <- colnames(Seurat)
Rm_Cell_N_SEURAT <- After_col_names_MAD[!After_col_names_MAD %in%
After_col_names_SEURAT]
Rm_Cell_N_Elbow <- c(Rm_Cell_N_Elbow, Rm_Cell_N_SEURAT)
write.table(Rm_Cell_N_Elbow, paste0("result/Preprocess/",sample,"/Cells_Removed.tsv"), sep = "\t")
after_genes <- rownames(Seurat@assays$RNA@counts)
rm_genes <- before_genes[!before_genes %in% after_genes]
write.table(rm_genes, paste0("result/Preprocess/",sample,"/Genes_Removed.tsv"), sep = "\t")
Seurat <- SCTransform(object = Seurat, verbose = FALSE, vars.to.regress = vars.to.regress)
saveRDS(Seurat,file=output_file)
args = commandArgs(trailingOnly=TRUE)
print(args)
# ================= LIBRARIES ===================
options(future.globals.maxSize= 2122317824)
library(sctransform)
library(Seurat)
library(RColorBrewer)
library(tictoc)
library(crayon)
library(stringr)
library(Routliers)
library(jcolors)
library(cluster)
library(NMF)
library(ggplot2)
library(ggpubr)
library(cowplot)
library(viridis)
set.seed(123)
project ="IPSCs_pink1"
colormap_d<- c('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928',"black","gray")
color_cond <- c(brewer.pal(8,"Dark2"),"black","gray","magenta4","seagreen4")[c(5,1,2,3,4,9,6,7,8)]
color_cond <- c(brewer.pal(5,"Dark2"),"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6])[c(5,1,2,3,4,9,6,7,8)]
color_clust <- c(brewer.pal(12,"Paired")[-11],"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6],brewer.pal(8,"Dark2"))
color_cells <- c(brewer.pal(9,"Set1")[-6],"goldenrod4","darkblue","seagreen4")
color_list <- list(condition=color_cond,Cluster=color_clust,Cell_Type=color_cells,State=color_clust)
source("workflow/scripts/Functions.R")
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else {
print("Arguments Passed")
}
input <- args[1:9]
threads <- as.numeric(args[10])
output <- args[11]
print(input)
print(output)
tm_list <- mclapply(input,function(x){
temp_s <- readRDS(x)
}, mc.cores = threads)
# select features that are repeatedly variable across datasets for integration
int.features <- SelectIntegrationFeatures(object.list = tm_list, nfeatures = 3000)
tm_list <- PrepSCTIntegration(object.list = tm_list,
anchor.features = int.features, verbose = FALSE)
map.anchors <- FindIntegrationAnchors(object.list = tm_list,
normalization.method = "SCT", anchor.features = int.features,
verbose = FALSE)
# this command creates an 'integrated' data assay
Combined <- IntegrateData(anchorset = map.anchors,
normalization.method = "SCT", verbose = FALSE)
DefaultAssay(object = Combined) <- "integrated"
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
Combined <- CellCycleScoring(Combined, s.features = s.genes,g2m.features = g2m.genes)
DefaultAssay(Combined) <- 'RNA'
pink.list <-SplitObject(Combined,split.by = "condition")
pink.list <- mclapply(pink.list,function(temp_ss){
temp_ss <- SCTransform(temp_ss, verbose = FALSE,vars.to.regress=c("G2M.Score","S.Score"))
}, mc.cores = threads)
# doi: https://doi.org/10.1101/576827
int.features <- SelectIntegrationFeatures(object.list = pink.list, nfeatures = 3000)
pink.list <- PrepSCTIntegration(object.list = pink.list, anchor.features = int.features,
verbose = FALSE)
int.anchors <- FindIntegrationAnchors(object.list = pink.list, normalization.method = "SCT",
anchor.features = int.features, verbose = FALSE)
Seurat <- IntegrateData(anchorset = int.anchors, normalization.method = "SCT",
verbose = FALSE)
DefaultAssay(object = Seurat) <- "integrated"
# all.genes <- rownames(Seurat)
# Seurat <- ScaleData(Seurat, features = all.genes)
print("Projection")
set.seed(123)
library(RcppAnnoy)
Seurat <- RunPCA(object= Seurat, verbose =FALSE)
pdf("result/Mapping/ElbowPlot.pdf")
ElbowPlot(Seurat)
dev.off()
num_dim <- ICSWrapper::calc_num_pc(object = Seurat, 0.95)
Seurat <- RunUMAP(object = Seurat, reduction = "pca", dims = 1:num_dim)
Seurat <- RunTSNE(object = Seurat, reduction = "pca", dims = 1:num_dim)
Seurat <- FindNeighbors(object = Seurat, reduction = "pca", dims = 1:num_dim)
optimal_output <- optimal_clusters(Seurat, k.max = 10, save = TRUE, resolution = FALSE)
Seurat <- optimal_output$object
opt_num <- optimal_output$opt_num
sil_scor <- optimal_output$sil_scor
Seurat$Treatment <- Seurat$condition
Seurat$condition <- Seurat$orig.ident
print("Plotting")
pdf(paste("result/Mapping/",Sys.Date(),"_umap.pdf",sep=""))
DimPlot(object = Seurat, reduction = "umap", group.by = "Treatment")
DimPlot(object = Seurat, reduction = "umap", group.by = "condition")
DimPlot(object = Seurat, reduction = "tsne", group.by = "condition")
DimPlot(object = Seurat, reduction = "umap", group.by = "date")
DimPlot(object = Seurat, reduction = "umap")
dev.off()
DefaultAssay(object = Seurat) <- "RNA"
all.genes <- rownames(Seurat)
Seurat <- ScaleData(Seurat, features = all.genes)
print("SAVE RDS")
saveRDS(Seurat,output)
# Seurat$sample <- factor(as.factor(Seurat$sample),
# levels = c("Control_IPSCs", "Control_D06" ,"Control_D10", "Control_D15", "Control_D21",
# "PINK1_IPSCs","PINK1_D06", "PINK1_D15", "PINK1_D21"))
# pdf(paste("result/Mapping/",Sys.Date(),"_tsne_projection.pdf",sep=""))
# ICSWrapper::plot_cells(Seurat,target="sample",leg_pos="right",save=FALSE,ncol=1,color_list = color_list)
# ICSWrapper::plot_cells(Seurat,target="Cluster",leg_pos="right",save=FALSE,ncol=1,color_list = color_list)
# dev.off()
# ICSWrapper::plot_nFeatures(Seurat,title="",save=TRUE,tiff=FALSE,reduce="t-SNE",p3D=FALSE)
# ICSWrapper::plot_tot_mRNA(Seurat,title="",save=TRUE,tiff=FALSE,reduce="t-SNE",p3D=FALSE)
# p3 <- DimPlot(object = Seurat, reduction = "umap", group.by = "sample",cols = color_cond)
# p4 <- DimPlot(object = Seurat, reduction = "umap", label = TRUE,cols = color_clust)
# pdf(paste("result/Mapping/",Sys.Date(),project,"umap","Seuratw.pdf",sep="_"))
# print(p3)
# print(p4)
# dev.off()
dir.create(Sys.getenv("R_LIBS_USER"), recursive = TRUE) # create personal library
.libPaths(Sys.getenv("R_LIBS_USER")) # add to the path
## ----setup, include=FALSE--------------------------------------------------------------------
set.seed(123)
# library(reticulate)
options(future.globals.maxSize= 2122317824)
library(sctransform)
library(Seurat)
library(RColorBrewer)
library(tictoc)
library(crayon)
library(stringr)
library(Routliers)
library(jcolors)
library(cluster)
library(NMF)
library(ggplot2)
library(ggpubr)
library(cowplot)
## ----Venn------------------------------------------------------------------------------------
list.of.packages <- c("VennDiagram", "EnhancedVolcano")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) BiocManager::install(new.packages)
library(VennDiagram)
library(EnhancedVolcano)
colormap_d<- c('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928',"black","gray")
color_cond <- c(brewer.pal(5,"Dark2"),"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6])[c(5,1,2,3,4,9,6,7,8)]
color_clust <- c(brewer.pal(12,"Paired")[-11],"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6],brewer.pal(8,"Dark2"))
color_cells <- c(brewer.pal(9,"Set1")[-6],"goldenrod4","darkblue","seagreen4")
color_list <- list(condition=color_cond,Cluster=color_clust,Cell_Type=color_cells,State=color_clust)
# ================ READ ARGS =================
args = commandArgs(trailingOnly=TRUE)
print(args)
source("workflow/scripts/Functions.R")
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else {
print("Arguments Passed")
}
input_file <- args[1]
threads <- as.numeric(args[2])
output_group_A <- args[3]
# ---------------------------------------------
Combined <- readRDS(input_file)
# =============================== PAIRWISE DF ===============================================
Combined$condition <- as.factor(Combined$condition)
Idents(Combined) <- as.factor(Combined$condition)
cl_combinations <- combn(levels(Combined$condition),2)
print(cl_combinations)
cl_combinations <- cl_combinations[,c(5,19,25,30)]
DefaultAssay(Combined) <- "RNA"
Combined <- NormalizeData(Combined)
Combined <- ScaleData(Combined,rownames(Combined@assays$RNA@counts))
Combined$Timepoints <- factor(unlist(lapply(as.vector(Combined$condition),function(x){strsplit(x,"_")[[1]][2]})),levels=c("IPSCs","D06","D10","D15","D21"))
setwd("result/Pairwise/")
library(parallel)
pairwise_df <- function (comb,object,cl_combinations){
DefaultAssay(object) <- "RNA"
# for(comb in 1:dim(cl_combinations)[2]){
title <- paste(cl_combinations[,comb],collapse = "_")
dir.create(paste0(title))
setwd(paste0(title))
target <- "condition"
idents <- as.vector(cl_combinations[,comb])
ident.1 <- idents[1]
print(ident.1)
ident.2 <- idents[2]
print(ident.1)
print(ident.2)
pbmc.markers <- FindMarkers(object = object,
ident.1 = ident.1,
ident.2 =ident.2,
assay ="RNA",min.pct =0.1,
logfc.threshold=0.0,
only.pos = FALSE,
test.use = "MAST",latent.vars = c("nCount_RNA"))
pbmc.markers$gene <- rownames(pbmc.markers)
top <- pbmc.markers[pbmc.markers$p_val_adj<0.05,]
to_fc <- top[order(abs(top$avg_logFC),decreasing = TRUE),]
to_fc_gene <- rownames(to_fc)[1:50]
#top10 <- top %>% top_n(n = 50, wt = abs(avg_logFC))
#top10_genes<- rownames(top10)
temp <- object[,object$condition%in%c(ident.1,ident.2)]
temp$condition <- as.factor(as.vector(temp$condition))
pbmc.markers$avg_logFC2 <- pbmc.markers$avg_logFC/log(2)
min_vlc <- min(pbmc.markers$avg_logFC2)
max_vlc <- max(pbmc.markers$avg_logFC2)
# debugonce(annotated_heat)
pdf(paste0(title,"_Volcano.pdf"))
plot(EnhancedVolcano(pbmc.markers,
lab = pbmc.markers$gene,
x = 'avg_logFC2',
y = 'p_val_adj',subtitle = paste(ident.1,"vs",ident.2,"(FCcutoff=0.6)"),
xlim = c(min_vlc-0.5,max_vlc +0.5),FCcutoff = 0.6))
dev.off()
ICSWrapper::annotated_heat(object=temp,
row_annotation=c(1),
gene_list=to_fc_gene,
Rowv=TRUE,
gene_list_name="DF_genes",
title=title,
ordering="condition",One_annot = TRUE)
DefaultAssay(temp) <- "integrated"
write.table(pbmc.markers, file = paste0(Sys.Date(),"_TO_EXP_each_",target,"_",title,".tsv"),row.names=FALSE, na="", sep="\t")
setwd("../")
}
Idents(Combined) <- Combined$condition
mclapply(c(1:dim(cl_combinations)[2]),FUN=pairwise_df,object=Combined,cl_combinations=cl_combinations,mc.cores=threads)
print(getwd())
# ----------------------------------------------------------------------------------------------
dirs_pairs <- list.dirs(full.names = TRUE )[-1]
dirs_pairs <- grep('D06.*D06|D15.*D15|D21.*D21',dirs_pairs,value = TRUE)
df_return_nt_cntrl <- list()
df_return_nt_pink <- list()
df_return_nt_all <- list()
for (iter in 1:length(dirs_pairs)){
dirs_iter <- dirs_pairs[iter]
file <- paste0(dirs_iter ,"/", dir(dirs_iter, "*.tsv"))
print(file)
l1 <- read.table(file,header=TRUE)
l1$cluster <- l1$avg_logFC
l1$cluster[ l1$avg_logFC<0] <- "PINK"
l1$cluster[ l1$avg_logFC>0] <- "Control"
ctrl_l1 <- l1[grep("Control",l1$cluster),]
pink_l1 <- l1[grep("PINK",l1$cluster),]
all_l1 <- l1
df_return_nt_cntrl[[iter]] <- as.vector(ctrl_l1[ctrl_l1$p_val_adj<0.01 & abs(ctrl_l1$avg_logFC) >0.4,"gene"])
df_return_nt_pink[[iter]] <- as.vector(pink_l1[pink_l1$p_val_adj<0.01 & abs(pink_l1$avg_logFC) >0.4,"gene"])
print(length(df_return_nt_cntrl[[iter]]))
print(length(df_return_nt_pink[[iter]]))
df_return_nt_all[[iter]] <- c(df_return_nt_cntrl[[iter]] ,df_return_nt_pink[[iter]])
}
# # ============= Intersect Common Genes
cntrl_intesect <- Reduce(intersect, df_return_nt_cntrl)
print(cntrl_intesect)
pink_intesect <- Reduce(intersect, df_return_nt_pink)
print(pink_intesect)
# df_return_nt_cntrl[[1]]
# df_return_nt_pink[[1]]
all_intesect <- Reduce(intersect, df_return_nt_all)
print(all_intesect)
# pdf("All_venn_diagramm.pdf")
# myCol <- brewer.pal(3, "Pastel2")
# draw.triple.venn(area1 = 117, area2 = 110, area3 = 114, n12 = 23, n23 = 18, n13 = 10,
# n123 = 7, category = c("Day06", "Day15", "Day21"),
# fill = myCol)
# dev.off()
# =========== Venn Diagrams DF genes
myCol <- brewer.pal(3, "Pastel2")
pdf("Control_venn_diagramm.pdf")
day06 <- df_return_nt_cntrl[[1]]
day15 <- df_return_nt_cntrl[[2]]
day21 <- df_return_nt_cntrl[[3]]
# Generate plot
v <- venn.diagram(list(Day06=day06, Day15=day15,Day21=day21),
fill = myCol,
alpha = c(0.5, 0.5, 0.5), cat.cex = 1.5, cex=1.5,
filename=NULL)
# have a look at the default plot
grid.newpage()
grid.draw(v)
# have a look at the names in the plot object v
lapply(v, names)
# We are interested in the labels
lapply(v, function(i) i$label)
v[[11]]$label <- paste(intersect(intersect(day06, day15),day21), collapse="\n")
# plot
grid.newpage()
grid.draw(v)
dev.off()
pdf("Pink1_venn_diagramm.pdf")
day06 <- df_return_nt_pink[[1]]
day15 <- df_return_nt_pink[[2]]
day21 <- df_return_nt_pink[[3]]
# Generate plot
v <- venn.diagram(list(Day06=day06, Day15=day15,Day21=day21),
fill = myCol,
alpha = c(0.5, 0.5, 0.5), cat.cex = 1.5, cex=1.5,
filename=NULL)
# have a look at the default plot
grid.newpage()
grid.draw(v)
# have a look at the names in the plot object v
lapply(v, names)
# We are interested in the labels
lapply(v, function(i) i$label)
v[[11]]$label <- paste(intersect(intersect(day06, day15),day21), collapse="\n")
# plot
grid.newpage()
grid.draw(v)
dev.off()
# # ====================================== COMMONS ===================================
# dirs_pairs <- list.dirs(full.names = TRUE )[-1]
# dirs_pairs <- grep('IPSCs|D06.*D06|D15.*D15|D21.*D21',dirs_pairs,value = TRUE)
# df_return_nt_cntrl <- list()
# df_return_nt_pink <- list()
# df_return_nt_all <- list()
# Timepoint <- c("D06","D15","D21","IPSCs")
# for (iter in 1:length(dirs_pairs)){
# dirs_iter <- dirs_pairs[iter]
# timp <- Timepoint[iter]
# file <- paste0(dirs_iter ,"/", dir(dirs_iter, "*.tsv"))
# print(file)
# l1 <- read.table(file,header=TRUE)
# l1$cluster <- l1$avg_logFC
# l1$cluster[ l1$avg_logFC2<0] <- "PINK"
# l1$cluster[ l1$avg_logFC2>0] <- "Control"
# l1$p.adj_BY <-p.adjust(l1$p_val,method="BY")
# l1$p.adj_bonferroni <-p.adjust(l1$p_val,method="bonferroni")
# l1$p.adj_BH <- p.adjust(l1$p_val,method="BH")
# ctrl_l1 <- l1[grep("Control",l1$cluster),]
# pink_l1 <- l1[grep("PINK",l1$cluster),]
# all_l1 <- l1
# df_return_nt_cntrl[[iter]] <- as.vector(ctrl_l1[ctrl_l1$p_val_adj<0.05 & abs(ctrl_l1$avg_logFC2) >1,"gene"])
# df_return_nt_pink[[iter]] <- as.vector(pink_l1[pink_l1$p_val_adj<0.05 & abs(pink_l1$avg_logFC2) >1,"gene"])
# print(length(df_return_nt_cntrl[[iter]]))
# print(length(df_return_nt_pink[[iter]]))
# df_return_nt_all[[iter]] <- c(df_return_nt_cntrl[[iter]] ,df_return_nt_pink[[iter]])
# pdf(paste0(timp,"_Volcanos.pdf"))
# l1$avg_logFC2 <- -l1$avg_logFC2
# l1$avg_logFC <- -l1$avg_logFC
# min_vlc <- min(l1$avg_logFC2)
# max_vlc <- max(l1$avg_logFC2)
# plot(EnhancedVolcano(l1,
# lab = l1$gene,
# x = 'avg_logFC2',
# y = 'p_val_adj',subtitle = "FC2 (FCcutoff=1)",
# xlim = c(min_vlc-1,max_vlc +1),FCcutoff = 1)+ggtitle(timp))
# min_vlc <- min(l1$avg_logFC)
# max_vlc <- max(l1$avg_logFC)
# plot(EnhancedVolcano(l1,
# lab = l1$gene,
# x = 'avg_logFC',
# y = 'p_val_adj',subtitle = "FC (FCcutoff=1)",
# xlim = c(min_vlc-1,max_vlc +1),FCcutoff = 1)+xlab("Neutral Log fold change"))
# min_vlc <- min(l1$avg_logFC2)
# max_vlc <- max(l1$avg_logFC2)
# plot(EnhancedVolcano(l1,
# lab = l1$gene,
# x = 'avg_logFC2',
# y = 'p.adj_BY',subtitle = "BY FC2 (FCcutoff=1)",
# xlim = c(min_vlc-1,max_vlc +1),FCcutoff = 1)+xlab("Neutral Log fold change")+ggtitle(timp))
# dev.off()
# }
# library(UpSetR )
# glmer_res <- readRDS("../glmer_pval_mat_10percent.rds")
# glmer_res$max_FC2 <- -glmer_res$max_FC/log(2)
# glmer_res$FC2 <- -glmer_res$FC/log(2)
# glmer_res$p_val_adj <- p.adjust(glmer_res$pvalues_l,method="bonferroni",n=dim(Combined@assays$RNA@counts)[1])
# pdf("GLMER_Volcanos.pdf")
# min_vlc <- min(glmer_res$max_FC2)
# max_vlc <- max(glmer_res$max_FC2)
# plot(EnhancedVolcano(glmer_res,
# lab = rownames(glmer_res),
# x = 'max_FC2',
# y = 'p_val_adj',subtitle = "FC2 (FCcutoff=1)",
# xlim = c(min_vlc-1,max_vlc +1),FCcutoff = 1)+xlab("Log2 fold change")+ggtitle("GLMM"))
# dev.off()
# glmm_genes <- rownames(glmer_res[glmer_res$p_val_adj<0.05 & abs(glmer_res$max_FC2)> 1,])
# df_return_nt_all[[5]] <- glmm_genes
# names(df_return_nt_all) <- c("D06","D15","D21","IPSCs","GLMM")
# pdf("Common.pdf",width=12)
# UpSetR::upset(fromList(df_return_nt_all),text.scale =1.9)
# dev.off()
# unlist(df_return_nt_cntrl)
# # =========================================== ENRICHMENT ANALYSIS ==================================
# r_d00 <- read.table("C:/Users/dimitrios.kyriakis/Desktop/PINK1/Pairwise/Control_IPSCs_PINK1_IPSCs/2021-04-01_TO_EXP_each_condition_Control_IPSCs_PINK1_IPSCs.tsv",header=T)
# r_d06 <- read.table("C:/Users/dimitrios.kyriakis/Desktop/PINK1/Pairwise/Control_D06_PINK1_D06/2021-04-01_TO_EXP_each_condition_Control_D06_PINK1_D06.tsv",header=T)
# r_d15 <- read.table("C:/Users/dimitrios.kyriakis/Desktop/PINK1/Pairwise/Control_D15_PINK1_D15/2021-04-01_TO_EXP_each_condition_Control_D15_PINK1_D15.tsv",header=T)
# r_d21 <- read.table("C:/Users/dimitrios.kyriakis/Desktop/PINK1/Pairwise/Control_D21_PINK1_D21/2021-04-01_TO_EXP_each_condition_Control_D21_PINK1_D21.tsv",header=T)
# r_d00_f <- r_d00[r_d00$p_val_adj<0.05 & abs(r_d00$avg_logFC2) > 0.5,]
# r_d00_f_c <- as.vector(r_d00_f$gene[r_d00_f$avg_logFC2>0])
# r_d00_f_p <- as.vector(r_d00_f$gene[r_d00_f$avg_logFC2<0])
# r_d06_f <- r_d06[r_d06$p_val_adj<0.05 & abs(r_d06$avg_logFC2) > 0.5,]
# r_d06_f_c <- as.vector(r_d06_f$gene[r_d06_f$avg_logFC2>0])
# r_d06_f_p <- as.vector(r_d06_f$gene[r_d06_f$avg_logFC2<0])
# r_d15_f <- r_d15[r_d15$p_val_adj<0.05 & abs(r_d15$avg_logFC2) > 0.5,]
# r_d15_f_c <- as.vector(r_d15_f$gene[r_d15_f$avg_logFC2>0])
# r_d15_f_p <- as.vector(r_d15_f$gene[r_d15_f$avg_logFC2<0])
# r_d21_f <- r_d21[r_d21$p_val_adj<0.05 & abs(r_d21$avg_logFC2) > 0.5,]
# r_d21_f_c <- as.vector(r_d21_f$gene[r_d21_f$avg_logFC2>0])
# r_d21_f_p <- as.vector(r_d21_f$gene[r_d21_f$avg_logFC2<0])
# control_m<- unique(c(r_d00_f_c,r_d06_f_c,r_d15_f_c,r_d21_f_c))
# length(r_d00_f_c)
# length(r_d06_f_c)
# length(r_d15_f_c)
# length(r_d21_f_c)
# length(control_m)
# pink_m <- unique(c(r_d00_f_p,r_d06_f_p,r_d15_f_p,r_d21_f_p))
# length(r_d00_f_p)
# length(r_d06_f_p)
# length(r_d15_f_p)
# length(r_d21_f_p)
# length(pink_m)
# library(clusterProfiler)
# library(enrichplot)
# library(ReactomePA)
# library(org.Hs.eg.db)
# control_m_e <- bitr(control_m, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# pink_m_e <- bitr(pink_m, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# n_c <- length(control_m_e$ENTREZID)
# n_p <- length(pink_m_e$ENTREZID)
# c_d <- data.frame(ENTREZID = control_m_e$ENTREZID , Condition ="Control")
# p_d <- data.frame(ENTREZID = pink_m_e$ENTREZID , Condition ="PINK1")
# all_d <- rbind(c_d,p_d)
# mydf <- as.data.frame(all_d)
# mydf$ENTREZID <- as.vector(mydf$ENTREZID)
# mydf$ENTREZID <-as.numeric(mydf$ENTREZID )
# GOclusterplot <- 0
# Keggclusterplot<-0
# Reactomeclusterplot<-0
# GOclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichGO", OrgDb = "org.Hs.eg.db")
# Keggclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichKEGG")
# Reactomeclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichPathway")
# pdf("GO_Enrichment_DF_Clusters_log2FC_0.5.pdf",width=22,height=11)
# dotplot(GOclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Keggclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Reactomeclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dev.off()
# control_m_e <- bitr(r_d06_f_c, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# pink_m_e <- bitr(r_d06_f_p, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# n_c <- length(control_m_e$ENTREZID)
# n_p <- length(pink_m_e$ENTREZID)
# c_d <- data.frame(ENTREZID = control_m_e$ENTREZID , Condition ="Control")
# p_d <- data.frame(ENTREZID = pink_m_e$ENTREZID , Condition ="PINK1")
# all_d <- rbind(c_d,p_d)
# mydf <- as.data.frame(all_d)
# mydf$ENTREZID <- as.vector(mydf$ENTREZID)
# mydf$ENTREZID <-as.numeric(mydf$ENTREZID )
# GOclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichGO", OrgDb = "org.Hs.eg.db")
# Keggclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichKEGG")
# Reactomeclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichPathway")
# pdf("D06_GO_Enrichment_DF_Clusters.pdf",width=22,height=11)
# dotplot(GOclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Keggclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Reactomeclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dev.off()
# control_m_e <- bitr(r_d15_f_c, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# pink_m_e <- bitr(r_d15_f_p, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# n_c <- length(control_m_e$ENTREZID)
# n_p <- length(pink_m_e$ENTREZID)
# c_d <- data.frame(ENTREZID = control_m_e$ENTREZID , Condition ="Control")
# p_d <- data.frame(ENTREZID = pink_m_e$ENTREZID , Condition ="PINK1")
# all_d <- rbind(c_d,p_d)
# mydf <- as.data.frame(all_d)
# mydf$ENTREZID <- as.vector(mydf$ENTREZID)
# mydf$ENTREZID <-as.numeric(mydf$ENTREZID )
# GOclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichGO", OrgDb = "org.Hs.eg.db")
# Keggclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichKEGG")
# Reactomeclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichPathway")
# pdf("D15_GO_Enrichment_DF_Clusters.pdf",width=22,height=11)
# dotplot(GOclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Keggclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Reactomeclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dev.off()
# control_m_e <- bitr(r_d21_f_c, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# pink_m_e <- bitr(r_d21_f_p, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# n_c <- length(control_m_e$ENTREZID)
# n_p <- length(pink_m_e$ENTREZID)
# c_d <- data.frame(ENTREZID = control_m_e$ENTREZID , Condition ="Control")
# p_d <- data.frame(ENTREZID = pink_m_e$ENTREZID , Condition ="PINK1")
# all_d <- rbind(c_d,p_d)
# mydf <- as.data.frame(all_d)
# mydf$ENTREZID <- as.vector(mydf$ENTREZID)
# mydf$ENTREZID <-as.numeric(mydf$ENTREZID )
# GOclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichGO", OrgDb = "org.Hs.eg.db")
# Keggclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichKEGG")
# Reactomeclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichPathway")
# pdf("D21_GO_Enrichment_DF_Clusters.pdf",width=22,height=11)
# dotplot(GOclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Keggclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Reactomeclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dev.off()
## ----setup, include=FALSE--------------------------------------------------------------------
set.seed(123)
# library(reticulate)
options(future.globals.maxSize= 2122317824)
library(sctransform)
library(Seurat)
library(RColorBrewer)
library(tictoc)
library(crayon)
library(stringr)
library(Routliers)
library(jcolors)
library(cluster)
library(NMF)
library(ggplot2)
library(ggpubr)
library(cowplot)
library(future)
# library(clusterProfiler)
# library(enrichplot)
# library(ReactomePA)
# library(org.Hs.eg.db)
list.of.packages <- c("multtest", "EnhancedVolcano")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) BiocManager::install(new.packages)
colormap_d<- c('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928',"black","gray")
color_cond <- c(brewer.pal(5,"Dark2"),"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6])[c(5,1,2,3,4,9,6,7,8)]
color_clust <- c(brewer.pal(12,"Paired")[-11],"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6],brewer.pal(8,"Dark2"))
color_cells <- c(brewer.pal(9,"Set1")[-6],"goldenrod4","darkblue","seagreen4")
color_list <- list(condition=color_cond,Cluster=color_clust,Cell_Type=color_cells,State=color_clust)
# ================ READ ARGS =================
args = commandArgs(trailingOnly=TRUE)
print(args)
source("workflow/scripts/Functions.R")
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else {
print("Arguments Passed")
}
input_file <- args[1]
threads <- as.numeric(args[2])
output_group_B <- args[3]
output_group_C <- args[4]
# ---------------------------------------------
Combined <- readRDS(input_file)
Conserved_M <- subset(Combined,subset= condition !="Control_D10")
DefaultAssay(Conserved_M) <- "RNA"
Conserved_M <-NormalizeData(Conserved_M)
all.genes <- rownames(Conserved_M)
Conserved_M <- ScaleData(Conserved_M, features = all.genes)
Conserved_M$Timepoints <- as.vector(Conserved_M$date)
Conserved_M$Timepoints[grep("IPSC",Conserved_M$date)] <-"IPSCs"
Conserved_M$Timepoints[grep("D06",Conserved_M$date)] <-"Day06"
Conserved_M$Timepoints[grep("D15",Conserved_M$date)] <-"Day15"
Conserved_M$Timepoints[grep("D21",Conserved_M$date)] <-"Day21"
Idents(Conserved_M) <- "Treatment"
plan("multiprocess", workers = threads)
markers <- FindConservedMarkers(Conserved_M,ident.1 = "Control",ident.2 = "PINK1",grouping.var = "Timepoints",test.use="MAST",latent.vars="nCount_RNA",logfc.threshold=0.0)
index_fc <- c(sign(markers$IPSCs_avg_logFC)==sign(markers$Day06_avg_logFC) & sign(markers$Day06_avg_logFC)==sign(markers$Day15_avg_logFC) & sign(markers$Day15_avg_logFC)==sign(markers$Day21_avg_logFC))
sub_markers <- markers[markers$max_pval < 0.01 & index_fc,]
sub_markers$avg_FC <- rowMeans(sub_markers[,c("IPSCs_avg_logFC","Day06_avg_logFC","Day15_avg_logFC","Day21_avg_logFC")])
sub_markers2 <- sub_markers[abs(sub_markers$avg_FC) >0.1,]
dim(sub_markers2)
# 151
write.table(sub_markers2,output_group_B)
# # ======================= Enrichment ==================================
# control_m <- rownames(sub_markers2[sub_markers2$avg_FC < 0,] )
# pink_m <- sub_markers2[sub_markers2$avg_FC > 0,])
# control_m_e <- bitr(control_m, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# pink_m_e <- bitr(pink_m, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# n_c <- length(control_m_e$ENTREZID)
# n_p <- length(pink_m_e$ENTREZID)
# c_d <- data.frame(ENTREZID = control_m_e$ENTREZID , Condition ="Control")
# p_d <- data.frame(ENTREZID = pink_m_e$ENTREZID , Condition ="PINK1")
# all_d <- rbind(c_d,p_d)
# mydf <- as.data.frame(all_d)
# mydf$ENTREZID <- as.vector(mydf$ENTREZID)
# mydf$ENTREZID <-as.numeric(mydf$ENTREZID )
# GOclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichGO", OrgDb = "org.Hs.eg.db")
# Keggclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichKEGG")
# Reactomeclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichPathway")
# pdf("result/DEG/GO_Enrichment_DEG_Group_B.pdf",width=22,height=11)
# dotplot(GOclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Keggclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Reactomeclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dev.off()
index_fc <- c( sign(markers$Day06_avg_logFC)==sign(markers$Day15_avg_logFC) & sign(markers$Day15_avg_logFC)==sign(markers$Day21_avg_logFC))
sub_markers <- markers[markers$max_pval < 0.01 & index_fc,]
sub_markers$avg_FC <- rowMeans(sub_markers[,c("IPSCs_avg_logFC","Day06_avg_logFC","Day15_avg_logFC","Day21_avg_logFC")])
sub_markers2 <- sub_markers[abs(sub_markers$avg_FC) >0.1,]
dim(sub_markers2)
#172
write.table(sub_markers2,output_group_C)
# --------------------------------------------------------------------
\ No newline at end of file
## ----setup, include=FALSE--------------------------------------------------------------------
set.seed(123)
# library(reticulate)
options(future.globals.maxSize= 2122317824)
library(sctransform)
library(Seurat)
library(RColorBrewer)
library(tictoc)
library(crayon)
library(stringr)
library(Routliers)
library(jcolors)
library(cluster)
library(NMF)
library(ggplot2)
library(ggpubr)
library(cowplot)
library(future)
# library(clusterProfiler)
# library(enrichplot)
# library(ReactomePA)
# library(org.Hs.eg.db)
# list.of.packages <- c("multtest", "EnhancedVolcano")
# new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
# if(length(new.packages)) BiocManager::install(new.packages)
colormap_d<- c('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928',"black","gray")
color_cond <- c(brewer.pal(5,"Dark2"),"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6])[c(5,1,2,3,4,9,6,7,8)]
color_clust <- c(brewer.pal(12,"Paired")[-11],"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6],brewer.pal(8,"Dark2"))
color_cells <- c(brewer.pal(9,"Set1")[-6],"goldenrod4","darkblue","seagreen4")
color_list <- list(condition=color_cond,Cluster=color_clust,Cell_Type=color_cells,State=color_clust)
# ================ READ ARGS =================
args = commandArgs(trailingOnly=TRUE)
print(args)
source("workflow/scripts/Functions.R")
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else {
print("Arguments Passed")
}
input_file <- args[1]
threads <- as.numeric(args[2])
output_group_D <- args[3]
# ---------------------------------------------
Combined <- readRDS(input_file)
Conserved_M <- subset(Combined,subset= condition %in% c("Control_D06","Control_D15","Control_D21","PINK1_D06","PINK1_D15","PINK1_D21"))
DefaultAssay(Conserved_M) <- "RNA"
Conserved_M <-NormalizeData(Conserved_M)
Conserved_M <-ScaleData(Conserved_M)
Conserved_M$Timepoints <- as.vector(Conserved_M$date)
Conserved_M$Timepoints[grep("D06",Conserved_M$date)] <-"Day06"
Conserved_M$Timepoints[grep("D15",Conserved_M$date)] <-"Day15"
Conserved_M$Timepoints[grep("D21",Conserved_M$date)] <-"Day21"
Idents(Conserved_M) <- "Treatment"
plan("multiprocess", workers = threads)
markers <- FindConservedMarkers(Conserved_M,ident.1 = "Control",ident.2 = "PINK1",grouping.var = "Timepoints",test.use="MAST",latent.vars="nCount_RNA",logfc.threshold=0.0)
index_fc <- c( sign(markers$Day06_avg_logFC)==sign(markers$Day15_avg_logFC) & sign(markers$Day15_avg_logFC)==sign(markers$Day21_avg_logFC))
sub_markers <- markers[markers$max_pval < 0.01 & index_fc,]
sub_markers$avg_FC <- rowMeans(sub_markers[,c("Day06_avg_logFC","Day15_avg_logFC","Day21_avg_logFC")])
sub_markers2 <- sub_markers[abs(sub_markers$avg_FC) >0.1,]
dim(sub_markers2)
# 285
write.table(sub_markers2,output_group_D)
# # ======================= Enrichment ==================================
# control_m <- rownames(sub_markers2[sub_markers2$avg_FC < 0,] )
# pink_m <- rownames(sub_markers2[sub_markers2$avg_FC > 0,])
# control_m_e <- bitr(control_m, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# pink_m_e <- bitr(pink_m, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
# n_c <- length(control_m_e$ENTREZID)
# n_p <- length(pink_m_e$ENTREZID)
# c_d <- data.frame(ENTREZID = control_m_e$ENTREZID , Condition ="Control")
# p_d <- data.frame(ENTREZID = pink_m_e$ENTREZID , Condition ="PINK1")
# all_d <- rbind(c_d,p_d)
# mydf <- as.data.frame(all_d)
# mydf$ENTREZID <- as.vector(mydf$ENTREZID)
# mydf$ENTREZID <-as.numeric(mydf$ENTREZID )
# GOclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichGO", OrgDb = "org.Hs.eg.db")
# Keggclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichKEGG")
# Reactomeclusterplot <- compareCluster(ENTREZID~Condition,data=mydf, fun = "enrichPathway")
# pdf("GO_Enrichment_DF_Clusters.pdf",width=22,height=11)
# dotplot(GOclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Keggclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dotplot(Reactomeclusterplot)+ ggplot2::facet_grid(~Condition, scales = "free")
# dev.off()
# ----------------------
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
## ----setup, include=FALSE--------------------------------------------------------------------
set.seed(123)
# library(reticulate)
options(future.globals.maxSize= 2122317824)
library(sctransform)
library(Seurat)
library(RColorBrewer)
library(tictoc)
library(crayon)
library(stringr)
library(Routliers)
library(jcolors)
library(cluster)
library(NMF)
library(ggplot2)
library(ggpubr)
library(cowplot)
library(igraph)
library(plyr)
library(jsonlite)
library(purrr)
library(data.table)
list.of.packages <- c("STRINGdb")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) BiocManager::install(new.packages)
library(STRINGdb)
colormap_d<- c('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928',"black","gray")
color_cond <- c(brewer.pal(5,"Dark2"),"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6])[c(5,1,2,3,4,9,6,7,8)]
color_clust <- c(brewer.pal(12,"Paired")[-11],"black","gray","magenta4","seagreen4",brewer.pal(9,"Set1")[-6],brewer.pal(8,"Dark2"))
color_cells <- c(brewer.pal(9,"Set1")[-6],"goldenrod4","darkblue","seagreen4")
color_list <- list(condition=color_cond,Cluster=color_clust,Cell_Type=color_cells,State=color_clust)
# ================ READ ARGS =================
args = commandArgs(trailingOnly=TRUE)
print(args)
source("workflow/scripts/Functions.R")
source("workflow/scripts/rstring.r")
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else {
print("Arguments Passed")
}
input_file <- args[1]
group_B <- args[2]
group_C<- args[3]
group_D <- args[4]
output_file <- args[5]
# ---------------------------------------------
Combined <- readRDS(input_file)
# ============== READ DF GROUPS ================
# gA <- read.table(args[1])
gB <- read.table(group_B)
gC <- read.table(group_C)
gD <- read.table(group_D)
DEGs <- unique(c(rownames(gB),rownames(gC),rownames(gD)))
all_g <- rownames(Combined@assays$RNA@counts)
rest <- all_g[!all_g%in%DEGs]
length(all_g)
# [1] 18097
length(DEGs)
# [1] 292
length(rest)
# [1] 17805
# -----------------------------------------------
mean_l <- c()
median_l <- c()
degree_l <- list()
num_nodes <- c()
num_inter <- c()
string_db = STRINGdb$new(version="11",species=9606)
string_plot_net <- function(i,mean_l,median_l,degree_l){
# n_genes <- 292
# r_list1 <- sample(rest,n_genes)
# write.table(r_list1,paste0('Random_Genes',i,"_",n_genes,".txt"),row.names = F,col.names = F)
# Rerun
r_list1<- read.table(paste0('DATA/Random_Genes/Random_Genes',i,".txt"),col.names = F)[,1]
Genes <- string_db$mp(r_list1)
#string_db$plot_network(Genes)
inter_g <- string_db$get_interactions(Genes)
full.graph <- string_db$get_subnetwork(Genes)
degrees_f <- igraph::degree(full.graph)
mean_l <-c (mean_l,mean(degrees_f))
median_l <-c (median_l,median(degrees_f))
num_nodes <- c(num_nodes,length(degrees_f))
degree_l[[i]] <-degrees_f
num_inter <- c(num_inter,ecount(full.graph))
return(list("mean_l"=mean_l,"median_l"=median_l,"degree_l"=degree_l,"num_nodes"=num_nodes,"num_inter"=num_inter))
}
for(i in 1:50){
return_res <- string_plot_net(i,mean_l,median_l,degree_l)
mean_l <- return_res$mean_l
median_l <- return_res$median_l
num_nodes <- return_res$num_nodes
degree_l <- return_res$degree_l
num_inter <- return_res$num_inter
}
lapply(list(mean_l,median_l,degree_l,num_inter),length)
# pdf("result/Networks/Ours.pdf")
#write.table(DEGs,paste0('DEGs',51,".txt"),row.names = F,col.names = F)
# DEGs <- read.table(paste0('DEGs',51,".txt"),col.names = F)[,1]
string_db = STRINGdb$new(version="11.0",species=9606)
Genes <- string_db$mp(DEGs)
print("OK1")
# string_db$plot_network(Genes)
# print("OK2")
full.graph <- string_db$get_subnetwork(Genes)
degrees_f <- igraph::degree(full.graph)
mean_l <-c (mean_l,mean(degrees_f))
median_l <-c (median_l,median(degrees_f))
num_nodes <- c(num_nodes,length(degrees_f))
num_inter <- c(num_inter,ecount(full.graph))
degree_l[[51]] <-degrees_f
# dev.off()
print("OK3")
rest_degree <- unlist(degree_l[1:50])
deg_degree <- unlist(degree_l[51])
df <- data.frame("degree"=unlist(degree_l))
length(deg_degree)
length(unlist(degree_l))
n_random <- length(unlist(degree_l)) - length(deg_degree)
df$ord <- "Random"
df$ord[n_random:length(unlist(degree_l))] <- "DEG"
rest_num_nodes <- unlist(num_nodes[1:50])
deg_num_nodes <- unlist(num_nodes[51])
rest_num_inter <- unlist(num_inter[1:50])
deg_num_inter <- unlist(num_inter[51])
d <- density(rest_num_nodes)
print("OK4")
mu <- ddply(df, "ord", summarise, grp.mean=mean(degree))
p1 <- df %>%
ggplot( aes(x=degree, fill=ord)) +
geom_density(alpha=0.8)+
geom_vline(data=mu, aes(xintercept=grp.mean, color=ord),size=2,
linetype="dashed")+theme_cowplot()+
theme(legend.position = "none") + ylab("Degree distribution") +xlab("Degree")
df <- data.frame("nodes"=rest_num_nodes)
df$ord <- "RANDOM"
p2 <- df %>%
ggplot( aes(x=nodes, color=ord)) +
geom_density(fill="#00BFC4", color="#00BFC4",alpha=0.8)+
geom_vline(xintercept = deg_num_nodes,color="#F8766D",linetype="dashed", size = 2)+theme_cowplot()+
theme(legend.position = "top") + ylab("Probability") + xlab("Number of nodes")+
annotate(geom="text", x=190, y=0.045, label="Random",color="#00BFC4",size=7)+
annotate(geom="text", x=240, y=0.045, label="DEG",color="#F8766D",size=7)+xlim(170,260)
df <- data.frame("interactions"=rest_num_inter)
df$ord <- "RANDOM"
p6 <- df %>%
ggplot( aes(x=interactions, color=ord)) +
geom_density(fill="#00BFC4", color="#00BFC4",alpha=0.8)+
geom_vline(xintercept = deg_num_inter,color="#F8766D",linetype="dashed",size = 2)+
theme_cowplot()+
theme(legend.position = "top") + ylab("Probability") + xlab("Number of interactions") +xlim(c(400,1650)) +
annotate(geom="text", x=300, y=0.003, label="Random",color="#00BFC4",size=7)+
annotate(geom="text", x=1300, y=0.003, label="DEG",color="#F8766D",size=7)+xlim(100,1600)
# #69b3a2
# #e9ecef
dt_list <- map(degree_l, as.data.table)
dt <- rbindlist(dt_list, fill = TRUE, idcol = T)
colnames(dt)[2] <- "Degree"
dt$ord <- "RANDOM"
dt$ord[dt$.id==51]<- "DEG"
colnames(dt)[3] <- "Condition"
# p3 <- ggplot(dt, aes(x=.id, y=Degree,group=.id,fill=Condition)) +
# geom_boxplot()+theme_cowplot()
# ggboxplot(dt, x = ".id", y = "V1",group=".id")+
# stat_compare_means() + # Global p-value
# stat_compare_means(ref.group = 51, label = "p.signif",
# label.y = c(22, 29))
my_comparisons <- list( c("DEG","RANDOM"))
# p4 <- ggboxplot(dt, x = "Condition", y = "Degree",
# fill = "Condition")+
# stat_compare_means(comparisons = my_comparisons)+ theme_cowplot() + ylab("Degree") # Add pairwise comparisons p-value
# stat_compare_means(label.y = 50)
dt2 <- dt[order(Condition),]
p4 <- ggboxplot(dt2, x = "Condition", y = "Degree",
fill = "Condition")+
stat_compare_means(comparisons = my_comparisons, ref.group = "RANDOM") +
#annotate(geom="text", x="DEG", y=90, label="***",color="black",size=5)+theme_cowplot()+
theme(legend.position = "none") + ylab("Degree") +xlab("Condition")
pdf(output_file)
p2
p6
p4
dev.off()
# p5 <- ggplot(dt, aes(x=Condition, y=Degree,group=Condition,fill=Condition)) +
# geom_boxplot()+
# stat_compare_means(method = "wilcox.test")+ # Add global p-value
# stat_compare_means(label = "p.signif", method = "t.test",
# ref.group = "RANDOM")+theme_cowplot()+
# theme(legend.position = "top")
# cor_list<- readRDS("Correlation_Networks.rds")
# p26<-ggarrange(plotlist=list(p2,p6),nrow = 2)
# g14 <- p1 + annotation_custom(ggplotGrob(p4), xmin = 30, xmax = 80,
# ymin = 0.04, ymax = 0.11)
# sup6_p2 <- ggarrange(plotlist=list(p26,g14),nrow = 1)
# FigSup_1 <- cor_list[[1]]
# FigSup_2 <- cor_list[[2]]
# FigSup_3 <- cor_list[[3]]
# FigSup_4 <- cor_list[[4]]
# sup6_p1 <- ggarrange(plotlist=list(FigSup_1,sup6_p2),ncol=2)
# sup6_p3 <- ggarrange(plotlist=list(FigSup_2,FigSup_3,FigSup_4),ncol=3)
# pdf("result/Network/Network_Sup_Figures.pdf",width=20,height=10)
# FigSup_1
# FigSup_2
# FigSup_3
# FigSup_4
# sup6_p2
# dev.off()
# pdf("Sup6")
# ggarrange(plotlist=list(p26,g14),nrow = 1)
# dev.off()
# p12 <- ggarrange(plotlist=list(p1,p2),nrow = 1)
# p45 <- ggarrange(plotlist=list(p4,p5),nrow = 1)
# pdf("QC_292.pdf",height=12,width=8)
# ggarrange(plotlist=list(p12,p3,p45),nrow = 3)
# dev.off()
# pdf("QC2_292.pdf",height=8,width=15)
# p125 <- ggarrange(plotlist=list(p1,p2,p6,p4),ncol = 4)
# ggarrange(plotlist=list(p125,p3),nrow = 2)
# dev.off()
# pdf("QC3_292.pdf",width=15,height=10)
# ggarrange(plotlist=list(p2,p1,p6,p4),nrow = 2,ncol=2)
# dev.off()
# pdf("QC4_292.pdf",width=8,height=4)
# p2
# p1
# p6
# p4
# dev.off()
# p6+ annotation_custom(ggplotGrob(p2), xmin = 1, xmax = 3,
# ymin = -0.3, ymax = 0.6)
# pc <- ggarrange(plotlist=list(p6,p4),ncol=1)
# pb <- ggarrange(plotlist=list(p2,p1),ncol=1)
# ggarrange(plotlist=list(FigSup_1,FigSup_3,FigSup_4),c("d","e","f"),ncol=3)
# ggarrange(plotlist=list(FigSup_2,FigSup_3,FigSup_4),c("d","e","f"),ncol=3)
This diff is collapsed.
get_os <- function () {
sysinf <- Sys.info()
if (!is.null(sysinf)) {
os <- sysinf["sysname"]
if (os == "Darwin")
os <- "osx"
}
else {
os <- .Platform$OS.type
if (grepl("^darwin", R.version$os))
os <- "osx"
if (grepl("linux-gnu", R.version$os))
os <- "linux"
}
tolower(os)
}
Vln_QC <- function (df_qc, condition, title, outliers = NULL) {
library(gridExtra)
if (is.null(outliers)) {
outliers <- 1
}
else {
outliers <- as.numeric(!outliers) + 1
}
p1 <- ggplot(df_qc, aes(factor(Cond), nFeatures, fill = Cond)) +
geom_violin(width = 0.8, show.legend = FALSE) + ggtitle("nFeatures") +
xlab("") + ylab("Expression Level") + geom_jitter(width = 0.3,
size = 1, show.legend = FALSE, colour = outliers)
p2 <- ggplot(df_qc, aes(factor(Cond), Total_MRNA, fill = Cond),
width = 1) + geom_violin(width = 0.8, scale = "count",
show.legend = FALSE, adjust = 1/2) + ggtitle("nCounts") +
xlab("") + ylab("Expression Level") + geom_jitter(width = 0.3,
size = 1, show.legend = FALSE, colour = outliers)
p3 <- ggplot(df_qc, aes(factor(Cond), Percent_Mit, fill = Cond),
width = 1) + geom_violin(width = 0.8, show.legend = FALSE) +
ggtitle("Percent Mit") + xlab("") + ylab("Expression Level") +
geom_jitter(width = 0.3, size = 1, show.legend = FALSE,
colour = outliers)
pdf(paste(Sys.Date(), "QC", condition, title, ".pdf",
sep = "_"))
grid.arrange(p1, p2, p3, nrow = 1)
dev.off()
}
object_identifier<- function (object) {
if (class(object) == "Seurat") {
tool = "seurat"
}
else {
tool = "monocle"
}
return(tool)
}
scatter_gene<-function (object, features, assay = "RNA", reduction = "umap",
size = 2, ncol = 2, nrow = 2) {
DefaultAssay(object) <- "RNA"
proj <- object@reductions[[reduction]]@cell.embeddings
exp <- as.matrix(object@assays[[assay]]@data)
df <- cbind(proj, object@meta.data, t(exp))
library(dplyr)
myPalette <- colorRampPalette(brewer.pal(9, "OrRd"))
myPalette <- colorRampPalette(c("lightgrey", "#FDBB84",
"#EF6548", "#D7301F", "#B30000", "#7F0000"))
sc <- scale_colour_gradientn(colours = myPalette(9))
results <- lapply(features, function(x) {
ggplot(df %>% arrange(get(x)), aes(x = get(colnames(df)[1]),
y = get(colnames(df)[2]), color = get(x))) + geom_point(size = size) +
ggtitle(x) + sc + theme_cowplot() + theme(legend.position = "right",
plot.title = element_text(hjust = 0.5), legend.title = element_blank()) +
xlab(colnames(df)[1]) + ylab(colnames(df)[2])
})
p1 <- ggarrange(plotlist = results, ncol = ncol, nrow = nrow)
return(p1)
}
#' Build Correlation Network
#'
#' @author Dimitrios Kyriakis
#' @export
#' @param dataset : A Matrix with columns the nodes
#' @param regulated : A vector with "UP Regulated" and "DOWN Regulated".
#' @param cor_thres : Correlation Threshold
#' @param corr.method : What kind of correlations should be computed? Default is "pearson", but "spearman" and "kendall" are also supported.
#' @return Network
#' @examples cor_net(heat_matrix)
#'
cor_net <- function (dataset,regulated=NULL,cor_thres=0.5,corr.method="pearson"){
require(tidyverse)
require(corrr)
require(igraph)
require(ggraph)
net_mat_plot <- as.matrix(dataset)
# # Create a tidy data frame of correlations
# d <- as.data.frame(t(net_mat_plot))
# tidy_cors <- d %>%
# correlate(method=corr.method) %>%
# stretch()
tryCatch(
{
# Convert correlations stronger than some value
# to an undirected graph object
library("Hmisc")
res2 <- rcorr(t(as.matrix(dataset_r)))
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}
return_cor <-flattenCorrMatrix(res2$r, res2$P)
corner(return_cor)
graph_cors <- return_cor %>%
dplyr::filter(abs(cor) > cor_thres)%>%
dplyr::filter(abs(p) < 0.05) %>%
graph_from_data_frame(directed = FALSE)
# graph_cors <- tidy_cors %>%
# dplyr::filter(abs(r) > cor_thres) %>%
# graph_from_data_frame(directed = FALSE)
G2 <-graph_cors
return(G2)
},
error=function(cond){
return("No Edge Found. Try to decrease MMHC threshold")
}
)
}
#' Build Regulatory Network Using Genie3
#'
#' @author Dimitrios Kyriakis
#' @export
#' @param dataset : A Matrix with columns the nodes
#' @param regulated : A vector with "UP Regulated" and "DOWN Regulated".
#' @param weight.threshold : Weight Threshold (less adds more edges)
#' @param nCores : Number of cores to use for parallel computing. Default: 4.
#' @param nTrees : Number of trees in an ensemble for each target gene. Default: 2000.
#' @return Network
#' @examples genie3_net(dataset,regulated=NULL,weight.threshold=0.05,title="",nCores=4,nTrees=2000)
#'
genie3_net <- function (dataset,regulated=NULL,weight.threshold=0.05,title="",nCores=4,nTrees=2000){
require(GENIE3)
require(igraph)
require(RCy3)
require(Rgraphviz)
net_mat_plot <- as.matrix(dataset)
tryCatch(
{
weightMat <- GENIE3(net_mat_plot, nCores=nCores, verbose=TRUE,nTrees=nTrees)
link.list <- getLinkList(weightMat,threshold=weight.threshold)
edge_listsi <- link.list[!duplicated(link.list),]
Gsi <- graph.data.frame(edge_listsi,directed = FALSE)
Asi <- get.adjacency(Gsi,sparse = F,attr = "weight",type = "both")
G2 <- graph.adjacency(Asi,mode = "undirected",weighted = T)
return(G2)
},
error=function(cond){
return("Debugonce()")
}
)
}
#' Build Network
#'
#' @author Dimitrios Kyriakis
#' @export
#' @param dataset : A Matrix with columns the nodes
#' @param regulated : A vector with "UP Regulated" and "DOWN Regulated".
#' @param threshold : Threshold (aplha parameter for mmhc/h2pc, pvalue for corellation, weeight for genie3)
#' @param nCores : Parallel (aplied only for genie3)
#' @param nTrees : Number of trees for Random forest (aplied only for genie3)
#' @param corr.method : What kind of correlations should be computed? Default is "pearson", but "spearman" and "kendall" are also supported.
#' @param title : Name of the pdf
#' @return Network
#' @examples ics_net(dataset,regulated=NULL,threshold=0.01,nCores=4,nTrees=2000,title="Day09",method="mmhc",corr.method="pearson")
#'
ics_net <- function (dataset,regulated=NULL,threshold=0.05,nCores=4,nTrees=2000,title="",method="mmhc",corr.method="pearson"){
require(bnlearn)
require(bnviewer)
require(igraph)
labelx <- paste0(Sys.Date(),"_",title)
graphics.off()
tryCatch(
{
# ===== MMPC ======================
if(method%in%c("pc","aracne","mmhc")){
net_mat_plot <- as.data.frame(t(as.matrix(dataset)))
if(method=="pc"){
temp_network <- bnlearn::h2pc(net_mat_plot,restrict.args=list("alpha"=threshold))
}else if (method=="aracne"){
temp_network <- bnlearn::aracne(net_mat_plot)
}else{
temp_network <- bnlearn::mmhc(net_mat_plot,restrict.args=list("alpha"=threshold))
}
G1 <- bnviewer::bn.to.igraph(temp_network)
}else{
if(method=="correlation"){
G1 <- ICSWrapper::cor_net(dataset=dataset,regulated=regulated,cor_thres=threshold,corr.method=corr.method)
}else if(method=="genie3"){
G1 <- ICSWrapper::genie3_net(dataset=dataset,regulated=regulated,weight.threshold=threshold,nCores=nCores,nTrees=nTrees)
}else{
print("Not available method")
return()
}
}
G2 <- igraph::simplify(G1)
if(method=="aracne"){
G2 <- as.undirected(G2)
}
if(!method%in%c("pc","mmhc")){
clusterlouvain <- cluster_louvain(G2)
}else{
G3 <- as.undirected(G2)
clusterlouvain <- cluster_louvain(G3)
}
V(G2)$cex <- 0.11
V(G2)$label.cex <-0.3
G3 <- G2
Degree_net <- igraph::degree(G2, mode = "in")
Between_net <- igraph::betweenness(G2)
pdf(paste0(labelx,"_Net_",method,".pdf"))
# =================================== UP AND DOWN REGULATION ==========================================
if(!is.null(regulated)){
regulated <- regulated[names(regulated)%in%vertex_attr(G2)$name]
Color_rest <-c("skyblue","pink")[as.factor(regulated)]
set.seed(1234) # set seed to make the layout reproducible
V(G2)$color <- Color_rest
plot(G2, edge.arrow.size=0.25, main="Regulated",cex=0.1,main=labelx)
legend("bottomleft",bty = "n",
legend=levels(as.factor(regulated)),
fill= c("skyblue","pink"), horiz=F)
}else{
Color_rest <-c("skyblue")
}
set.seed(1234) # set seed to make the layout reproducible
V(G2)$color <- Color_rest
set.seed(1234)
plot(G2, edge.arrow.size=0.25, main="Gene Network",cex=0.1,main=labelx)
set.seed(1234)
plot(clusterlouvain,G2, edge.arrow.size=0.25, main="Gene Network",cex=0.1,main=labelx )
# -----------------------------------------------------------------------------------------------------
# =================================== Betweenness CENTRALITY ==========================================
G4 <- G2
V(G4)$color <-Color_rest
#Edge Options: Color
E(G4)$color <- "grey"
set.seed(1234) # set seed to make the layout reproducible
print(Between_net)
if(length(unique(Between_net))==1){
plot(G4,cex=0.1, edge.arrow.size=0.25,main="Betweeness Centrality")
}else{
V(G4)$size=ICSWrapper::rescale_ics(Between_net) #because we have wide range, I am dividing by 5 to keep the high in-degree nodes from overshadowing everything else.
plot(G4, edge.arrow.size=0.25,main="Betweeness Centrality")
}
# -----------------------------------------------------------------------------------------------------
## ====================================== DEGREE ======================================================
G5 <- G2
#Node or Vetex Options: Size and Color
V(G5)$color <- Color_rest
#Edge Options: Color
E(G5)$color <- "grey"
#Plotting, Now Specifying an arrow size and getting rid of arrow heads
#We are letting the color and the size of the node indicate the directed nature of the graph
set.seed(1234) # set seed to make the layout reproducible
print(Degree_net)
if(length(unique(Degree_net))==1){
plot(G5,cex=0.1, edge.arrow.size=0.25,main="Degree In")
}else{
V(G5)$size=ICSWrapper::rescale_ics(Degree_net) #because we have wide range, I am dividing by 5 to keep the high in-degree nodes from overshadowing everything else.
plot(G5, edge.arrow.size=0.25,main="Degree In")#, vertex.label = NA
}
dev.off()
# -----------------------------------------------------------------------------------------------------
graphics.off()
# ====================================== DATA FRAME ===================================================
if(!method%in%c("pc","mmhc")){
if(!is.null(regulated)){
df_G2 <- data.frame(Gene=vertex_attr(G2)$name,Cluster=clusterlouvain$membership,Degree=Degree_net,BTC=Between_net,Regulated=regulated,Color=vertex_attr(G2)$color)
}else{
df_G2 <- data.frame(Gene=vertex_attr(G2)$name,Cluster=clusterlouvain$membership,Degree=Degree_net,BTC=Between_net,Color=vertex_attr(G2)$color)
}
}else{
if(!is.null(regulated)){
df_G2 <- data.frame(Gene=vertex_attr(G2)$name,Degree=Degree_net,BTC=Between_net,Regulated=regulated,Color=vertex_attr(G2)$color)
}else{
df_G2 <- data.frame(Gene=vertex_attr(G2)$name,Degree=Degree_net,BTC=Between_net,Color=vertex_attr(G2)$color)
}
}
write.table( df_G2,paste0(labelx,"_",method,".tsv"), row.names=FALSE,sep="\t")
graphics.off()
# -----------------------------------------------------------------------------------------------------
return(list("graph"=G3,"df"=df_G2))
},
error=function(cond){
return("Try exception gave an error. Check the treshold. Or use debugonce(ics_net) to check where the error comes from and report it")
}
)
}
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment