Skip to content
Snippets Groups Projects
Commit b6c8e53e authored by Valentina Galata
Browse files

prelim set-up: snakemake file and config, src for mmseq fig (issue #16)

parent 511bb410
No related branches found
No related tags found
2 merge requests: !71 Master, !68 Figures valentina
##################################################
# Config
##################################################
# Settings (output directory, per-figure options) are loaded into `config`
configfile: "figures.yml"

##################################################
# Python modules
##################################################
import os

##################################################
# TARGETS
##################################################
# Path of the figure PDF: the configured output directory joined with the
# file name configured for this figure
FIG_MMSEQ_UPSETR = os.path.join(
    config["output"],
    config["fig_mmseq_upsetr"]["output"],
)
##################################################
# RULES
##################################################
# Default target: requests the figure PDF as its only input
rule all:
    input:
        FIG_MMSEQ_UPSETR,
# Creates the figure PDF from the table of overlap sizes by running the
# R script named in the config inside the conda environment "envs/r.yml"
rule mmseq_upsetr:
    input:
        overlap_sizes=config["fig_mmseq_upsetr"]["overlap_sizes"],
    output:
        pdf=FIG_MMSEQ_UPSETR,
    params:
        # path of the shared R utilities and the PDF dimensions
        utils=config["utils"],
        width=config["fig_mmseq_upsetr"]["width"],
        height=config["fig_mmseq_upsetr"]["height"],
    log:
        # the log file sits next to the figure
        FIG_MMSEQ_UPSETR + ".log",
    conda:
        "envs/r.yml"
    script:
        config["fig_mmseq_upsetr"]["script"]
\ No newline at end of file
# general
output: "output"      # output directory
utils: "src/utils.R"  # path to utils.R

# figures
fig_mmseq_upsetr:
  script: "src/mmseq_upsetr.R"             # script run by the rule
  overlap_sizes: "data/overlap_sizes.txt"  # input table of overlap sizes
  output: "fig_mmseq_upsetr.pdf"           # file name inside the output directory
  width: 7                                 # passed to the PDF device as width
  height: 5                                # passed to the PDF device as height
\ No newline at end of file
#!/usr/bin/Rscript

## PURPOSE
# Snakemake script: reads a table of overlap (intersection) sizes, replaces the
# tool identifiers by their display names and draws the overlaps as an UpSetR
# plot into a PDF.
# Uses from the "snakemake" object:
#   input$overlap_sizes  space-separated table without header;
#                        V1 = size, V2 = tool identifiers joined by "_"
#   params$utils         R file defining TOOL_NAMES and TOOL_COLORS
#   params$width/height  dimensions passed to pdf()
#   output$pdf           PDF file to create
#   log[[1]]             file receiving R messages

## LOG FILE
# Keep a handle on the connection so that it can be closed at the end
log_con <- file(snakemake@log[[1]], open="wt")
sink(file=log_con, type="message")

## NOTE
# UpSetR
# https://cran.r-project.org/web/packages/UpSetR/vignettes/basic.usage.html

## IMPORT
suppressMessages(library(testit))
suppressMessages(library(UpSetR))
# custom
source(snakemake@params$utils)

## DATA
# overlap/intersection sizes
overlap_sizes <- read.csv(
    file=snakemake@input$overlap_sizes,
    sep=" ",
    header=FALSE,
    stringsAsFactors=FALSE
)
# check: the table has the two columns used below and the sizes are numbers
testit::assert(
    "overlap table has numeric sizes (V1) and tool combinations (V2)",
    ncol(overlap_sizes) >= 2,
    is.numeric(overlap_sizes$V1)
)
# check: all expected tools are there
testit::assert(
    "all expected tools are present in the overlap table",
    all(names(TOOL_NAMES) %in% overlap_sizes$V2)
)
# process names
# The identifiers are replaced one after another in the order of TOOL_NAMES,
# so an identifier containing another one must be listed before it.
# fixed=TRUE: identifiers are literal strings, not regular expressions.
for (tool_id in names(TOOL_NAMES)) {
    overlap_sizes$V2 <- sub(tool_id, TOOL_NAMES[[tool_id]], overlap_sizes$V2, fixed=TRUE)
}
# "&" is the separator used in the names of the UpSetR::fromExpression input
overlap_sizes$V2 <- gsub("_", "&", overlap_sizes$V2, fixed=TRUE)
# create input for UpSetR: sizes named by their tool combination
upset_input <- setNames(overlap_sizes$V1, overlap_sizes$V2)

## PLOT
# onefile=FALSE: avoids the blank leading page reported for UpSetR plots
# written to a multi-page PDF device; a single-page plot is unaffected otherwise
pdf(
    snakemake@output$pdf,
    width=snakemake@params$width,
    height=snakemake@params$height,
    onefile=FALSE
)
UpSetR::upset(
    data=UpSetR::fromExpression(upset_input),
    # overlap order
    order.by="degree",
    decreasing=FALSE,
    # colors
    # queries=lapply(
    #     TOOL_NAMES,
    #     function(x){ list(query=UpSetR::intersects, params=list(x), color=TOOL_COLORS$proc[[x]], active=TRUE) }
    # ),
    # color the matrix rows by tool
    set.metadata=list(
        data=data.frame(
            sets=TOOL_NAMES,
            Tool=TOOL_NAMES,
            stringsAsFactors=FALSE
        ),
        plots=list(list(type="matrix_rows", column="Tool", colors=TOOL_COLORS$proc, alpha=0.7))
    )
)
dev.off()

## CLEAN UP
# stop redirecting messages and close the log file
sink(type="message")
close(log_con)
#!/usr/bin/Rscript

## Shared constants for the figure scripts: tool display names and tool colors

suppressMessages(library(ggsci)) # colors

# how the tool names should be changed: identifier (name) -> display name (value)
# NOTE: code that substitutes the identifiers one after another in this order
#       relies on "metaspades_hybrid" being listed before "metaspades"
TOOL_NAMES <- c(
    "flye"="Flye",
    "megahit"="MEGAHIT",
    "metaspades_hybrid"="metaSPAdes (hybrid)",
    "metaspades"="metaSPAdes"
)

# tool colors: one color of the ggsci NEJM palette per tool
# The number of colors follows TOOL_NAMES so that both stay in sync when a tool
# is added or removed.
TOOL_COLORS <- list()
# ... named by the tool identifiers
TOOL_COLORS$raw <- setNames(
    ggsci::pal_nejm("default", alpha=1)(length(TOOL_NAMES)),
    names(TOOL_NAMES)
)
# ... same colors, named by the display names
TOOL_COLORS$proc <- setNames(TOOL_COLORS$raw, unname(TOOL_NAMES))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment