Skip to content
Snippets Groups Projects
Commit 25b450c3 authored by Valentina Galata's avatar Valentina Galata
Browse files

crispr fig: num of spacers/arrays per assembly; contig intersections (issue #19)

parent 1990f776
No related branches found
No related tags found
2 merge requests!71Master,!70Add latest figure updates
......@@ -8,7 +8,7 @@ sink(file=file(snakemake@log[[1]], open="wt"), type="message")
## IMPORT
suppressMessages(library(testit))
suppressMessages(library(UpSetR))
# suppressMessages(library(UpSetR))
suppressMessages(library(ggplot2))
# custom
source(snakemake@params$utils)
......@@ -24,23 +24,92 @@ stats <- read.csv(
# change names
stats$crispr_tool <- CRISPR_TOOL_NAMES[stats$crispr_tool]
stats$asm_tool <- ASM_TOOL_NAMES[stats$asm_tool]
# aggregate: spacers per contig
# stats_aggr_sc <- aggregate(
# x=stats$spacers,
# by=list(seq_id=stats$seq_id, crispr_tool=stats$crispr_tool, asm_tool=stats$asm_tool),
# FUN=sum
# )
# aggregate: spacers per assembly
stats_aggr_sa <- aggregate(
x=stats$spacers,
by=list(crispr_tool=stats$crispr_tool, asm_tool=stats$asm_tool),
FUN=sum
)
# aggregate: arrays per assembly
stats_aggr_aa <- aggregate(
x=stats$seq_id,
by=list(crispr_tool=stats$crispr_tool, asm_tool=stats$asm_tool),
FUN=length
)
## PLOT: UpSetR + PDF
## PLOTS
# UpSetR plotting function
plot_upsetr <- function(df, asm_tool){
asm_sets <- lapply(
CRISPR_TOOL_NAMES,
function(x){ unique(unlist(df[df$asm_tool==asm_tool & df$crispr_tool == x,"seq_id"])) }
)
names(asm_sets) <- CRISPR_TOOL_NAMES[names(asm_sets)]
UpSetR::upset(data=UpSetR::fromList(asm_sets), order.by="degree", decreasing=FALSE)
UpSetR::upset(
data=UpSetR::fromList(asm_sets),
order.by="degree",
decreasing=FALSE,
mainbar.y.label=sprintf("Contig intersection size (%s)", asm_tool),
sets.x.label="Contigs w/ CRISPR array(s)"
)
}
# Custom ggplot2 theme
my_theme <-
theme_bw() +
theme(
# legend
legend.title=element_blank(),
# grid
panel.grid=element_blank(),
# strip
strip.background=element_rect(fill="white"),
strip.text=element_text(size=12, color="black"),
# axes
axis.title=element_text(size=12, color="black"),
axis.text.y=element_text(size=9, color="black"),
axis.text.x=element_text(size=9, color="black", angle=90, vjust=0.5, hjust=1)
)
# List of ggplot plots
plots <- list()
# Plot: Number of spacers per tool/assembly
plots$spacers_asm <-
ggplot(data=stats_aggr_sa, aes(x=crispr_tool, y=x, fill=asm_tool)) +
geom_col(position="dodge") +
scale_fill_manual(values=ASM_TOOL_COLORS$notmeth, guide="legend") +
labs(
x="",
y="Number of spacers"
) +
my_theme
# Plot: Number of arrays per tool/assembly
plots$arrays_asm <-
ggplot(data=stats_aggr_aa, aes(x=crispr_tool, y=x, fill=asm_tool)) +
geom_col(position="dodge") +
scale_fill_manual(values=ASM_TOOL_COLORS$notmeth, guide="legend") +
labs(
x="",
y="Number of CRISPR arrays"
) +
my_theme
# Plot: UpSetR (+ PDF)
# NOTE:
# For an unknown reason creating and saving multiple plots using a for-loop or sapply did not work - the PDF was empty.
# Creating a PDF per plot in a for-loop or using sapply did not work either - each PDF was empty.
# For an unknown reason creating and saving multiple plots using a for-loop or sapply does not work - the PDF is empty.
# Creating a PDF per plot in a for-loop or using sapply does not work either - each PDF is empty.
pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height)
for(pp in plots){ print(pp) }
plot_upsetr(stats, "Flye")
plot_upsetr(stats, "MEGAHIT")
plot_upsetr(stats, "metaSPAdes")
plot_upsetr(stats, "metaSPAdes (H)")
dev.off()
\ No newline at end of file
dev.off()
......@@ -90,6 +90,7 @@ PLASFLOW_COLORS <- list(
)
names(PLASFLOW_COLORS$class) <- c("chromosome", "plasmid", "unclassified")
##############################
# RGI
RGI_NAMES <- list(
col=c(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment