Skip to content
Snippets Groups Projects
Commit d75c0381 authored by Valentina Galata's avatar Valentina Galata
Browse files

added crispr fig (issue #19)

parent 629dc3fc
No related branches found
No related tags found
2 merge requests!71Master,!68Figures valentina
......@@ -15,6 +15,7 @@ FIG_MMSEQ_UPSETR = os.path.join(config["output"], config["fig_mmseq_upsetr"][
FIG_QUAST = os.path.join(config["output"], config["fig_quast"]["output"])
FIG_PARTIAL_GENES = os.path.join(config["output"], config["fig_partial_genes"]["output"])
FIG_NANOSTATS = os.path.join(config["output"], config["fig_nanostats"]["output"])
FIG_CRISPR = os.path.join(config["output"], config["fig_crispr"]["output"])
##################################################
# RULES
......@@ -25,7 +26,7 @@ rule all:
FIG_QUAST,
FIG_PARTIAL_GENES,
FIG_NANOSTATS,
"data/crispr_summary.tsv"
FIG_CRISPR
rule fig_mmseq_upsetr:
input:
......@@ -200,3 +201,19 @@ rule fig_crispr_data:
summary = pandas.concat(objs=summary, axis="index")
# wrote to file
summary.to_csv(output[0], sep="\t", header=True, index=False, index_label=False)
rule fig_crispr:
input:
stats=config["fig_crispr"]["input"]["stats"]
output:
pdf=FIG_CRISPR
log:
FIG_CRISPR + ".log"
params:
utils=config["utils"],
width=config["fig_crispr"]["width"],
height=config["fig_crispr"]["height"]
conda:
"envs/r.yml"
script:
config["fig_crispr"]["script"]
\ No newline at end of file
......@@ -41,4 +41,4 @@ fig_crispr:
stats: "data/crispr_summary.tsv"
output: "fig_crispr.pdf"
width: 7
height: 9
\ No newline at end of file
height: 5
\ No newline at end of file
#!/usr/bin/Rscript
## LOG FILE
sink(file=file(snakemake@log[[1]], open="wt"), type="message")
## NOTE
# Plot CRISPR statistics
## IMPORT
suppressMessages(library(testit))
suppressMessages(library(UpSetR))
suppressMessages(library(ggplot2))
# custom
source(snakemake@params$utils)
## DATA
stats <- read.csv(
file=snakemake@input$stats,
sep="\t",
header=TRUE,
stringsAsFactors=FALSE,
check.names=FALSE
)
# change names
stats$crispr_tool <- CRISPR_TOOL_NAMES[stats$crispr_tool]
stats$asm_tool <- ASM_TOOL_NAMES[stats$asm_tool]
## PLOT: UpSetR + PDF
plot_upsetr <- function(df, asm_tool){
asm_sets <- lapply(
CRISPR_TOOL_NAMES,
function(x){ unique(unlist(df[df$asm_tool==asm_tool & df$crispr_tool == x,"seq_id"])) }
)
names(asm_sets) <- CRISPR_TOOL_NAMES[names(asm_sets)]
UpSetR::upset(data=UpSetR::fromList(asm_sets), order.by="degree", decreasing=FALSE)
}
# NOTE:
# For an unknown reason creating and saving multiple plots using a for-loop or sapply did not work - the PDF was empty.
# Creating a PDF per plot in a for-loop or using sapply did not work either - each PDF was empty.
pdf(snakemake@output$pdf, width=snakemake@params$width, height=snakemake@params$height)
plot_upsetr(stats, "Flye")
plot_upsetr(stats, "MEGAHIT")
plot_upsetr(stats, "metaSPAdes")
plot_upsetr(stats, "metaSPAdes (H)")
dev.off()
\ No newline at end of file
......@@ -66,3 +66,10 @@ METHYLATION_COLORS <- list(
METHYLATION_COLORS$new <- METHYLATION_COLORS$raw
names(METHYLATION_COLORS$raw) <- names(METHYLATION_NAMES)
names(METHYLATION_COLORS$new) <- METHYLATION_NAMES
##############################
# CRISPR tools
CRISPR_TOOL_NAMES <- c(
"minced"="MinCED",
"casc"="CasC"
)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment