Skip to content
Snippets Groups Projects
Commit 680d8114 authored by Valentina Galata
Browse files

report update (incl metat polished assemblies) (issue #52)

parent c20052fd
No related branches found
No related tags found
1 merge request: !76 Merge "cleanup" branch with "master" branch
......@@ -139,7 +139,7 @@ rule collect_quast:
ifile_tool = os.path.basename(os.path.dirname(ifile))
ifile_df = pandas.read_csv(ifile, sep="\t", header=0, index_col=0)
ifile_df.rename(columns={"ASSEMBLY": ifile_tool}, inplace=True)
print(ifile_df)
# print(ifile_df)
if summary is None:
summary = ifile_df.copy()
else:
......
......@@ -13,8 +13,10 @@ TABS$nanostats <- read_nanostats(snakemake@input$nanostats)
TABS$mappability <- list()
TABS$mappability$metag <- read_mappability(snakemake@input$mappability[1])
TABS$mappability$metag$tool <- factor(TABS$mappability$metag$tool, ordered=TRUE, levels=ASM_TOOL_NAMES)
if(length(snakemake@input$mappability) > 1){
TABS$mappability$metat <- read_mappability(snakemake@input$mappability[2])
TABS$mappability$metat$tool <- factor(TABS$mappability$metat$tool, ordered=TRUE, levels=ASM_TOOL_NAMES)
} else {
TABS$mappability$metat <- NULL
}
......@@ -112,6 +114,7 @@ FIGS$quast <-
TABS$plasflow <- read_plasflow(snakemake@input$plasflow)
TABS$plasflow_melted <- reshape2::melt(TABS$plasflow, id.vars=c("label", "type", "tool"), variable.name="statstype")
TABS$plasflow_melted$tool <- factor(TABS$plasflow_melted$tool, ordered=TRUE, levels=ASM_TOOL_NAMES)
FIGS$plasflow <- list()
for(stype in c("sum_pct")){
......@@ -144,6 +147,8 @@ for(stype in c("sum_pct")){
TABS$prodigal <- read_prodigal(snakemake@input$prodigal)
TABS$prodigal_melted <- reshape2::melt(TABS$prodigal[,c("tool", "total", "partial")], id.vars=c("tool"))
TABS$prodigal$tool <- factor(TABS$prodigal$tool, ordered=TRUE, levels=ASM_TOOL_NAMES)
TABS$prodigal_melted$tool <- factor(TABS$prodigal_melted$tool, ordered=TRUE, levels=ASM_TOOL_NAMES)
FIGS$prodigal <- list()
FIGS$prodigal$counts <-
......@@ -328,12 +333,14 @@ TABS$crispr_aggr_sa <- aggregate(
by=list(crispr_tool=TABS$crispr$crispr_tool, asm_tool=TABS$crispr$asm_tool),
FUN=sum
)
TABS$crispr_aggr_sa$asm_tool <- factor(TABS$crispr_aggr_sa$asm_tool, ordered=TRUE, levels=ASM_TOOL_NAMES)
# aggregate: arrays per assembly
TABS$crispr_aggr_aa <- aggregate(
x=TABS$crispr$seq_id,
by=list(crispr_tool=TABS$crispr$crispr_tool, asm_tool=TABS$crispr$asm_tool),
FUN=length
)
TABS$crispr_aggr_aa$asm_tool <- factor(TABS$crispr_aggr_aa$asm_tool, ordered=TRUE, levels=ASM_TOOL_NAMES)
FIGS$crispr <- list()
crispr_theme <-
......
......@@ -26,11 +26,11 @@ knitr::kable(TABS$nanostats, caption='NanoStats')
# Mappability
```{r figures-mappability-metag, echo=FALSE, fig.width=10, fig.height=5, fig.cap='Mappability, metaG'}
```{r figures-mappability-metag, echo=FALSE, fig.width=10, fig.height=7, fig.cap='Mappability, metaG'}
print(FIGS$mappability$metag)
```
```{r figures-mappability-metat, echo=FALSE, fig.width=10, fig.height=5, fig.cap='Mappability, metaT'}
```{r figures-mappability-metat, echo=FALSE, fig.width=10, fig.height=7, fig.cap='Mappability, metaT'}
if("metat" %in% names(FIGS$mappability)){
print(FIGS$mappability$metat)
}
......@@ -46,7 +46,7 @@ grid.draw(FIGS$mash)
# Assembly quality
```{r tables-quast, echo=FALSE}
knitr::kable(TABS$quast, caption='QUAST')
knitr::kable(t(TABS$quast), caption='QUAST')
```
```{r figures-quast, echo=FALSE, fig.width=10, fig.height=10, fig.cap='Assembly statistics w/ QUAST'}
......@@ -55,7 +55,7 @@ print(FIGS$quast)
# Plasmid prediction
```{r figures-plasflow, echo=FALSE, fig.width=10, fig.height=5, fig.cap='Plasmid prediction w/ Plasflow'}
```{r figures-plasflow, echo=FALSE, fig.width=10, fig.height=7, fig.cap='Plasmid prediction w/ Plasflow'}
print(FIGS$plasflow$sum_pct)
```
......@@ -108,6 +108,6 @@ FIGS$cdhit
print(FIGS$rgi$total)
```
```{r figures-rgi-overlap, echo=FALSE, fig.width=10, fig.height=5, fig.cap='AMR prediction w/ RGI'}
```{r figures-rgi-overlap, echo=FALSE, fig.width=10, fig.height=7, fig.cap='AMR prediction w/ RGI'}
plot_rgi_overlap(TABS$rgi, "strict", "ARO")
```
\ No newline at end of file
......@@ -170,19 +170,21 @@ plot_rgi_overlap <- function(df, ctype, col){
# overlap order
order.by="degree",
decreasing=FALSE,
# number of sets to plot
nsets=length(ASM_TOOL_NAMES),
# y-label title
mainbar.y.label=sprintf("Intersection size (%s hits, %s)", ctype, col),
# text size
text.scale = c(1.5, 1.5, 1.5, 1.5, 1.5, 1.5),
text.scale = c(1.2, 1.2, 1.2, 1.2, 1.2, 1.2)#,
# colors
set.metadata=list(
data=data.frame(
sets=names(df_list),
Tool=names(df_list),
stringsAsFactors=FALSE
),
plots=list(list(type="matrix_rows", column="Tool", colors=ASM_TOOL_COLORS, alpha=0.7))
)
# set.metadata=list(
# data=data.frame(
# sets=names(df_list),
# Tool=names(df_list),
# stringsAsFactors=FALSE
# ),
# plots=list(list(type="matrix_rows", column="Tool", colors=ASM_TOOL_COLORS, alpha=0.7))
# )
)
}
......@@ -210,14 +212,23 @@ plot_crispr_overlap <- function(df, asm_tool){
# names
ASM_TOOL_NAMES <- c(
# LR, SR, hybrid
"flye"="Flye",
"megahit"="MEGAHIT",
"metaspadeshybrid"="metaSPAdes (H)",
"metaspades"="metaSPAdes",
"operams"="OPERA-MS"
"operams"="OPERA-MS",
"metaspadeshybrid"="metaSPAdes (H)",
# polishing w/ metaT (w/o LR)
"megahitmetatracon"="Racon(MEGAHIT + metaT)",
"metaspadesmetatracon"="Racon(metaSPAdes + metaT)",
"operamsmetatracon"="Racon(OPERA-MS + metaT)",
"metaspadeshybridmetatracon"="Racon(metaSPAdes (H) + metaT)"
)
# colors
ASM_TOOL_COLORS <- ggsci::pal_nejm("default", alpha=1)(length(ASM_TOOL_NAMES))
ASM_TOOL_COLORS <- c(
ggsci::pal_nejm("default", alpha=1)(5),
ggsci::pal_nejm("default", alpha=0.6)(5)[2:5]
)
names(ASM_TOOL_COLORS) <- ASM_TOOL_NAMES
###############
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment