Skip to content
Snippets Groups Projects
Commit b7870822 authored by Valentina Galata's avatar Valentina Galata
Browse files

notes: plots and multimodality test for contig cov (issye #84)

parent d9e49f81
No related branches found
No related tags found
No related merge requests found
# conda env: /home/users/vgalata/miniconda3/envs/ONT_pilot/pipeline/e231f1b8
require(ggplot2)
require(diptest) # conda-forge / r-diptest 0.75_7
require(zoo) # conda-forge / r-zoo 1.8_8
# per-base coverage
df <- read.csv(file="tmp/lr_flye_sr_contig_446.cov", sep="\t", header=FALSE, col.names=c("contig", "base", "cov"), stringsAsFactors=FALSE)
df_ <- df[df$base <= 107500,]
cov_analysis <- function(cov_df, k=51, df=10){
# rolling median
# https://www.rdocumentation.org/packages/zoo/versions/1.8-8/topics/rollmean
cov_df$cov_rmedian <- c(
rep(NA, (k-1)/2),
rollmedian(cov_df$cov, k=k, align="center"),
rep(NA, (k-1)/2)
)
# splines
cov_df$cov_smooth <- smooth.spline(
x=cov_df$base,
y=cov_df$cov,
df=df
)$y
# density
# cov_de <- density(na.omit(cov_df$cov_rmedian))
# multi-mod. test
# cov_dt <- dip.test(cov_df$cov_rmedian, simulate.p.value=FALSE)
# plots
pp_cov <-
ggplot(data=cov_df, aes(x=base, y=cov)) +
geom_line(colour="#666666") +
geom_line(aes(x=base, y=cov_rmedian), colour="#0066CC") +
geom_line(aes(x=base, y=cov_smooth), colour="#CC3333") +
scale_x_continuous(breaks = seq(0, max(cov_df$base), by=5000)) +
theme_bw() +
theme(
axis.text.x=element_text(angle=90, vjust=0.5, hjust=1)
)
pp_den <-
ggplot(data=cov_df, aes(x=cov_rmedian)) +
# geom_line(data=data.frame(x=de_$x, y=de_$y), aes(x=x, y=y), colour="red") +
geom_density(colour="#0066CC") +
geom_density(aes(x=cov_smooth), colour="#CC3333") +
theme_bw()
return(list(
df=cov_df,
# de=cov_de,
# multi-mod. test
dt_rmediain=dip.test(cov_df$cov_rmedian, simulate.p.value=FALSE),
dt_smooth=dip.test(cov_df$cov_smooth, simulate.p.value=FALSE),
# plots
plot_cov=pp_cov,
plot_den=pp_den
))
}
df_analysis <- cov_analysis(df, 10001, 10)
df_analysis_ <- cov_analysis(df_, 10001, 10)
print(df_analysis$dt_rmediain)
print(df_analysis$dt_smooth)
print(df_analysis_$dt_rmediain)
print(df_analysis_$dt_smooth)
pdf("tmp/test.pdf", width=20, height=4)
print(df_analysis$plot_cov)
print(df_analysis$plot_den)
print(df_analysis_$plot_cov)
print(df_analysis_$plot_den)
dev.off()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment