Skip to content
Snippets Groups Projects
Commit 757cefb2 authored by Valentina Galata's avatar Valentina Galata
Browse files

analysis: cov seg: more threads (rule), rm short contigs (script) (issue #107)

parent 3f62a5f0
No related branches found
No related tags found
No related merge requests found
......@@ -365,10 +365,10 @@ rule analysis_genomecov_segmentation:
tool="|".join(ASSEMBLERS)
params:
lmin=10000,
threads: 5
threads: 10
conda:
os.path.join(ENV_DIR, "segclust2d.yaml")
message:
"Analysis: contig coverage multi-modality: {input}"
script:
os.path.join(SRC_DIR, "cov_multimod_segclust2d.R")
\ No newline at end of file
os.path.join(SRC_DIR, "cov_multimod_segclust2d.R")
......@@ -99,17 +99,26 @@ states_median_sd <- function(covs, states){
# lmin param. for segclust2d::segmentation
LMIN <- snakemake@params$lmin
# LMIN <- 1e4
# per-base coverage
COV <- read_perbase_cov(snakemake@input$cov)
# COV <- read_perbase_cov("/scratch/users/vgalata/nwc/results/mapping/metag/lr/flye/ASSEMBLY.POLISHED.sr.cov.perbase")
print(sprintf("Read in %s: %d x %d, %d contigs", snakemake@input$cov, nrow(COV), ncol(COV), length(unique(COV$contig))))
print(head(COV))
# filter by contig length
CLEN <-
COV %>%
group_by(contig) %>%
summarise(
length=length(base)
) %>%
filter(length >= 2 * LMIN)
# contig IDs
CID <- sort(unique(COV$contig))
CID <- CLEN$contig
names(CID) <- CID
print(sprintf("Contigs w/ length >= 2 * %d: %d", LMIN, length(CID)))
# filter cov table
COV <- as.data.frame(COV %>% filter(contig %in% CID))
# coverage segmentation
CID_SEG <- mclapply(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment