Skip to content
Snippets Groups Projects
Commit 3cc031ec authored by Valentina Galata's avatar Valentina Galata
Browse files

added sortmerna: preproc rule, mapping rule for sr split into metag/metat...

added sortmerna: preproc rule, mapping rule for sr split into metag/metat (issue #49); mapping rule for hyhy assembly (issue #76)
parent fe910ac4
No related branches found
No related tags found
No related merge requests found
channels:
- bioconda
- defaults
dependencies:
- sortmerna=4.2.0=0
......@@ -115,16 +115,16 @@ rule mapping_bwa_idx_assembly:
"(date && bwa index {input} -p {params.idx_prefix} && date) &> {log}"
# Short reads
rule mapping_bwa_mem_assembly_sr:
rule mapping_bwa_mem_assembly_sr_metag:
input:
r1=os.path.join(RESULTS_DIR, "preproc/{mtype}/sr/R1.fastp.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/{mtype}/sr/R2.fastp.fastq.gz"),
r1=os.path.join(RESULTS_DIR, "preproc/metag/sr/R1.fastp.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/metag/sr/R2.fastp.fastq.gz"),
asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{atype}.fasta"),
idx=expand(os.path.join(RESULTS_DIR, "assembly/{{rtype}}/{{tool}}/{{atype}}.{ext}"), ext=BWA_IDX_EXT)
output:
os.path.join(RESULTS_DIR, "mapping/{mtype}/{rtype}/{tool}/{atype}.sr.bam")
os.path.join(RESULTS_DIR, "mapping/metag/{rtype}/{tool}/{atype}.sr.bam")
log:
"logs/bwa.mem.{mtype}.{rtype}.{tool}.{atype}.sr.log",
"logs/bwa.mem.metag.{rtype}.{tool}.{atype}.sr.log",
wildcard_constraints:
mtype="|".join(META_TYPES),
rtype="|".join(READ_TYPES),
......@@ -146,6 +146,36 @@ rule mapping_bwa_mem_assembly_sr:
"samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && "
"date) &> {log}"
rule mapping_bwa_mem_assembly_sr_metat:
input:
r1=os.path.join(RESULTS_DIR, "preproc/metat/sr/R1.sortmerna.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/metat/sr/R2.sortmerna.fastq.gz"),
asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{atype}.fasta"),
idx=expand(os.path.join(RESULTS_DIR, "assembly/{{rtype}}/{{tool}}/{{atype}}.{ext}"), ext=BWA_IDX_EXT)
output:
os.path.join(RESULTS_DIR, "mapping/metat/{rtype}/{tool}/{atype}.sr.bam")
log:
"logs/bwa.mem.metat.{rtype}.{tool}.{atype}.sr.log",
wildcard_constraints:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS),
atype="ASSEMBLY|ASSEMBLY.FILTERED"
threads:
config["bwa"]["threads"]
params:
idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0],
bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Mapping short reads to assembly w/ BWA"
shell:
"(date && "
"bwa mem -t {threads} {params.idx_prefix} {input.r1} {input.r2} | "
"samtools view -@ {threads} -SbT {input.asm} | "
"samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && "
"date) &> {log}"
# Long reads
rule mapping_bwa_mem_assembly_lr:
input:
......@@ -196,6 +226,29 @@ rule mapping_bwa_mem_assembly_hy:
shell:
"(date && samtools merge {output} {input.sr} {input.lr} && date) &> {log}"
# Hybrid: "short + long reads" hybrid, "metaT and metaG" hybrid
# NOTE: such assemblies are not implemented in the pipeline
# but can be included in the analysis by linking to the contigs FASTA
rule mapping_bwa_mem_assembly_hy_hy:
input:
srg=os.path.join(RESULTS_DIR, "mapping/metag/metag/{tool}/{atype}.sr.bam"),
srt=os.path.join(RESULTS_DIR, "mapping/metag/metat/{tool}/{atype}.sr.bam"),
lr=os.path.join(RESULTS_DIR, "mapping/metag/{rtype}/{tool}/{atype}.lr.bam")
output:
os.path.join(RESULTS_DIR, "mapping/metag/{rtype}/{tool}/{atype}.hy.bam")
log:
"logs/bwa_mem.metag.hy.{rtype}.{tool}.{atype}.hy.log"
wildcard_constraints:
tool="|".join(config["assemblers"]["hyhy"]),
atype="ASSEMBLY|ASSEMBLY.FILTERED"
threads: 1
conda:
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Mapping: merging short-reads and long-reads mapping results"
shell:
"(date && samtools merge {output} {input} && date) &> {log}"
##################################################
# Assembly coverage
......
......@@ -69,3 +69,36 @@ rule nanostat_guppy_basecalling:
"Preprocessing long reads: NanoStats"
shell:
"(date && NanoStat --fastq {input} --outdir $(dirname {output}) -n $(basename {output}) && date) 2> {log.err} > {log.out}"
##################################################
# rRNA filtering
rule sortmerna_filt:
input:
r1=os.path.join(RESULTS_DIR, "preproc/metat/sr/R1.fastp.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/metat/sr/R2.fastp.fastq.gz")
output:
r1=os.path.join(RESULTS_DIR, "preproc/metat/sr/R1.sortmerna.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/metat/sr/R2.sortmerna.fastq.gz"),
r1_o=temp(os.path.join(RESULTS_DIR, "preproc/metat/sr/sortmerna/out/other_fwd.fq")),
r2_o=temp(os.path.join(RESULTS_DIR, "preproc/metat/sr/sortmerna/out/other_rev.fq")),
r1_a=temp(os.path.join(RESULTS_DIR, "preproc/metat/sr/sortmerna/out/aligned_fwd.fq")),
r2_a=temp(os.path.join(RESULTS_DIR, "preproc/metat/sr/sortmerna/out/aligned_rev.fq"))
log:
out="logs/sortmerna.metat.sr.out.log"
threads:
config["sortmerna"]["threads"]
params:
refs=" ".join([ "-ref %s" % ref for ref in config["sortmerna"]["refs"]])
conda:
os.path.join(ENV_DIR, "sortmerna.yaml")
message:
"Preprocessing metaT short reads: sortmerna"
shell:
"(date && "
"sortmerna {params.refs} -reads {input.r1} -reads {input.r2} -workdir $(dirname {output.r1})/sortmerna "
"--fastx --other -paired_in -v --task 4 --num_alignments 1 --out2 --threads {threads} && "
"gzip -c {output.r1_o} > {output.r1} && "
"gzip -c {output.r2_o} > {output.r2} && "
"gzip -c {output.r1_a} > {output.r1_a}.gz && "
"gzip -c {output.r2_a} > {output.r2_a}.gz && "
"date) &> {log}"
......@@ -17,6 +17,7 @@ rule PREPROCESSING_LR:
rule PREPROCESSING_SR:
input:
fastp=expand(os.path.join(RESULTS_DIR, "preproc/{mtype}/sr/{rid}.fastp.fastq.gz"), mtype=META_TYPES, rid=["R1", "R2"]),
fastqc=expand(os.path.join(RESULTS_DIR, "qc/{mtype}/sr/{rid}.fastp_{ext}"), mtype=META_TYPES, rid=["R1", "R2"], ext=["fastqc.html", "fastqc.zip"])
fastqc=expand(os.path.join(RESULTS_DIR, "qc/{mtype}/sr/{rid}.fastp_{ext}"), mtype=META_TYPES, rid=["R1", "R2"], ext=["fastqc.html", "fastqc.zip"]),
sortmerna=expand(os.path.join(RESULTS_DIR, "preproc/metat/sr/{rid}.sortmerna.fastq.gz"), rid=["R1", "R2"])
output:
touch("status/preprocessing_sr.done")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment