Skip to content
Snippets Groups Projects
Commit 7778266f authored by Valentina Galata's avatar Valentina Galata
Browse files

assembly polishing w/ racon and metat (issue #52)

parent ea06388a
No related branches found
No related tags found
1 merge request!76Merge "cleanup" branch with "master" branch
......@@ -48,8 +48,6 @@ if "assembly" in STEPS:
"steps/assembly.smk"
TARGETS += [
"status/assembly.done",
# "status/mapping.done",
# "status/coverage.done"
]
# Assembly mapping
......
# Assembly
##################################################
# Long reads
rule assembly_lr_flye:
input:
os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz")
......@@ -81,7 +83,9 @@ rule assembly_lr_flye_link:
shell:
"ipath=$(realpath {input}) && cd $(dirname {output}) && ln -sf $(realpath --relative-to=\".\" \"${{ipath}}\") $(basename {output})"
##################################################
# Short reads
rule assembly_sr_megahit:
input:
r1=os.path.join(RESULTS_DIR, "preproc/metag/sr/R1.fastp.fastq.gz"),
......@@ -126,7 +130,9 @@ rule assembly_sr_metaspades:
"cd $(dirname {output}) && ln -sf contigs.fasta $(basename {output}) && "
"date) 2> {log.err} > {log.out}"
##################################################
# Hybrid
rule assembly_hy_metaspades:
input:
lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"),
......@@ -173,4 +179,34 @@ rule assembly_hy_operams:
"perl {config[operams][bin]} --short-read1 {input.r1} --short-read2 {input.r2} --long-read {output.lr} --contig-file {input.asm} "
"--out-dir $(dirname {output.asm}) --long-read-mapper minimap2 --num-processors {threads} && "
"cd $(dirname {output.asm}) && ln -sf contigs.fasta $(basename {output.asm}) && "
"date) 2> {log.err} > {log.out}"
\ No newline at end of file
"date) 2> {log.err} > {log.out}"
##################################################
# Polishing w/ metaT
rule polishing_metat_racon:
input:
fastq=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/reads.fastq"),
asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta"),
sam=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/mapping.sam")
output:
fasta=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/ASSEMBLY.fasta"),
# NOTE: Tmp files for input data to pass file extension checks
asm=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/input.fasta"))
log:
out="logs/racon.metat.{rtype}.{tool}.out.log",
err="logs/racon.metat.{rtype}.{tool}.err.log"
wildcard_constraints:
rtype="|".join(["sr", "hy"]),
tool="|".join(["metaspades", "megahit", "metaspadeshybrid", "operams"])
threads:
config["racon"]["threads"]
conda:
os.path.join(ENV_DIR, "racon.yaml")
message:
"Assembly: polishing w/ Racon and metaT"
shell:
"(date && "
"cp {input.asm} {output.asm} && "
"racon -t {threads} --include-unpolished {input.fastq} {input.sam} {output.asm} > {output.fasta} && "
"date) 2> {log.err} > {log.out}"
......@@ -51,6 +51,47 @@ rule mapping_bwa_mem_polishing:
"samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && "
"date) 2> {log.err} > {log.out}"
##################################################
# For metaT polishing
# NOTE: for racon
rule mapping_bwa_mem_polishing_metat:
input:
# reads
r1=os.path.join(RESULTS_DIR, "preproc/metat/sr/R1.fastp.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/metat/sr/R2.fastp.fastq.gz"),
# assembly
asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta"),
# assembly index
idx=expand(os.path.join(RESULTS_DIR, "assembly/{{rtype}}/{{tool}}/ASSEMBLY.{ext}"), ext=BWA_IDX_EXT)
output:
sam=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/mapping.sam")),
fastq=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/reads.fastq"))
log:
out="logs/bwa_mem.polishing.metat.{rtype}.{tool}.out.log",
err="logs/bwa_mem.polishing.metat.{rtype}.{tool}.err.log"
wildcard_constraints:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
threads:
config["bwa"]["threads"]
params:
idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0],
sam_prefix=lambda wildcards, output: os.path.splitext(output.sam)[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Mapping short reads to assembly w/ BWA"
shell:
"(date && "
"zcat {input.r1} | awk '{{print (NR%4 == 1) ? \"@1_\" ++i : $0}}' > {output.fastq} && "
"zcat {input.r2} | awk '{{print (NR%4 == 1) ? \"@2_\" ++i : $0}}' >> {output.fastq} && "
"bwa mem -t {threads} {params.idx_prefix} {output.fastq} | "
"samtools view -@ {threads} -SbT {input.asm} | "
"samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.sam_prefix} | "
"samtools view -@ {threads} -h > {output.sam} && "
"date) 2> {log.err} > {log.out}"
##################################################
# For assembly
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment