Skip to content
Snippets Groups Projects
Commit d9fec571 authored by Valentina Galata's avatar Valentina Galata
Browse files

added mapping

parent fa54a2e9
No related branches found
No related tags found
1 merge request!76Merge "cleanup" branch with "master" branch
......@@ -86,6 +86,13 @@ if "assembly" in STEPS:
"status/assembly.done"
]
if "mapping" in STEPS:
include:
"workflow/steps/mapping.smk"
TARGETS += [
"status/mapping.done"
]
# # Assembly annotation
# if 'assembly_annotation' in STEPS:
# include:
......
......@@ -3,7 +3,7 @@
# Pipeline steps
# steps: ["assembly_annotation", "mapping", "metaT", "mmseq", "binning", "taxonomy", "analysis"]
steps: ["preprocessing", "assembly"]
steps: ["mapping"]
# Analysis sub-steps
analysis_steps: ["cdhit", "mappability", "crispr", "plasmids", "amr"]
......@@ -95,6 +95,20 @@ metaspades:
megahit:
threads: 10
# Mapping
bwa:
threads: 10
long_reads_index:
opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
samtools:
sort:
# threads: 10
chunk_size: "4G"
view:
# threads: 10
# p7zip:
# bin: "/home/users/claczny/apps/software/p7zip_16.02/bin/7za"
# threads: 4
......
......@@ -60,6 +60,31 @@ assemble_hy_metaspades:
n: 1
explicit: ""
# Mapping
bwa_index_assembly:
time: "00-02:00:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
n: 1
explicit: ""
bwa_mem_assembly_sr:
time: "00-06:00:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
n: 1
explicit: ""
bwa_mem_assembly_lr:
time: "00-06:00:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
n: 1
explicit: ""
# "mmseq2_compare":
# {
# "n": 1,
......
# Rule for running the mapping "workflow" for ONT analsyes
# Mapping (to assembly)
#######################
## Mapping to hybrid ##
#######################
rule build_hybrid_bwa_index:
input: expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fna"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode=BARCODES, hybrid_assembler=HYBRID_ASSEMBLER)
output:
# done=touch("bwa_index.done")
index_amb=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.amb"),
index_ann=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.ann"),
index_bwt=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.bwt"),
index_pac=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.pac"),
index_sa=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.sa")
# wildcard_constraints:
# hybrid_assembler='^\/'
params:
prefix=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}")
log: os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{prefix}.bwa_index")
conda: "../envs/bwa.yaml"
shell: """
(date &&\
bwa index {input} -p {params.prefix} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule hybrid_map_short_reads_to_hybrid_contigs_w_bwa_mem:
# BWA index
rule bwa_index_assembly:
input:
query_r1=expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R1_001.fastp.fq.gz"), sr_sample="ONT3_MG_xx_Rashi_S11"),
query_r2=expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R2_001.fastp.fq.gz"), sr_sample="ONT3_MG_xx_Rashi_S11"),
index_amb=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.amb"),
index_ann=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.ann"),
index_bwt=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"),
index_pac=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.pac"),
index_sa=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.sa"),
index_fa=expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fna"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode=BARCODES, hybrid_assembler=HYBRID_ASSEMBLER)
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.fna")
output:
os.path.join(RESULTS_DIR, "mapping/bwa_sr_{hybrid_assembler}/bwa_sr_{hybrid_assembler}.bam")
expand(
os.path.join(RESULTS_DIR, "assembly/{{rtype}}/{{tool}}/assembly.{ext}"),
ext=["amb", "ann", "bwt", "pac", "sa"]
)
# amb=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.amb"),
# ann=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.ann"),
# bwt=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.bwt"),
# pac=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.pac"),
# sa =os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.sa")
log:
out="logs/mapping_bwa_idx_{rtype}.{tool}.out.log",
err="logs/mapping_bwa_idx_{rtype}.{tool}.err.log"
wildcard_constraints:
rtype="|".join(config["assemblers"].keys()),
tool="|".join("|".join(a) for a in config["assemblers"].values())
threads: 1
params:
index_prefix=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades"),
out_prefix=os.path.join(RESULTS_DIR, "mapping/bwa_sr_{hybrid_assembler}/bwa_sr_{hybrid_assembler}")
log: os.path.join(RESULTS_DIR, "mapping/bwa_sr_{hybrid_assembler}/sr_contigs.bwa_mem_samtools")
conda: "../envs/bwa.yaml"
threads:
config["bwa"]["threads"]
# wildcard_constraints:
# sample='\w+'
shell: """
(date &&\
bwa mem -t {threads} {params.index_prefix} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
idx_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
shell:
"(date && bwa index {input} -p {params.idx_prefix} && date) 2> {log.err} > {log.out}"
rule hybrid_map_long_reads_to_hybrid_contigs_w_bwa_mem:
# Short reads
rule bwa_mem_assembly_sr:
input:
query_lr=expand(os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz"), barcode=BARCODES),
index_amb=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.amb"),
index_ann=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.ann"),
index_bwt=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"),
index_pac=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.pac"),
index_sa=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.sa"),
index_fa=expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fna"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode=BARCODES, hybrid_assembler=HYBRID_ASSEMBLER)
# reads
r1=os.path.join(RESULTS_DIR, "preproc/sr/R1.fastp.fastq.gz"),
r2=os.path.join(RESULTS_DIR, "preproc/sr/R2.fastp.fastq.gz"),
# assembly
asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.fna"),
# assembly index
idx=expand(
os.path.join(RESULTS_DIR, "assembly/{{rtype}}/{{tool}}/assembly.{ext}"),
ext=["amb", "ann", "bwt", "pac", "sa"]
)
# amb=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.amb"),
# ann=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.ann"),
# bwt=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.bwt"),
# pac=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.pac"),
# sa =os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.sa")
output:
os.path.join(RESULTS_DIR, "mapping/bwa_lr_{hybrid_assembler}/bwa_lr_{hybrid_assembler}.bam")
os.path.join(RESULTS_DIR, "mapping/{rtype}/{tool}/assembly_sr.bam")
log:
out="logs/mapping_bwa_mem_{rtype}.{tool}.out.log",
err="logs/mapping_bwa_mem_{rtype}.{tool}.err.log"
wildcard_constraints:
rtype="|".join(["sr", "hy"]),
tool="|".join(config["assemblers"]["sr"] + config["assemblers"]["hy"])
params:
index_prefix=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades"),
out_prefix=os.path.join(RESULTS_DIR, "mapping/bwa_lr_{hybrid_assembler}/bwa_lr_{hybrid_assembler}"),
log: os.path.join(RESULTS_DIR, "mapping/bwa_lr_{hybrid_assembler}/lr_contigs.bwa_mem_samtools")
conda: "../envs/bwa.yaml"
threads: config["bwa"]["threads"]
# wildcard_constraints:
# sample='\w+'
shell: """
(date &&\
bwa mem -x ont2d -t {threads} {params.index_prefix} {input.query_lr} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule hybrid_build_minimap2_long_read_contigs_ont_index:
input: expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fna"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode=BARCODES, hybrid_assembler=HYBRID_ASSEMBLER)
output: os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi")
conda: "../envs/minimap2.yaml"
idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0],
bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
threads:
config["bwa"]["threads"]
shell:
"""
date
minimap2 -x map-ont -d {output} {input}
date
"""
"(date && "
"bwa mem -t {threads} {params.idx_prefix} {input.r1} {input.r2} | "
"samtools view -@ {threads} -SbT {input.asm} | "
"samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && "
"date) 2> {log.err} > {log.out}"
rule hybrid_map_long_reads_to_hybrid_contigs_w_minimap2:
rule bwa_mem_assembly_lr:
input:
query=expand(os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz"), barcode=BARCODES),
index=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"),
index_fa=expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fna"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode=BARCODES, hybrid_assembler=HYBRID_ASSEMBLER)
output: os.path.join(RESULTS_DIR, "mapping/mmi_lr_{hybrid_assembler}/mmi_lr_{hybrid_assembler}.bam")
params:
prefix=os.path.join(RESULTS_DIR, "mapping/mmi_lr_{hybrid_assembler}/mmi_lr_{hybrid_assembler}")
conda: "../envs/minimap2.yaml"
threads: config["minimap2"]["threads"]
log: os.path.join(RESULTS_DIR, "mapping/mmi_lr_{hybrid_assembler}/metaspades.minimap2_samtools")
shell: """
(date &&\
minimap2 -a -t {threads} {input.index} {input.query} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule hybrid_map_short_reads_to_hybrid_contigs_w_minimap2:
input:
query_r1=expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R1_001.fastp.fq.gz"), sr_sample="ONT3_MG_xx_Rashi_S11"),
query_r2=expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R2_001.fastp.fq.gz"), sr_sample="ONT3_MG_xx_Rashi_S11"),
index=os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"),
index_fa=expand(os.path.join(RESULTS_DIR, "assembly/{hybrid_assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fna"), sr_sample="ONT3_MG_xx_Rashi_S11", barcode=BARCODES, hybrid_assembler=HYBRID_ASSEMBLER)
output: os.path.join(RESULTS_DIR, "mapping/mmi_sr_{hybrid_assembler}/mmi_sr_{hybrid_assembler}.bam")
# reads
lr=os.path.join(RESULTS_DIR, "preproc/lr/lr.fastq.gz"),
# assembly
asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.fna"),
# assembly index
idx=expand(
os.path.join(RESULTS_DIR, "assembly/{{rtype}}/{{tool}}/assembly.{ext}"),
ext=["amb", "ann", "bwt", "pac", "sa"]
)
output:
os.path.join(RESULTS_DIR, "mapping/{rtype}/{tool}/assembly_lr.bam")
log:
out="logs/mapping_bwa_mem_{rtype}.{tool}.out.log",
err="logs/mapping_bwa_mem_{rtype}.{tool}.err.log"
wildcard_constraints:
rtype="|".join(["lr", "hy"]),
tool="|".join(config["assemblers"]["lr"] + config["assemblers"]["hy"])
params:
prefix=os.path.join(RESULTS_DIR, "mapping/mmi_sr_{hybrid_assembler}/mmi_sr_{hybrid_assembler}")
conda: "../envs/minimap2.yaml"
threads: config["minimap2"]["threads"]
log: os.path.join(RESULTS_DIR, "mapping/mmi_sr_{hybrid_assembler}/metaspades.minimap2_samtools")
idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0],
bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
threads:
config["bwa"]["threads"]
shell:
"""
(date &&\
minimap2 -a -t {threads} {input.index} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.prefix} > {output}
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
"(date && "
"bwa mem -x ont2d -t {threads} {params.idx_prefix} {input.lr} | "
"samtools view -@ {threads} -SbT {input.asm} | "
"samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && "
"date) 2> {log.err} > {log.out}"
rule hybrid_merge_bam_files_for_metaspades:
input:
bam_sr=os.path.join(RESULTS_DIR, "mapping/{mapper}_sr_{hybrid_assembler}/{mapper}_sr_{hybrid_assembler}.bam"),
bam_lr=os.path.join(RESULTS_DIR, "mapping/{mapper}_lr_{hybrid_assembler}/{mapper}_lr_{hybrid_assembler}.bam")
output: os.path.join(RESULTS_DIR, "mapping/{mapper}_merged_{hybrid_assembler}/{mapper}_merged_{hybrid_assembler}.bam")
conda: "../envs/binning.yaml"
shell:
"""
(date &&\
samtools merge {output} {input.bam_sr} {input.bam_lr} &&\
date)
"""
# rule hybrid_merge_bam_files_for_metaspades:
# input:
# bam_sr=os.path.join(RESULTS_DIR, "mapping/{mapper}_sr_{hybrid_assembler}/{mapper}_sr_{hybrid_assembler}.bam"),
# bam_lr=os.path.join(RESULTS_DIR, "mapping/{mapper}_lr_{hybrid_assembler}/{mapper}_lr_{hybrid_assembler}.bam")
# output: os.path.join(RESULTS_DIR, "mapping/{mapper}_merged_{hybrid_assembler}/{mapper}_merged_{hybrid_assembler}.bam")
# conda: "../envs/binning.yaml"
# shell:
# """
# (date &&\
# samtools merge {output} {input.bam_sr} {input.bam_lr} &&\
# date)
# """
# Workflow for running mapping steps of different assemblers, and mappers for the Binning workflow
# # Workflow for running mapping steps of different assemblers, and mappers for the Binning workflow
# # specify which rules to run
# include:
# '../rules/mapping.smk'
# # Rule all for mapping
# rule MAPPING:
# input:
# expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.amb"), hybrid_assembler=HYBRID_ASSEMBLER),
# expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"), hybrid_assembler=HYBRID_ASSEMBLER),
# expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_{reads}_{hybrid_assembler}/{mapper}_{reads}_{hybrid_assembler}.bam"), mapper=MAPPERS, reads=["sr", "lr"], hybrid_assembler=HYBRID_ASSEMBLER),
# expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"), hybrid_assembler=HYBRID_ASSEMBLER),
# expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_merged_{hybrid_assembler}/{mapper}_merged_{hybrid_assembler}.bam"), mapper=MAPPERS, hybrid_assembler=HYBRID_ASSEMBLER)
# output:
# touch('mapping_for_binning.done')
# Mapping
# specify which rules to run
include:
'../rules/mapping.smk'
# Rule all for mapping
# NOTE: Using "shell: touch ..." to avoid the rule from being autodetected as `localrule`.
# This is needed so that an email can be sent upon event changes for this rule.
rule MAPPING:
input:
expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.amb"), hybrid_assembler=HYBRID_ASSEMBLER),
expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"), hybrid_assembler=HYBRID_ASSEMBLER),
expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_{reads}_{hybrid_assembler}/{mapper}_{reads}_{hybrid_assembler}.bam"), mapper=MAPPERS, reads=["sr", "lr"], hybrid_assembler=HYBRID_ASSEMBLER),
expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"), hybrid_assembler=HYBRID_ASSEMBLER),
expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_merged_{hybrid_assembler}/{mapper}_merged_{hybrid_assembler}.bam"), mapper=MAPPERS, hybrid_assembler=HYBRID_ASSEMBLER)
# TODO
lr=expand(os.path.join(RESULTS_DIR, "mapping/lr/{tool}/assembly_lr.bam"), tool=config["assemblers"]["lr"]),
sr=expand(os.path.join(RESULTS_DIR, "mapping/sr/{tool}/assembly_sr.bam"), tool=config["assemblers"]["sr"]),
hy=expand(
os.path.join(RESULTS_DIR, "mapping/{rtype1}/{tool}/assembly_{rtype2}.bam"),
rtype1=["hy"], rtype2=["sr", "lr"], tool=config["assemblers"]["hy"]
)
output:
touch('mapping_for_binning.done')
"status/mapping.done"
shell:
"touch {output}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment