Skip to content
Snippets Groups Projects
Commit bf7f5ad2 authored by Susheel Busi's avatar Susheel Busi
Browse files

use this for Binning FLYE and MEGAHIT assemblies. To be incorporated into the original Snakefile

parent 246e8c8c
No related branches found
No related tags found
1 merge request!26BINNING-snakefiles
snakefile 0 → 100755
"""
Author: Susheel Bhanu BUSI
Affiliation: ESB group LCSB UniLU
Date: [2019-09-13]
Run: snakemake -s binning_snakefile
Latest modification:
"""
configfile:"config.yaml"
DATA_DIR=config['data_dir']
RESULTS_DIR=config['results_dir']
SAMPLES=config['samples']
# DATABASE=config['database']
###########
rule all:
# input: expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/metaspades.bwt"))
# input: expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/{reads}/metaspades.bam"), reads=["sr"])
# input: expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/{reads}/metaspades.bam"), reads=["sr", "lr"])
# input: expand(os.path.join(DATA_DIR, "{sample}/dastool_output/{sample}/"), sample=SAMPLES),
# input: expand(os.path.join(DATA_DIR, "checkm_output/{sample}/lineage.ms"), sample=SAMPLES)
######
# WRAPPER RULES
######
# Added the `shell:` so as to avoid it from being autodetected as `localrule`. This is needed so that an email can be sent upon event changes for this rule.
rule MAPPING:
input:
expand(os.path.join(DATA_DIR, "metaspades.bwt")),
expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/sr/metaspades.bam")),
expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/lr/metaspades.bam")),
expand(os.path.join(DATA_DIR, "metaspades/mmi/lr/lr_hybrid.mmi")),
expand(os.path.join(DATA_DIR, "metaspades/mmi/lr/metaspades.bam")),
expand(os.path.join(DATA_DIR, "metaspades/mmi/sr/sr_hybrid.mmi")),
expand(os.path.join(DATA_DIR, "metaspades/mmi/sr/metaspades.bam")),
expand(os.path.join(DATA_DIR, "metaspades/{mapper}/merged_bam/metaspades.bam"), mapper=["bwa-mem", "mmi"])
output: "bwa_and_mmi_mapping.done"
shell:
"""
touch {output}
"""
rule BINNING:
input:
expand(os.path.join(DATA_DIR, "{sample}/dastool_output/{sample}"), sample=SAMPLES),
expand(os.path.join(DATA_DIR, "checkm_output/{sample}/lineage.ms"), sample=SAMPLES)
# expand(os.path.join(DATA_DIR, "gtdbtk_output/{sample}/gtdbtk.bac120.summary.tsv"), sample=SAMPLES)
#######################
## Mapping to hybrid ##
#######################
rule build_bwa_index:
input: "metaspades.fna"
output:
# done=touch("bwa_index.done")
index_amb=os.path.join(DATA_DIR, "{prefix}.amb"),
index_ann=os.path.join(DATA_DIR, "{prefix}.ann"),
index_bwt=os.path.join(DATA_DIR, "{prefix}.bwt"),
index_pac=os.path.join(DATA_DIR, "{prefix}.pac"),
index_sa=os.path.join(DATA_DIR, "{prefix}.sa")
log: os.path.join(DATA_DIR, "metaspades/{prefix}.bwa_index")
conda: "envs/bwa.yaml"
shell: """
(date &&\
bwa index {input} -p {wildcards.prefix} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule map_short_reads_to_hybrid_contigs_w_bwa_mem:
input:
query_r1="NEB2_MG_S17_R1_001.fastp.fq.gz",
query_r2="NEB2_MG_S17_R2_001.fastp.fq.gz",
index_amb=os.path.join(DATA_DIR, "metaspades.amb"),
index_ann=os.path.join(DATA_DIR, "metaspades.ann"),
index_bwt=os.path.join(DATA_DIR, "metaspades.bwt"),
index_pac=os.path.join(DATA_DIR, "metaspades.pac"),
index_sa=os.path.join(DATA_DIR, "metaspades.sa"),
index_fa=os.path.join(DATA_DIR, "metaspades.fna")
output:
os.path.join(DATA_DIR, "metaspades/bwa-mem/sr/metaspades.bam")
params:
index_prefix=os.path.join(DATA_DIR, "metaspades"),
out_prefix=os.path.join(DATA_DIR, "metaspades/bwa-mem/sr/metaspades")
log: os.path.join(DATA_DIR, "metaspades/sr_contigs.bwa_mem_samtools")
conda: "envs/bwa.yaml"
threads:
config["bwa"]["threads"]
shell: """
(date &&\
bwa mem -t {threads} {params.index_prefix} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule map_long_reads_to_hybrid_contigs_w_bwa_mem:
input:
query_lr="barcode07.fastq.gz",
index_amb=os.path.join(DATA_DIR, "metaspades.amb"),
index_ann=os.path.join(DATA_DIR, "metaspades.ann"),
index_bwt=os.path.join(DATA_DIR, "metaspades.bwt"),
index_pac=os.path.join(DATA_DIR, "metaspades.pac"),
index_sa=os.path.join(DATA_DIR, "metaspades.sa"),
index_fa=os.path.join(DATA_DIR, "metaspades.fna")
output:
os.path.join(DATA_DIR, "metaspades/bwa-mem/lr/metaspades.bam")
params:
index_prefix=os.path.join(DATA_DIR, "metaspades"),
out_prefix=os.path.join(DATA_DIR, "metaspades/bwa-mem/lr/metaspades")
log: os.path.join(DATA_DIR, "metaspades/lr_contigs.bwa_mem_samtools")
conda: "envs/bwa.yaml"
threads: config["bwa"]["threads"]
shell: """
(date &&\
bwa mem -x ont2d -t {threads} {params.index_prefix} {input.query_lr} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule build_minimap2_long_read_contigs_ont_index:
input: "metaspades.fna"
output: os.path.join(DATA_DIR, "metaspades/mmi/lr/lr_hybrid.mmi")
conda: "envs/minimap2.yaml"
shell:
"""
date
minimap2 -x map-ont -d {output} {input}
date
"""
rule map_long_reads_to_long_read_contigs:
input:
query="barcode07.fastq.gz",
index=os.path.join(DATA_DIR, "metaspades/mmi/lr/lr_hybrid.mmi"),
index_fa="metaspades.fna"
output: os.path.join(DATA_DIR, "metaspades/mmi/lr/metaspades.bam")
params:
prefix=os.path.join(DATA_DIR, "metaspades/mmi/lr/metaspades")
conda: "envs/minimap2.yaml"
threads: config["minimap2"]["threads"]
log: os.path.join(DATA_DIR, "metaspades/mmi/lr/metaspades.minimap2_samtools")
shell: """
(date &&\
minimap2 -a -t {threads} {input.index} {input.query} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule build_minimap2_long_read_contigs_sr_index:
input: "metaspades.fna"
output: os.path.join(DATA_DIR, "metaspades/mmi/sr/sr_hybrid.mmi")
conda: "envs/minimap2.yaml"
shell:
"""
date
minimap2 -x sr -d {output} {input}
date
"""
rule map_short_reads_to_long_read_contigs_w_minimap2:
input:
query_r1="NEB2_MG_S17_R1_001.fastp.fq.gz",
query_r2="NEB2_MG_S17_R2_001.fastp.fq.gz",
index=os.path.join(DATA_DIR, "metaspades/mmi/sr/sr_hybrid.mmi"),
index_fa="metaspades.fna"
output: os.path.join(DATA_DIR, "metaspades/mmi/sr/metaspades.bam")
params:
prefix=os.path.join(DATA_DIR, "metaspades/mmi/sr/metaspades")
conda: "envs/minimap2.yaml"
threads: config["minimap2"]["threads"]
log: os.path.join(DATA_DIR, "metaspades/mmi/sr/metaspades.minimap2_samtools")
shell:
"""
(date &&\
minimap2 -a -t {threads} {input.index} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.prefix} > {output}
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule merge_bam_files_for_metaspades:
input:
bam_sr=os.path.join(DATA_DIR, "metaspades/{mapper}/sr/metaspades.bam"),
bam_lr=os.path.join(DATA_DIR, "metaspades/{mapper}/lr/metaspades.bam")
output: os.path.join(DATA_DIR, "metaspades/{mapper}/merged_bam/metaspades.bam")
conda: "/home/users/sbusi/apps/environments/binning.yml"
shell:
"""
(date &&\
samtools merge {output} {input.bam_sr} {input.bam_lr} &&\
date)
"""
#####################################
########### MAXBIN2 #################
#####################################
rule maxbin2:
input:
bbref="{datadir}/{sample}.fna",
bbam="{datadir}/{sample}/{sample}.bam"
threads:config["threads"]
conda:"/home/users/sbusi/apps/environments/binning.yml"
output:
map="{datadir}/{sample}/{sample}.sam",
coverage="{datadir}/{sample}/{sample}_cov.txt",
abund="{datadir}/{sample}/{sample}_abundance.txt",
file="{datadir}/{sample}/maxbin_output/maxbin_output.001.fasta"
# mx=directory("{datadir}/{sample}/maxbin_output/")
shell:
"""
date
samtools view -h -o {output.map} {input.bbam}
pileup.sh in={output.map} out={output.coverage}
awk '{{print $1"\\t"$2}}' {output.coverage} | grep -v '^#' > {output.abund}
run_MaxBin.pl -thread {threads} -contig {input.bbref} -out {wildcards.datadir}/{wildcards.sample}/maxbin_output/maxbin_output -abund {output.abund}
touch maxbin.done
date
"""
#####################################
########### METABAT2 ################
#####################################
rule depth_files:
input:
dep1="{datadir}/{sample}/{sample}.bam"
threads:config["threads"]
conda:"/home/users/sbusi/apps/environments/binning.yml"
output:
depout="{datadir}/{sample}/{sample}_depth.txt"
shell:
"""
date
jgi_summarize_bam_contig_depths --outputDepth {output.depout} {input.dep1}
touch depth_file.done
date
"""
rule metabat2:
input:
met1="{datadir}/{sample}.fna",
met2="{datadir}/{sample}/{sample}_depth.txt"
threads:config["threads"]
conda:"/home/users/sbusi/apps/environments/binning.yml"
output:
outdir=directory("{datadir}/{sample}/metabat_output/")
shell:
"""
date
metabat2 -i {input.met1} -a {input.met2} -o {wildcards.datadir}/{wildcards.sample}/metabat_output/{wildcards.sample}.metabat -t {threads} -m 1500 -v --unbinned
touch metabat2.done
date
"""
#####################################
########### DAS_Tool ################
#####################################
rule scaffold_list:
input:
sca1="{datadir}/{sample}/metabat_output/",
# sca2="{datadir}/{sample}/maxbin_output/"
file = "{datadir}/{sample}/maxbin_output/maxbin_output.001.fasta"
threads:config["threads"]
conda:"/home/users/sbusi/apps/environments/base.yml"
output:
scout1="{datadir}/{sample}/dastool_output/{sample}_metabat.scaffolds2bin.tsv",
scout2="{datadir}/{sample}/dastool_output/{sample}_maxbin.scaffolds2bin.tsv"
shell:
"""
date
Fasta_to_Scaffolds2Bin.sh -i {input.sca1} -e fa > {output.scout1}
Fasta_to_Scaffolds2Bin.sh -i {wildcards.datadir}/{wildcards.sample}/maxbin_output -e fasta > {output.scout2}
date
touch scaffold_list.done
"""
rule DAS_Tool:
input:
da1="{datadir}/{sample}/dastool_output/{sample}_metabat.scaffolds2bin.tsv",
da2="{datadir}/{sample}/dastool_output/{sample}_maxbin.scaffolds2bin.tsv",
da3="{datadir}/{sample}.fna",
db=config["dastool_database"]
threads:config["threads"]
conda:"/home/users/sbusi/apps/environments/base.yml"
output:
daout=directory("{datadir}/{sample}/dastool_output/{sample}")
shell:
"""
date
DAS_Tool -i {input.da1},{input.da2} -c {input.da3} -o {output.daout} --search_engine diamond -l maxbin2,metabat2 --write_bins 1 --write_bin_evals 1 --threads {threads} --db_directory {input.db} --create_plots 1 &&\
2> >(tee {log}.stderr) > >(tee {log}.stdout)
touch das_tool.done
date
"""
#checkpoint collect_bins:
# input:
# col1="{datadir}/{sample}/dastool_output"
# col1="{datadir}/{sample}/dastool_output/{file_i}.fa"
# output:
# colout="{datadir}/{sample}/dastool_output/high_quality_bins/{file_i}.fa"
# shell: """
# date
# cp {input.col1} {output.colout}
# date
# """
#####################################
######## GTDBTK & CHECKM ############
#####################################
rule gtdbtk:
input:
gen="{datadir}/{sample}/dastool_output/{sample}/"
threads:config["threads"]
conda:"gtdbtk.yml"
output:
gtdb=directory("{datadir}/gtdbtk_output/{sample}")
shell:"""
date
gtdbtk classify_wf --cpus {threads} -x fa --genome_dir {input.gen} --out_dir {output.gtdb}
touch gtdbtk.done
date
"""
rule checkm:
input:
chkm="{datadir}/{sample}/dastool_output/{sample}/"
threads:config["threads"]
conda:"checkm.yml"
output:
chkmdir=directory("{datadir}/checkm_output/{sample}/lineage.ms")
shell:
"""
date
checkm lineage_wf -r -t {threads} -x fa {input.chkm} {output.chkmdir}
touch checkm.done
date
"""
rule plot_checkm:
input:
pl1="{datadir}/checkm_output/{sample}",
pl2="{datadir}/{sample}/dastool_output/high_quality_bins"
threads:config["threads"]
conda:"/home/users/sbusi/apps/environments/checkm.yml"
output:
plt=directory("{datadir}/checkm_output/{sample}/plots")
shell:
"""
date
checkm bin_qa_plot --image_type png --width 8 --dpi 200 --font_size 8 -x fa -q {input.pl1} {input.pl2} {output.plt}
touch plot_checkm.done
date
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment