From bf7f5ad2cc63be954c1ff8e71b440459f5383109 Mon Sep 17 00:00:00 2001
From: "Susheel Bhanu Busi susheel.busi@uni.lu" <susheel.busi@uni.lu>
Date: Mon, 20 Apr 2020 22:02:12 +0200
Subject: [PATCH] use this for Binning FLYE and MEGAHIT assemblies. To be
 incorporated into the original Snakefile

---
 snakefile | 351 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 351 insertions(+)
 create mode 100755 snakefile

diff --git a/snakefile b/snakefile
new file mode 100755
index 0000000..1fb685e
--- /dev/null
+++ b/snakefile
@@ -0,0 +1,351 @@
+"""
+Author: Susheel Bhanu BUSI
+Affiliation: ESB group LCSB UniLU
+Date: [2019-09-13]
+Run: snakemake -s binning_snakefile
+Latest modification:
+"""
+
+configfile:"config.yaml"
+
+DATA_DIR=config['data_dir']
+RESULTS_DIR=config['results_dir']
+SAMPLES=config['samples']
+# DATABASE=config['database']
+
+###########
+rule all:
+#    input: expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/metaspades.bwt"))
+#    input: expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/{reads}/metaspades.bam"), reads=["sr"])
+#    input: expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/{reads}/metaspades.bam"), reads=["sr", "lr"])
+#    input: expand(os.path.join(DATA_DIR, "{sample}/dastool_output/{sample}/"), sample=SAMPLES),
+#    input: expand(os.path.join(DATA_DIR, "checkm_output/{sample}/lineage.ms"), sample=SAMPLES)
+
+######
+# WRAPPER RULES
+######
+# Added the `shell:` so as to avoid it from being autodetected as `localrule`. This is needed so that an email can be sent upon event changes for this rule.
+rule MAPPING:
+    input:
+        expand(os.path.join(DATA_DIR, "metaspades.bwt")),
+        expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/sr/metaspades.bam")),
+        expand(os.path.join(DATA_DIR, "metaspades/bwa-mem/lr/metaspades.bam")),
+        expand(os.path.join(DATA_DIR, "metaspades/mmi/lr/lr_hybrid.mmi")),
+        expand(os.path.join(DATA_DIR, "metaspades/mmi/lr/metaspades.bam")),
+        expand(os.path.join(DATA_DIR, "metaspades/mmi/sr/sr_hybrid.mmi")),
+        expand(os.path.join(DATA_DIR, "metaspades/mmi/sr/metaspades.bam")),
+        expand(os.path.join(DATA_DIR, "metaspades/{mapper}/merged_bam/metaspades.bam"), mapper=["bwa-mem", "mmi"])
+    output: "bwa_and_mmi_mapping.done"
+    shell:
+        """
+        touch {output}
+        """
+
+rule BINNING:
+    input:
+        expand(os.path.join(DATA_DIR, "{sample}/dastool_output/{sample}"), sample=SAMPLES),
+        expand(os.path.join(DATA_DIR, "checkm_output/{sample}/lineage.ms"), sample=SAMPLES)
+#        expand(os.path.join(DATA_DIR, "gtdbtk_output/{sample}/gtdbtk.bac120.summary.tsv"), sample=SAMPLES)
+
+#######################
+## Mapping to hybrid ##
+#######################
+rule build_bwa_index:
+    input: "metaspades.fna"
+    output:
+#           done=touch("bwa_index.done")
+           index_amb=os.path.join(DATA_DIR, "{prefix}.amb"),
+           index_ann=os.path.join(DATA_DIR, "{prefix}.ann"),
+           index_bwt=os.path.join(DATA_DIR, "{prefix}.bwt"),
+           index_pac=os.path.join(DATA_DIR, "{prefix}.pac"),
+           index_sa=os.path.join(DATA_DIR, "{prefix}.sa")
+    log: os.path.join(DATA_DIR, "metaspades/{prefix}.bwa_index")
+    conda: "envs/bwa.yaml"
+    shell: """
+           (date &&\
+           bwa index {input} -p {wildcards.prefix} &&\
+           date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+           """
+
+rule map_short_reads_to_hybrid_contigs_w_bwa_mem:
+    input:
+          query_r1="NEB2_MG_S17_R1_001.fastp.fq.gz",
+          query_r2="NEB2_MG_S17_R2_001.fastp.fq.gz",
+          index_amb=os.path.join(DATA_DIR, "metaspades.amb"),
+          index_ann=os.path.join(DATA_DIR, "metaspades.ann"),
+          index_bwt=os.path.join(DATA_DIR, "metaspades.bwt"),
+          index_pac=os.path.join(DATA_DIR, "metaspades.pac"),
+          index_sa=os.path.join(DATA_DIR, "metaspades.sa"),
+          index_fa=os.path.join(DATA_DIR, "metaspades.fna")
+    output:
+          os.path.join(DATA_DIR, "metaspades/bwa-mem/sr/metaspades.bam")
+    params:
+          index_prefix=os.path.join(DATA_DIR, "metaspades"),
+          out_prefix=os.path.join(DATA_DIR, "metaspades/bwa-mem/sr/metaspades")
+    log: os.path.join(DATA_DIR, "metaspades/sr_contigs.bwa_mem_samtools")
+    conda: "envs/bwa.yaml"
+    threads:
+          config["bwa"]["threads"]
+    shell: """
+           (date &&\
+           bwa mem -t {threads} {params.index_prefix} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
+           date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+           """
+
+rule map_long_reads_to_hybrid_contigs_w_bwa_mem:
+    input:
+          query_lr="barcode07.fastq.gz",
+          index_amb=os.path.join(DATA_DIR, "metaspades.amb"),
+          index_ann=os.path.join(DATA_DIR, "metaspades.ann"),
+          index_bwt=os.path.join(DATA_DIR, "metaspades.bwt"),
+          index_pac=os.path.join(DATA_DIR, "metaspades.pac"),
+          index_sa=os.path.join(DATA_DIR, "metaspades.sa"),
+          index_fa=os.path.join(DATA_DIR, "metaspades.fna")
+    output:
+          os.path.join(DATA_DIR, "metaspades/bwa-mem/lr/metaspades.bam")
+    params:
+          index_prefix=os.path.join(DATA_DIR, "metaspades"),
+          out_prefix=os.path.join(DATA_DIR, "metaspades/bwa-mem/lr/metaspades")
+    log:  os.path.join(DATA_DIR, "metaspades/lr_contigs.bwa_mem_samtools")
+    conda: "envs/bwa.yaml"
+    threads: config["bwa"]["threads"]
+    shell: """
+          (date &&\
+          bwa mem -x ont2d -t {threads} {params.index_prefix} {input.query_lr} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
+          date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+          """
+
+rule build_minimap2_long_read_contigs_ont_index:
+    input: "metaspades.fna"
+    output: os.path.join(DATA_DIR, "metaspades/mmi/lr/lr_hybrid.mmi")
+    conda: "envs/minimap2.yaml"
+    shell:
+           """
+            date
+            minimap2 -x map-ont -d {output} {input}
+            date
+            """
+
+rule map_long_reads_to_long_read_contigs:
+    input:
+          query="barcode07.fastq.gz",
+          index=os.path.join(DATA_DIR, "metaspades/mmi/lr/lr_hybrid.mmi"),
+          index_fa="metaspades.fna"
+    output: os.path.join(DATA_DIR, "metaspades/mmi/lr/metaspades.bam")
+    params:
+          prefix=os.path.join(DATA_DIR, "metaspades/mmi/lr/metaspades")
+    conda: "envs/minimap2.yaml"
+    threads: config["minimap2"]["threads"]
+    log: os.path.join(DATA_DIR, "metaspades/mmi/lr/metaspades.minimap2_samtools")
+    shell: """
+            (date &&\
+            minimap2 -a -t {threads} {input.index} {input.query} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.prefix} > {output} &&\
+            date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+            """
+
+rule build_minimap2_long_read_contigs_sr_index:
+    input: "metaspades.fna"
+    output: os.path.join(DATA_DIR, "metaspades/mmi/sr/sr_hybrid.mmi")
+    conda: "envs/minimap2.yaml"
+    shell:
+            """
+            date
+            minimap2 -x sr -d {output} {input}
+            date
+            """
+
+rule map_short_reads_to_long_read_contigs_w_minimap2:
+    input:
+          query_r1="NEB2_MG_S17_R1_001.fastp.fq.gz",
+          query_r2="NEB2_MG_S17_R2_001.fastp.fq.gz",
+          index=os.path.join(DATA_DIR, "metaspades/mmi/sr/sr_hybrid.mmi"),
+          index_fa="metaspades.fna"
+    output: os.path.join(DATA_DIR, "metaspades/mmi/sr/metaspades.bam")
+    params:
+          prefix=os.path.join(DATA_DIR, "metaspades/mmi/sr/metaspades")
+    conda: "envs/minimap2.yaml"
+    threads: config["minimap2"]["threads"]
+    log: os.path.join(DATA_DIR, "metaspades/mmi/sr/metaspades.minimap2_samtools")
+    shell:
+           """
+            (date &&\
+           minimap2 -a -t {threads} {input.index} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.prefix} > {output}
+           date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
+            """
+ 
+rule merge_bam_files_for_metaspades:
+    input:
+          bam_sr=os.path.join(DATA_DIR, "metaspades/{mapper}/sr/metaspades.bam"),
+          bam_lr=os.path.join(DATA_DIR, "metaspades/{mapper}/lr/metaspades.bam")
+    output: os.path.join(DATA_DIR, "metaspades/{mapper}/merged_bam/metaspades.bam")
+    conda: "/home/users/sbusi/apps/environments/binning.yml"
+    shell:
+          """
+          (date &&\
+          samtools merge {output} {input.bam_sr} {input.bam_lr} &&\
+          date)
+          """
+
+   
+#####################################
+########### MAXBIN2 #################
+#####################################
+rule maxbin2:
+    input:
+          bbref="{datadir}/{sample}.fna",
+          bbam="{datadir}/{sample}/{sample}.bam"
+    threads:config["threads"]
+    conda:"/home/users/sbusi/apps/environments/binning.yml"
+    output:
+           map="{datadir}/{sample}/{sample}.sam",
+           coverage="{datadir}/{sample}/{sample}_cov.txt",
+           abund="{datadir}/{sample}/{sample}_abundance.txt",
+           file="{datadir}/{sample}/maxbin_output/maxbin_output.001.fasta"
+#        mx=directory("{datadir}/{sample}/maxbin_output/")
+    shell:
+           """
+            date
+            samtools view -h -o {output.map} {input.bbam}
+            pileup.sh in={output.map} out={output.coverage}
+            awk '{{print $1"\\t"$2}}' {output.coverage} | grep -v '^#' > {output.abund}
+            run_MaxBin.pl -thread {threads} -contig {input.bbref} -out {wildcards.datadir}/{wildcards.sample}/maxbin_output/maxbin_output -abund {output.abund}
+            touch maxbin.done
+            date
+            """
+
+
+#####################################
+########### METABAT2 ################
+#####################################
+
+rule depth_files:
+    input:
+          dep1="{datadir}/{sample}/{sample}.bam"
+    threads:config["threads"]
+    conda:"/home/users/sbusi/apps/environments/binning.yml"
+    output:
+          depout="{datadir}/{sample}/{sample}_depth.txt"
+    shell:
+           """
+            date
+            jgi_summarize_bam_contig_depths --outputDepth {output.depout} {input.dep1}
+            touch depth_file.done 
+            date
+            """
+
+rule metabat2:
+    input:
+           met1="{datadir}/{sample}.fna",
+           met2="{datadir}/{sample}/{sample}_depth.txt"
+    threads:config["threads"]
+    conda:"/home/users/sbusi/apps/environments/binning.yml"
+    output:
+           outdir=directory("{datadir}/{sample}/metabat_output/")
+    shell:
+           """
+            date
+            metabat2 -i {input.met1} -a {input.met2} -o {wildcards.datadir}/{wildcards.sample}/metabat_output/{wildcards.sample}.metabat -t {threads} -m 1500 -v --unbinned
+            touch metabat2.done
+            date
+            """
+
+#####################################
+########### DAS_Tool ################
+#####################################
+rule scaffold_list:
+    input:
+        sca1="{datadir}/{sample}/metabat_output/",
+#        sca2="{datadir}/{sample}/maxbin_output/"
+        file = "{datadir}/{sample}/maxbin_output/maxbin_output.001.fasta"
+    threads:config["threads"]
+    conda:"/home/users/sbusi/apps/environments/base.yml"
+    output:
+            scout1="{datadir}/{sample}/dastool_output/{sample}_metabat.scaffolds2bin.tsv",
+            scout2="{datadir}/{sample}/dastool_output/{sample}_maxbin.scaffolds2bin.tsv"
+    shell:
+            """
+            date
+            Fasta_to_Scaffolds2Bin.sh -i {input.sca1} -e fa > {output.scout1}
+            Fasta_to_Scaffolds2Bin.sh -i {wildcards.datadir}/{wildcards.sample}/maxbin_output -e fasta > {output.scout2}
+            date
+            touch scaffold_list.done
+            """
+
+rule DAS_Tool:
+    input:
+            da1="{datadir}/{sample}/dastool_output/{sample}_metabat.scaffolds2bin.tsv",
+            da2="{datadir}/{sample}/dastool_output/{sample}_maxbin.scaffolds2bin.tsv",
+            da3="{datadir}/{sample}.fna",
+            db=config["dastool_database"]
+    threads:config["threads"]
+    conda:"/home/users/sbusi/apps/environments/base.yml"
+    output:
+            daout=directory("{datadir}/{sample}/dastool_output/{sample}")
+    shell:
+            """
+            date
+            DAS_Tool -i {input.da1},{input.da2} -c {input.da3} -o {output.daout} --search_engine diamond -l maxbin2,metabat2 --write_bins 1 --write_bin_evals 1 --threads {threads} --db_directory {input.db} --create_plots 1 &&\
+            2> >(tee {log}.stderr) > >(tee {log}.stdout)
+            touch das_tool.done
+            date
+            """
+#checkpoint collect_bins:
+#    input:
+#        col1="{datadir}/{sample}/dastool_output"
+#        col1="{datadir}/{sample}/dastool_output/{file_i}.fa"
+#    output:
+#        colout="{datadir}/{sample}/dastool_output/high_quality_bins/{file_i}.fa"
+#    shell:    """
+#        date
+#        cp {input.col1} {output.colout}
+#        date
+#        """
+
+#####################################
+######## GTDBTK & CHECKM ############
+#####################################
+
+rule gtdbtk:
+    input:
+           gen="{datadir}/{sample}/dastool_output/{sample}/"
+    threads:config["threads"]
+    conda:"gtdbtk.yml"
+    output:
+           gtdb=directory("{datadir}/gtdbtk_output/{sample}")
+    shell:"""
+           date
+           gtdbtk classify_wf --cpus {threads} -x fa --genome_dir {input.gen} --out_dir {output.gtdb}
+           touch gtdbtk.done
+           date
+           """
+
+rule checkm:
+    input:
+           chkm="{datadir}/{sample}/dastool_output/{sample}/"
+    threads:config["threads"]
+    conda:"checkm.yml"
+    output:
+           chkmdir=directory("{datadir}/checkm_output/{sample}/lineage.ms")
+    shell:
+           """
+           date
+           checkm lineage_wf -r -t {threads} -x fa {input.chkm} {output.chkmdir}
+           touch checkm.done
+           date
+           """
+
+rule plot_checkm:
+    input:
+           pl1="{datadir}/checkm_output/{sample}",
+           pl2="{datadir}/{sample}/dastool_output/high_quality_bins"
+    threads:config["threads"]
+    conda:"/home/users/sbusi/apps/environments/checkm.yml"
+    output:
+           plt=directory("{datadir}/checkm_output/{sample}/plots")
+    shell:
+           """
+           date
+           checkm bin_qa_plot --image_type png --width 8 --dpi 200 --font_size 8 -x fa -q {input.pl1} {input.pl2} {output.plt}
+           touch plot_checkm.done
+           date
+           """
-- 
GitLab