diff --git a/config/GDB/config.yaml b/config/GDB/config.yaml index 22d49a1a32453d730feb37f253e8da2dfb1e10b2..9b5b6cc909ed1bc3425f6444b314c847a66f7ab8 100644 --- a/config/GDB/config.yaml +++ b/config/GDB/config.yaml @@ -3,7 +3,7 @@ # Steps to be done # steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"] -steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"] +steps: ["preprocessing", "assembly"] steps_annotation: ["diamond", "rgi", "plasflow", "minced", "barrnap"] # prodigal is run in any case steps_analysis: ["quast", "cdhit", "mash_dist"] steps_taxonomy: ["kraken2", "kaiju"] @@ -84,8 +84,8 @@ fastqc: # List of assemblers for different read types: assembler names MUST be UNIQUE assemblers: sr: ["megahit", "metaspades"] - lr: ["flye", "wtdbg2", "canu"] - hy: ["metaspadeshybrid", "operams"] + lr: ["flye", "wtdbg2", "canu", "flye_sr", "wtdbg2_sr", "canu_sr"] + hy: ["metaspadeshybrid", "operams", "metaspadeshybrid_sr", "operams_sr"] hyhy: ["imp3"] # https://github.com/fenderglass/Flye @@ -146,6 +146,7 @@ bwa: samtools: sort: chunk_size: "4G" + chunk_size_bigmem: "16G" ############################## # Annotation diff --git a/config/GDB/slurm.yaml b/config/GDB/slurm.yaml index df5c0c3d9ffeee3999bffc0ce50a73f3d264b5ad..3fde0e9bdbf5eff53dde5451a2161f05ebf44ef9 100644 --- a/config/GDB/slurm.yaml +++ b/config/GDB/slurm.yaml @@ -142,48 +142,16 @@ mapping_bwa_mem_assembly_hyhy: explicit: "" # Assembly polishing -mapping_bwa_idx_polishing: - time: "00-02:00:00" - partition: "batch" - qos: "qos-batch" - nodes: 1 - n: 1 - explicit: "" - -mapping_bwa_mem_polishing: - time: "00-06:00:00" - partition: "batch" - qos: "qos-batch" - nodes: 1 - n: 1 - explicit: "" - -polishing_lr_racon: - time: "00-04:00:00" +polishing_racon_lr: + time: "00-18:00:00" partition: "bigmem" qos: "qos-bigmem" nodes: 1 n: 1 explicit: "" -polishing_lr_medaka: - time: "01-00:00:00" - partition: "bigmem" - qos: "qos-bigmem" - nodes: 1 - n: 1 - explicit: "" - -mapping_bwa_mem_polishing_metat: - time: "00-02:00:00" - partition: "batch" - qos: "qos-batch" - nodes: 1 - n: 1 - explicit: "" - -polishing_metat_racon: - time: "00-04:00:00" +polishing_racon_sr: + time: "00-18:00:00" partition: "bigmem" qos: "qos-bigmem" nodes: 1 diff --git a/workflow/Snakefile b/workflow/Snakefile index ad19dd33552116edcea520b9a49749eec890c916..b7c4ad8f806f2b8bfcbe16237e84e733c5f30b73 100755 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -5,6 +5,7 @@ ############################## # MODULES import os +import re from scripts.utils import find_fast5, assembler_pairs ############################## diff --git a/workflow/envs/bwa.yaml b/workflow/envs/bwa.yaml deleted file mode 100644 index b519b0d11cbb517ab16dae573957ba4b5d997e4c..0000000000000000000000000000000000000000 --- a/workflow/envs/bwa.yaml +++ /dev/null @@ -1,5 +0,0 @@ -channels: - - bioconda -dependencies: - - bwa=0.7.17 - - samtools=1.9 diff --git a/workflow/envs/racon.yaml b/workflow/envs/racon.yaml index 3141bafcea0d140c53239a0cda66aaab7e65e0b3..c7b56f11cb98648396c645cc92aa996cd59db46e 100644 --- a/workflow/envs/racon.yaml +++ b/workflow/envs/racon.yaml @@ -1,5 +1,41 @@ channels: - - bioconda + - conda-forge + - bioconda + - defaults dependencies: - - racon=1.4.3 - - samtools=1.9 + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=1_gnu + - bwa=0.7.17=hed695b0_7 + - bzip2=1.0.8=h516909a_2 + - c-ares=1.16.1=h516909a_0 + - ca-certificates=2020.6.20=hecda079_0 + - certifi=2020.6.20=py38h32f6830_0 + - curl=7.71.1=he644dc0_5 + - htslib=1.9=h4da6232_3 + - krb5=1.17.1=hfafb76e_2 + - ld_impl_linux-64=2.34=hc38a660_9 + - libcurl=7.71.1=hcdd3856_5 + - libdeflate=1.6=h516909a_0 + - libedit=3.1.20191231=h46ee950_2 + - libev=4.33=h516909a_0 + - libffi=3.2.1=he1b5a44_1007 + - libgcc-ng=9.3.0=h24d8f2e_15 + - libgomp=9.3.0=h24d8f2e_15 + - libnghttp2=1.41.0=hab1572f_1 + - libssh2=1.9.0=hab1572f_5 + - libstdcxx-ng=9.3.0=hdf63c60_15 + - ncurses=6.1=hf484d3e_1002 + - openssl=1.1.1g=h516909a_1 + - perl=5.26.2=h516909a_1006 + - pip=20.2.2=py_0 + - python=3.8.5=h4d41432_2_cpython + - python_abi=3.8=1_cp38 + - racon=1.4.3=he513fc3_0 + - readline=8.0=h46ee950_1 + - samtools=1.9=h10a08f8_12 + - setuptools=49.6.0=py38h32f6830_0 + - sqlite=3.32.3=hcee41ef_1 + - tk=8.6.10=hed695b0_0 + - wheel=0.35.1=pyh9f0ad1d_0 + - xz=5.2.5=h516909a_1 + - zlib=1.2.11=h516909a_1007 diff --git a/workflow/rules/assembly.smk b/workflow/rules/assembly.smk index 651832e98bc034ab41848555ff7d248b2f80d143..0b12eaf45821fd29f8f5c1eda715504135394ac3 100644 --- a/workflow/rules/assembly.smk +++ b/workflow/rules/assembly.smk @@ -60,62 +60,27 @@ rule assembly_lr_canu: "cd $(dirname {output}) && ln -s assembly.contigs.fasta assembly.fasta && " "date) &> {log}" -rule polishing_lr_racon: - input: - lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"), - asm=os.path.join(RESULTS_DIR, "assembly/lr/{tool}/assembly.fasta"), - bam=os.path.join(RESULTS_DIR, "mapping/metag/lr/{tool}/polishing/assembly_lr.bam") - output: - fasta=os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon/assembly.fasta"), - # NOTE: Tmp files for input data to pass file extension checks - sam=temp(os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon/input.sam")), - asm=temp(os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon/input.fasta")) - log: - "logs/racon.{tool}.log" - wildcard_constraints: - tool="|".join(config["assemblers"]["lr"]) - threads: - config["racon"]["threads"] - conda: - os.path.join(ENV_DIR, "racon.yaml") - message: - "Assembly: long reads: polishing w/ Racon" - shell: - "(date && " - "samtools view -h {input.bam} > {output.sam} && " - "cp {input.asm} {output.asm} && " - "racon -t {threads} {input.lr} {output.sam} {output.asm} > {output.fasta} && " - "date) &> {log}" +localrules: assembly_lr_link -rule polishing_lr_medaka: - input: - lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"), - asm=os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon/assembly.fasta") +rule assembly_lr_link: + input: + os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing_raconLR/assembly.fasta") output: - os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon_medaka/consensus.fasta") - log: - "logs/racon.medaka.{tool}.log" + os.path.join(RESULTS_DIR, "assembly/lr/{tool}/ASSEMBLY.fasta") wildcard_constraints: - tool="|".join(config["assemblers"]["lr"]) - threads: - config["medaka"]["threads"] - conda: - os.path.join(ENV_DIR, "medaka.yaml") - message: - "Assembly: long reads: polishing w/ Medaka" + tool="|".join([tool for tool in config["assemblers"]["lr"] if not re.search("_sr$", tool)]), shell: - "(date && " - "medaka_consensus -i {input.lr} -d {input.asm} -o $(dirname {output}) -t {threads} -m {config[medaka][model]} && " - # "cd $(dirname {output}) && ln -sf consensus.fasta $(basename {output}) && " # NOTE: flye already creates this file - "date) &> {log}" + "ipath=$(realpath {input}) && cd $(dirname {output}) && ln -sf $(realpath --relative-to=\".\" \"${{ipath}}\") $(basename {output})" -localrules: assembly_lr_link +localrules: assembly_lr_polished_sr -rule assembly_lr_link: - input: - os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon_medaka/consensus.fasta") - output: +rule assembly_lr_polished_sr: + input: + os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing_raconLR_raconSR/assembly.fasta") + output: os.path.join(RESULTS_DIR, "assembly/lr/{tool}/ASSEMBLY.fasta") + wildcard_constraints: + tool="|".join([tool for tool in config["assemblers"]["lr"] if re.search("_sr$", tool)]), shell: "ipath=$(realpath {input}) && cd $(dirname {output}) && ln -sf $(realpath --relative-to=\".\" \"${{ipath}}\") $(basename {output})" @@ -213,6 +178,18 @@ rule assembly_hy_operams: "cd $(dirname {output.asm}) && ln -sf contigs.fasta $(basename {output.asm}) && " "date) &> {log}" +localrules: assembly_hy_polished_sr + +rule assembly_hy_polished_sr: + input: + os.path.join(RESULTS_DIR, "assembly/hy/{tool}_sr/polishing_raconSR/assembly.fasta") + output: + os.path.join(RESULTS_DIR, "assembly/hy/{tool}_sr/ASSEMBLY.fasta") + wildcard_constraints: + tool="|".join(config["assemblers"]["hy"]), + shell: + "ipath=$(realpath {input}) && cd $(dirname {output}) && ln -sf $(realpath --relative-to=\".\" \"${{ipath}}\") $(basename {output})" + ################################################## # Polishing w/ metaT @@ -242,6 +219,142 @@ rule assembly_hy_operams: # "racon -t {threads} --include-unpolished {input.fastq} {input.sam} {output.asm} > {output.fasta} && " # "date) &> {log}" +################################################## +# Polishing + +localrules: polishing_input_sr + +rule polishing_input_sr: + input: + os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") + output: + os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}_sr/assembly.fasta") + wildcard_constraints: + rtype="|".join(READ_TYPES), + tool="|".join([tool for tool in ASSEMBLERS if not re.search("_sr$", tool)]) + shell: + "cp {input} {output}" + +localrules: polishing_input + +rule polishing_input: + input: + os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.fasta") + output: + os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/polishing/assembly.fasta") + wildcard_constraints: + rtype="|".join(READ_TYPES), + tool="|".join([tool for tool in ASSEMBLERS if not re.search("_sr$", tool)]) + shell: + "cd $(dirname {output}) && ln -sf ../$(basename {input}) $(basename {output})" + +rule polishing_racon_lr: + input: + lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"), + asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}/assembly.fasta"), + output: + fasta=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconLR/assembly.fasta"), + bam=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconLR/input.bam"), + sam=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconLR/input.sam")), + asm=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconLR/input.fasta")) + log: + "logs/polishing.{ptype}_raconLR.{rtype}.{tool}.log" + wildcard_constraints: + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS), + ptype="polishing.*" + threads: + config["racon"]["threads"] + params: + idx_prefix=lambda wildcards, output, input: os.path.splitext(input.asm)[0], + bam_prefix=lambda wildcards, output: os.path.splitext(output.bam)[0], + chunk_size=config["samtools"]["sort"]["chunk_size_bigmem"] + conda: + os.path.join(ENV_DIR, "racon.yaml") + message: + "Assembly polishing w/ Racon and LR" + shell: + "(date && " + # index for mapping + "bwa index {input.asm} -p {params.idx_prefix} && " + # mapping + "bwa mem -x ont2d -t {threads} {params.idx_prefix} {input.lr} | " + "samtools view -@ {threads} -SbT {input.asm} | " + "samtools sort -@ {threads} -m {params.chunk_size} -T {params.bam_prefix} > {output.bam} && " + "samtools view -@ {threads} -h -o {output.sam} {output.bam} && " + # copy assembly + "cp {input.asm} {output.asm} && " + # polish + "racon --include-unpolished -t {threads} {input.lr} {output.sam} {output.asm} > {output.fasta} && " + "date) &> {log}" + +rule polishing_racon_sr: + input: + r1=os.path.join(RESULTS_DIR, "preproc/metag/sr/R1.fastp.fastq.gz"), + r2=os.path.join(RESULTS_DIR, "preproc/metag/sr/R2.fastp.fastq.gz"), + asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/assembly.fasta"), + output: + fasta=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconSR/assembly.fasta"), + bam=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconSR/input.bam"), + sam=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconSR/input.sam")), + asm=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconSR/input.fasta")), + fastq=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{ptype}_raconSR/input.fastq")), + log: + "logs/polishing.{ptype}_raconSR.{rtype}.{tool}.log" + wildcard_constraints: + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS), + ptype="polishing.*" + threads: + config["racon"]["threads"] + params: + idx_prefix=lambda wildcards, output, input: os.path.splitext(input.asm)[0], + bam_prefix=lambda wildcards, output: os.path.splitext(output.bam)[0], + chunk_size=config["samtools"]["sort"]["chunk_size_bigmem"] + conda: + os.path.join(ENV_DIR, "racon.yaml") + message: + "Assembly polishing w/ Racon and LR" + shell: + "(date && " + # index for mapping + "bwa index {input.asm} -p {params.idx_prefix} && " + # proc reads + "zcat {input.r1} | awk '{{print (NR%4 == 1) ? \"@1_\" ++i : $0}}' > {output.fastq} && " + "zcat {input.r2} | awk '{{print (NR%4 == 1) ? \"@2_\" ++i : $0}}' >> {output.fastq} && " + # mapping + "bwa mem -t {threads} {params.idx_prefix} {output.fastq} | " + "samtools view -@ {threads} -SbT {input.asm} | " + "samtools sort -@ {threads} -m {params.chunk_size} -T {params.bam_prefix} > {output.bam} && " + "samtools view -@ {threads} -h -o {output.sam} {output.bam} && " + # copy assembly + "cp {input.asm} {output.asm} && " + # polish + "racon --include-unpolished -t {threads} {output.fastq} {output.sam} {output.asm} > {output.fasta} && " + "date) &> {log}" + +# rule polishing_lr_medaka: +# input: +# lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"), +# asm=os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon/assembly.fasta") +# output: +# os.path.join(RESULTS_DIR, "assembly/lr/{tool}/polishing/racon_medaka/consensus.fasta") +# log: +# "logs/racon.medaka.{tool}.log" +# wildcard_constraints: +# tool="|".join(config["assemblers"]["lr"]) +# threads: +# config["medaka"]["threads"] +# conda: +# os.path.join(ENV_DIR, "medaka.yaml") +# message: +# "Assembly: long reads: polishing w/ Medaka" +# shell: +# "(date && " +# "medaka_consensus -i {input.lr} -d {input.asm} -o $(dirname {output}) -t {threads} -m {config[medaka][model]} && " +# # "cd $(dirname {output}) && ln -sf consensus.fasta $(basename {output}) && " # NOTE: flye already creates this file +# "date) &> {log}" + ################################################## # Contig filtering diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk index 3807aa9c5b3d24d2484a2bcba31e836be130e311..54e3ad96eac30bc36c5a49095d14709f9f8f9069 100644 --- a/workflow/rules/mapping.smk +++ b/workflow/rules/mapping.smk @@ -3,91 +3,91 @@ ################################################## # For polishing -rule mapping_bwa_idx_polishing: - input: - os.path.join(RESULTS_DIR, "assembly/lr/{tool}/assembly.fasta") - output: - temp(expand(os.path.join(RESULTS_DIR, "assembly/lr/{{tool}}/polishing/assembly.{ext}"), ext=BWA_IDX_EXT)) - log: - "logs/bwa_index.polishing.{tool}.log" - wildcard_constraints: - tool="|".join(config["assemblers"]["lr"]) - threads: 1 - params: - idx_prefix=lambda wildcards, output: os.path.splitext(output[0])[0] - conda: - os.path.join(ENV_DIR, "bwa.yaml") - message: - "Mapping: BWA index for assembly polishing" - shell: - "(date && bwa index {input} -p {params.idx_prefix} && date) &> {log}" +# rule mapping_bwa_idx_polishing: +# input: +# os.path.join(RESULTS_DIR, "assembly/lr/{tool}/assembly.fasta") +# output: +# temp(expand(os.path.join(RESULTS_DIR, "assembly/lr/{{tool}}/polishing/assembly.{ext}"), ext=BWA_IDX_EXT)) +# log: +# "logs/bwa_index.polishing.{tool}.log" +# wildcard_constraints: +# tool="|".join(config["assemblers"]["lr"]) +# threads: 1 +# params: +# idx_prefix=lambda wildcards, output: os.path.splitext(output[0])[0] +# conda: +# os.path.join(ENV_DIR, "racon.yaml") +# message: +# "Mapping: BWA index for assembly polishing" +# shell: +# "(date && bwa index {input} -p {params.idx_prefix} && date) &> {log}" -rule mapping_bwa_mem_polishing: - input: - lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"), - asm=os.path.join(RESULTS_DIR, "assembly/lr/{tool}/assembly.fasta"), - idx=expand(os.path.join(RESULTS_DIR, "assembly/lr/{{tool}}/polishing/assembly.{ext}"), ext=BWA_IDX_EXT) - output: - temp(os.path.join(RESULTS_DIR, "mapping/metag/lr/{tool}/polishing/assembly_lr.bam")) - log: - "logs/bwa_mem.polishing.metag.lr.{tool}.log" - wildcard_constraints: - tool="|".join(config["assemblers"]["lr"]) - threads: - config["bwa"]["threads"] - params: - idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0], - bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0] - conda: - os.path.join(ENV_DIR, "bwa.yaml") - message: - "Mapping long reads to assembly w/ BWA for polishing" - shell: - "(date && " - "bwa mem -x ont2d -t {threads} {params.idx_prefix} {input.lr} | " - "samtools view -@ {threads} -SbT {input.asm} | " - "samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && " - "date) &> {log}" +# rule mapping_bwa_mem_polishing: +# input: +# lr=os.path.join(RESULTS_DIR, "preproc/metag/lr/lr.fastq.gz"), +# asm=os.path.join(RESULTS_DIR, "assembly/lr/{tool}/assembly.fasta"), +# idx=expand(os.path.join(RESULTS_DIR, "assembly/lr/{{tool}}/polishing/assembly.{ext}"), ext=BWA_IDX_EXT) +# output: +# temp(os.path.join(RESULTS_DIR, "mapping/metag/lr/{tool}/polishing/assembly_lr.bam")) +# log: +# "logs/bwa_mem.polishing.metag.lr.{tool}.log" +# wildcard_constraints: +# tool="|".join(config["assemblers"]["lr"]) +# threads: +# config["bwa"]["threads"] +# params: +# idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0], +# bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0] +# conda: +# os.path.join(ENV_DIR, "racon.yaml") +# message: +# "Mapping long reads to assembly w/ BWA for polishing" +# shell: +# "(date && " +# "bwa mem -x ont2d -t {threads} {params.idx_prefix} {input.lr} | " +# "samtools view -@ {threads} -SbT {input.asm} | " +# "samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && " +# "date) &> {log}" ################################################## # For metaT polishing # NOTE: for racon -rule mapping_bwa_mem_polishing_metat: - input: - # reads - r1=os.path.join(RESULTS_DIR, "preproc/metat/sr/R1.fastp.fastq.gz"), - r2=os.path.join(RESULTS_DIR, "preproc/metat/sr/R2.fastp.fastq.gz"), - # assembly - asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta"), - # assembly index - idx=expand(os.path.join(RESULTS_DIR, "assembly/{{rtype}}/{{tool}}/ASSEMBLY.{ext}"), ext=BWA_IDX_EXT) - output: - sam=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/mapping.sam")), - fastq=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/reads.fastq")) - log: - "logs/bwa_mem.polishing.metat.{rtype}.{tool}.log" - wildcard_constraints: - rtype="|".join(READ_TYPES), - tool="|".join(ASSEMBLERS) - threads: - config["bwa"]["threads"] - params: - idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0], - sam_prefix=lambda wildcards, output: os.path.splitext(output.sam)[0] - conda: - os.path.join(ENV_DIR, "bwa.yaml") - message: - "Mapping short reads to assembly w/ BWA" - shell: - "(date && " - "zcat {input.r1} | awk '{{print (NR%4 == 1) ? \"@1_\" ++i : $0}}' > {output.fastq} && " - "zcat {input.r2} | awk '{{print (NR%4 == 1) ? \"@2_\" ++i : $0}}' >> {output.fastq} && " - "bwa mem -t {threads} {params.idx_prefix} {output.fastq} | " - "samtools view -@ {threads} -SbT {input.asm} | " - "samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.sam_prefix} | " - "samtools view -@ {threads} -h > {output.sam} && " - "date) &> {log}" +# rule mapping_bwa_mem_polishing_metat: +# input: +# # reads +# r1=os.path.join(RESULTS_DIR, "preproc/metat/sr/R1.fastp.fastq.gz"), +# r2=os.path.join(RESULTS_DIR, "preproc/metat/sr/R2.fastp.fastq.gz"), +# # assembly +# asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta"), +# # assembly index +# idx=expand(os.path.join(RESULTS_DIR, "assembly/{{rtype}}/{{tool}}/ASSEMBLY.{ext}"), ext=BWA_IDX_EXT) +# output: +# sam=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/mapping.sam")), +# fastq=temp(os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}metatracon/reads.fastq")) +# log: +# "logs/bwa_mem.polishing.metat.{rtype}.{tool}.log" +# wildcard_constraints: +# rtype="|".join(READ_TYPES), +# tool="|".join(ASSEMBLERS) +# threads: +# config["bwa"]["threads"] +# params: +# idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0], +# sam_prefix=lambda wildcards, output: os.path.splitext(output.sam)[0] +# conda: +# os.path.join(ENV_DIR, "racon.yaml") +# message: +# "Mapping short reads to assembly w/ BWA" +# shell: +# "(date && " +# "zcat {input.r1} | awk '{{print (NR%4 == 1) ? \"@1_\" ++i : $0}}' > {output.fastq} && " +# "zcat {input.r2} | awk '{{print (NR%4 == 1) ? \"@2_\" ++i : $0}}' >> {output.fastq} && " +# "bwa mem -t {threads} {params.idx_prefix} {output.fastq} | " +# "samtools view -@ {threads} -SbT {input.asm} | " +# "samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.sam_prefix} | " +# "samtools view -@ {threads} -h > {output.sam} && " +# "date) &> {log}" ################################################## # For assembly @@ -108,7 +108,7 @@ rule mapping_bwa_idx_assembly: params: idx_prefix=lambda wildcards, output: os.path.splitext(output[0])[0] conda: - os.path.join(ENV_DIR, "bwa.yaml") + os.path.join(ENV_DIR, "racon.yaml") message: "Mapping: BWA index for assembly mapping" shell: @@ -134,16 +134,17 @@ rule mapping_bwa_mem_assembly_sr_metag: config["bwa"]["threads"] params: idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0], - bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0] + bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0], + chunk_size=config["samtools"]["sort"]["chunk_size"] conda: - os.path.join(ENV_DIR, "bwa.yaml") + os.path.join(ENV_DIR, "racon.yaml") message: "Mapping short reads to assembly w/ BWA" shell: "(date && " "bwa mem -t {threads} {params.idx_prefix} {input.r1} {input.r2} | " "samtools view -@ {threads} -SbT {input.asm} | " - "samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && " + "samtools sort -@ {threads} -m {params.chunk_size} -T {params.bam_prefix} > {output} && " "date) &> {log}" rule mapping_bwa_mem_assembly_sr_metat: @@ -164,16 +165,17 @@ rule mapping_bwa_mem_assembly_sr_metat: config["bwa"]["threads"] params: idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0], - bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0] + bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0], + chunk_size=config["samtools"]["sort"]["chunk_size"] conda: - os.path.join(ENV_DIR, "bwa.yaml") + os.path.join(ENV_DIR, "racon.yaml") message: "Mapping short reads to assembly w/ BWA" shell: "(date && " "bwa mem -t {threads} {params.idx_prefix} {input.r1} {input.r2} | " "samtools view -@ {threads} -SbT {input.asm} | " - "samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && " + "samtools sort -@ {threads} -m {params.chunk_size} -T {params.bam_prefix} > {output} && " "date) &> {log}" # Long reads @@ -194,16 +196,17 @@ rule mapping_bwa_mem_assembly_lr: config["bwa"]["threads"] params: idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0], - bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0] + bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0], + chunk_size=config["samtools"]["sort"]["chunk_size"] conda: - os.path.join(ENV_DIR, "bwa.yaml") + os.path.join(ENV_DIR, "racon.yaml") message: "Mapping long reads to assembly w/ BWA" shell: "(date && " "bwa mem -x ont2d -t {threads} {params.idx_prefix} {input.lr} | " "samtools view -@ {threads} -SbT {input.asm} | " - "samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output} && " + "samtools sort -@ {threads} -m {params.chunk_size} -T {params.bam_prefix} > {output} && " "date) &> {log}" # Hybrid: short + long reads @@ -220,7 +223,7 @@ rule mapping_bwa_mem_assembly_hy: atype="ASSEMBLY|ASSEMBLY.FILTERED" threads: 1 conda: - os.path.join(ENV_DIR, "bwa.yaml") + os.path.join(ENV_DIR, "racon.yaml") message: "Mapping: merging short-reads and long-reads mapping results" shell: @@ -248,16 +251,17 @@ rule mapping_bwa_mem_assembly_hyhy: config["bwa"]["threads"] params: idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0], - bam_prefix=lambda wildcards, output: os.path.splitext(output.bam_metat)[0] + bam_prefix=lambda wildcards, output: os.path.splitext(output.bam_metat)[0], + chunk_size=config["samtools"]["sort"]["chunk_size"] conda: - os.path.join(ENV_DIR, "bwa.yaml") + os.path.join(ENV_DIR, "racon.yaml") message: "Mapping: merging short-reads and long-reads mapping results" shell: "(date && " "bwa mem -t {threads} {params.idx_prefix} {input.metat_r1} {input.metat_r2} | " "samtools view -@ {threads} -SbT {input.asm} | " - "samtools sort -@ {threads} -m {config[samtools][sort][chunk_size]} -T {params.bam_prefix} > {output.bam_metat} && " + "samtools sort -@ {threads} -m {params.chunk_size} -T {params.bam_prefix} > {output.bam_metat} && " "samtools merge {output.bam} {input.bam_sr} {output.bam_metat} {input.bam_lr} && " "date) &> {log}" @@ -329,7 +333,7 @@ rule mapping_assembly_flagstat: tool="|".join(ASSEMBLERS), atype="ASSEMBLY|ASSEMBLY.FILTERED" conda: - os.path.join(ENV_DIR, "bwa.yaml") + os.path.join(ENV_DIR, "racon.yaml") message: "Mapping: assembly coverage stats w/ samtools flagstat" shell: