Skip to content
Snippets Groups Projects
Commit 56248f29 authored by Susheel Busi's avatar Susheel Busi
Browse files

updated Snakefile, cluster and config for metaT read mapping

parent 43c3c4e7
No related branches found
No related tags found
1 merge request!28updated Snakefile, cluster and config for metaT read mapping
......@@ -232,7 +232,14 @@ rule PLOT_MMSEQ2:
rule METAT:
input:
expand(os.path.join(RESULTS_DIR, "preprocessing/metaT/{metaT_sample}.fastp.{report_type}"), metaT_sample="GDB_2018_metaT", report_type=["html", "json"])
expand(os.path.join(RESULTS_DIR, "preprocessing/metaT/{metaT_sample}.fastp.{report_type}"), metaT_sample="GDB_2018_metaT", report_type=["html", "json"]),
# expand(os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}-x-{barcode}.bam"), metaT_sample="GDB_2018_metaT", barcode=BARCODES),
expand(os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", assembler="megahit"),
expand(os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", barcode=BARCODES, assembler="metaspades_hybrid"),
expand(os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}_reads-x-{barcode}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", barcode=BARCODES, assembler=ASSEMBLERS),
expand(os.path.join(RESULTS_DIR, "genomecov/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", assembler="megahit"),
expand(os.path.join(RESULTS_DIR, "genomecov/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", barcode=BARCODES, assembler="metaspades_hybrid"),
expand(os.path.join(RESULTS_DIR, "genomecov/metaT/lr/{metaT_sample}_reads-x-{barcode}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", barcode=BARCODES, assembler=ASSEMBLERS)
######
# RULES
......@@ -1411,4 +1418,106 @@ rule run_fastp_on_metaT_reads:
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule run_fastqc_on_preprocessed_metaT_reads:
input: os.path.join(RESULTS_DIR, "preprocessing/metaT/{metaT_sample}_{suffix}.fq.gz"),
output:
html=os.path.join(RESULTS_DIR, "qc/metaT/fastqc/{metaT_sample}/{metaT_sample}_{suffix}.fastqc.html"),
zip=os.path.join(RESULTS_DIR, "qc/metaT/fastqc/{metaT_sample}/{metaT_sample}_{suffix}.fastqc.zip") # the suffix _fastqc.zip is necessary for multiqc to find the file. If not using multiqc, you are free to choose an arbitrary filename
conda: "envs/fastqc.yaml"
script: "scripts/run_fastqc.py" # Use a separate script rather than a simple call to FASTQC to overcome possible race conditions when running in parallel (https://bitbucket.org/snakemake/snakemake-wrappers/raw/7f5e01706728e32d52aa413f213e950f50bf4129/bio/fastqc/wrapper.py)
# rule map_metaT_reads_to_long_reads:
# input:
# query_r1=rules.run_fastp_on_metaT_reads.output.r1,
# query_r2=rules.run_fastp_on_metaT_reads.output.r2,
# index=os.path.join(RESULTS_DIR, "mapping/lr/merged/{barcode}.amb"),
# index_fa=os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fna"),
# output: os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}-x-{barcode}.bam")
# threads: config["bwa"]["threads"]
# params:
# index_prefix=os.path.join(RESULTS_DIR, "mapping/lr/merged/{barcode}"),
# out_prefix=os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}-x-{barcode}.")
# log: os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}-x-{barcode}.bwa_mem_samtools")
# conda: "envs/bwa.yaml"
# shell:
# """
# (date &&\
# bwa mem {config[bwa][long_reads_index][opts]} -t {threads} {params.index_prefix} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -S -b -u -T {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
# date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
# """
rule map_metaT_reads_to_long_read_contigs_w_bwa_mem:
input:
query_r1=rules.run_fastp_on_metaT_reads.output.r1,
query_r2=rules.run_fastp_on_metaT_reads.output.r2,
index_amb=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr/merged/{barcode}/assembly.amb"),
index_ann=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr/merged/{barcode}/assembly.ann"),
index_bwt=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr/merged/{barcode}/assembly.bwt"),
index_pac=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr/merged/{barcode}/assembly.pac"),
index_sa=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr/merged/{barcode}/assembly.sa"),
index_fa=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr/merged/{barcode}/assembly.fna")
output: os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}_reads-x-{barcode}-{assembler}_contigs.bam")
params:
index_prefix=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr/merged/{barcode}/assembly"),
out_prefix=os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}_reads-x_{barcode}_contigs")
conda: "envs/bwa.yaml"
threads: config["bwa"]["threads"]
log: os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}_reads-x-{barcode}-{assembler}_contigs.bwa_mem_samtools")
shell:
"""
(date &&\
bwa mem -t {threads} {params.index_prefix} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule map_metaT_reads_to_short_read_contigs_w_bwa_mem:
input:
query_r1=rules.run_fastp_on_metaT_reads.output.r1,
query_r2=rules.run_fastp_on_metaT_reads.output.r2,
index_amb=os.path.join(RESULTS_DIR, "assembly/{assembler}/{sr_sample}/final.contigs.amb"),
index_ann=os.path.join(RESULTS_DIR, "assembly/{assembler}/{sr_sample}/final.contigs.ann"),
index_bwt=os.path.join(RESULTS_DIR, "assembly/{assembler}/{sr_sample}/final.contigs.bwt"),
index_pac=os.path.join(RESULTS_DIR, "assembly/{assembler}/{sr_sample}/final.contigs.pac"),
index_sa=os.path.join(RESULTS_DIR, "assembly/{assembler}/{sr_sample}/final.contigs.sa"),
index_fa=rules.assemble_short_reads_w_megahit.output
output: os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs.bam")
params:
index_prefix=os.path.join(RESULTS_DIR, "assembly/{assembler}/{sr_sample}/final.contigs"),
out_prefix=os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs")
conda: "envs/bwa.yaml"
threads: config["bwa"]["threads"]
log: os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs.bwa_mem_samtools")
shell:
"""
(date &&\
bwa mem -t {threads} {params.index_prefix} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
rule map_metaT_reads_to_hybrid_contigs_w_bwa_mem:
input:
query_r1=rules.run_fastp_on_metaT_reads.output.r1,
query_r2=rules.run_fastp_on_metaT_reads.output.r2,
index_amb=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sr_sample}/contigs.amb"),
index_ann=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sr_sample}/contigs.ann"),
index_bwt=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sr_sample}/contigs.bwt"),
index_pac=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sr_sample}/contigs.pac"),
index_sa=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sr_sample}/contigs.sa"),
index_fa=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sr_sample}/contigs.fna")
output: os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs.bam")
params:
index_prefix=os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sr_sample}/contigs"),
out_prefix=os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs")
log: os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs.bwa_mem_samtools")
conda: "envs/bwa.yaml"
threads: config["bwa"]["threads"]
shell:
"""
(date &&\
bwa mem -t {threads} {params.index_prefix} {input.query_r1} {input.query_r2} | samtools view -@ {config[samtools][view][threads]} -SbT {input.index_fa} | samtools sort -@ {config[samtools][sort][threads]} -m {config[samtools][sort][chunk_size]} -T {params.out_prefix} > {output} &&\
date) 2> >(tee {log}.stderr) > >(tee {log}.stdout)
"""
#######
# End of Snakefile; 2020-04-20
#######
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment