# For running the ASSEMBLY and ANNOTATION part of the ONT pipeline # specify which rules to run include: '../rules/assembly_annotation.smk' # Rule all for running the assembly and annotation # rule ASSEMBLY_ANNOTATION: # input: # "assemble_and_coverage.done", # "basecall_merge_qc.done", # "basecall_merge_qc_NO_MOD.done", # "coverage_of_references.done", # "annotate.done", # "prodigal_gene_call.done", # "diamond_proteins.done" ###### # WRAPPER RULES ###### # Added the `shell:` so as to avoid it from being autodetected as `localrule`. This is needed so that an email can be sent upon event changes for this rule. rule BASECALL_MERGE_QC: input: expand(os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz"), barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats.txt"), barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats.txt"), run=RUNS, barcode=BARCODES), # expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}_R1_001.fastp.fq.gz"), sr_sample=["ONT3_MG_xx_Rashi_S11"]), expand(os.path.join(RESULTS_DIR, "preprocessing/sr/{sr_sample}.fastp.{report_type}"), sr_sample=SAMPLES, report_type=["html", "json"]), expand(os.path.join(RESULTS_DIR, "qc/sr/fastqc/{sr_sample}/{sr_sample}_R{orientation}_001.fastp.fastqc.html"), sr_sample=SAMPLES, orientation=["1", "2"]) output: "basecall_merge_qc.done" shell: "touch {output}" rule BASECALL_MERGE_QC_NO_MOD: input: expand(os.path.join(RESULTS_DIR, "basecalled_NO_MOD/merged/{barcode}.fastq.gz"), barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "qc/lr/merged/{barcode}/{barcode}NanoStats_NO_MOD.txt"), barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "qc/lr/{run}/{barcode}/{barcode}NanoStats_NO_MOD.txt"), run=RUNS, barcode=BARCODES) output: "basecall_merge_qc_NO_MOD.done" shell: "touch {output}" rule COVERAGE_OF_REFERENCES: input: expand(os.path.join(RESULTS_DIR, "genomecov/sr/minimap2/{sr_sample}-x-{reference}.avg_cov.txt"), sr_sample=SAMPLES, reference=REFERENCES), expand(os.path.join(RESULTS_DIR, "genomecov/sr/bwa_mem/{sr_sample}-x-{reference}.avg_cov.txt"), sr_sample=SAMPLES, reference=REFERENCES), expand(os.path.join(RESULTS_DIR, "genomecov/lr/merged/{barcode}/{barcode}-x-{reference}.avg_cov.txt"), barcode=BARCODES, reference=REFERENCES) output: "coverage_of_references.done" shell: "touch {output}" rule TEST: input: # Commented out as it crashes due to a Key error #expand(os.path.join(RESULTS_DIR, "assembly/rebaler/lr/merged/{barcode}/lr_{barcode}-sr_contigs_{sample}.fna"), barcode=BARCODES, sample=SAMPLES) expand(os.path.join(RESULTS_DIR, "annotation/diamond/lr/merged/{barcode}.diamond.daa"), barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sample}/contigs.fna"), barcode=BARCODES, sample="ONT3_MG_xx_Rashi_S11", assembler="metaspades_hybrid") rule TEST_PRODIGAL: input: expand(os.path.join(RESULTS_DIR, "annotation/proteins/{assembler}/lr/merged/{barcode}/assembly.faa"), assembler="flye", barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "annotation/proteins/{assembler}/polished/{polisher}/round_{round}/lr/merged/{barcode}/assembly_polished.faa"), assembler="flye", polisher=["medaka", "racon"], round="01", barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "annotation/proteins/{assembler}/{sample}/final.contigs.faa"), assembler=["megahit", "metaspades"], sample="ONT3_MG_xx_Rashi_S11"), expand(os.path.join(RESULTS_DIR, "annotation/proteins/{assembler}/lr_{barcode}-sr_{sample}/contigs.faa"), assembler="metaspades_hybrid", barcode=BARCODES, sample="ONT3_MG_xx_Rashi_S11") output: "prodigal_gene_call.done" shell: "touch {output}" rule TEST_DIAMOND: input: expand(os.path.join(RESULTS_DIR, "annotation/diamond/{assembler}/lr/merged/{barcode}/assembly.tsv"), assembler="flye", barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "annotation/diamond/{assembler}/polished/{polisher}/round_{round}/lr/merged/{barcode}/assembly_polished.tsv"), assembler="flye", polisher=["medaka", "racon"], round="01", barcode=BARCODES), expand(os.path.join(RESULTS_DIR, "annotation/diamond/{assembler}/{sample}/final.contigs.tsv"), assembler="megahit", sample="ONT3_MG_xx_Rashi_S11"), expand(os.path.join(RESULTS_DIR, "annotation/diamond/{assembler}/lr_{barcode}-sr_{sample}/contigs.tsv"), assembler="metaspades_hybrid", barcode=BARCODES, sample="ONT3_MG_xx_Rashi_S11") output: "diamond_proteins.done" shell: "touch {output}" rule ASSEMBLE_AND_COVERAGE: input: # long reads on long read contigs expand(os.path.join(RESULTS_DIR, "genomecov/lr/merged/{barcode}/{barcode}_reads-x-{barcode}-{assembler}_contigs.avg_cov.txt"), barcode=BARCODES, assembler=ASSEMBLERS), # short reads on long read contigs expand(os.path.join(RESULTS_DIR, "genomecov/sr/minimap2/{sample}_reads-x-{barcode}-{assembler}_contigs.avg_cov.txt"), sample="ONT3_MG_xx_Rashi_S11", barcode=BARCODES, assembler=ASSEMBLERS), # short reads on long read contigs expand(os.path.join(RESULTS_DIR, "genomecov/sr/bwa_mem/{sample}_reads-x-{barcode}-{assembler}_contigs.avg_cov.txt"), sample="ONT3_MG_xx_Rashi_S11", barcode=BARCODES, assembler=ASSEMBLERS), #expand(os.path.join(RESULTS_DIR, "assembly/operams/lr_{barcode}-sr_{sample}/contigs.fna"), sample=SAMPLES, barcode=BARCODES), #expand(os.path.join(RESULTS_DIR, "mapping/lr/merged/{barcode}/{barcode}_reads-x-{barcode}-{assembler}_contigs.bam"), barcode=BARCODES, assembler="flye"), # short reads on hybrid contigs #expand(os.path.join(RESULTS_DIR, "genomecov/sr/bwa_mem/{sample}_reads-x-lr_{barcode}_sr_{sample}-{assembler}_contigs.avg_cov.txt"), sample=SAMPLES, barcode=BARCODES, assembler="operams"), # long reads on hybrid contigs #expand(os.path.join(RESULTS_DIR, "genomecov/lr/bwa_mem/{barcode}_reads-x-lr_{barcode}_sr_{sample}-{assembler}_contigs.avg_cov.txt"), barcode=BARCODES, sample=SAMPLES, assembler="operams"), expand(os.path.join(RESULTS_DIR, "assembly/flye/lr/merged/{barcode}/assembly.fna"), barcode=BARCODES), # short reads on short read contigs expand(os.path.join(RESULTS_DIR, "genomecov/sr/bwa_mem/{sr_sample}_reads-x-{sr_sample}-{assembler}_contigs.avg_cov.txt"), sr_sample=SAMPLES, assembler=["megahit", "metaspades"]), # assemble short reads with metaspades # expand(os.path.join(RESULTS_DIR, "assembly/metaspades/{sr_sample}/contigs.fna"), sr_sample=SAMPLES), # long reads on short read contigs expand(os.path.join(RESULTS_DIR, "genomecov/lr/bwa_mem/{barcode}_reads-x-{sr_sample}-{assembler}_contigs.avg_cov.txt"), barcode=BARCODES, sr_sample=SAMPLES, assembler="megahit") output: "assemble_and_coverage.done" shell: "touch {output}" rule ANNOTATE: input: # short reads coverage and diversity estimate expand(os.path.join(RESULTS_DIR, "annotation/nonpareil/{sr_sample}_R{orientation}_001.fastp.npa"), sr_sample=["ONT3_MG_xx_Rashi_S11"], orientation=["1", "2"]) # compute methylation frequencies # expand(os.path.join(RESULTS_DIR, "annotation/methylation/{assembler}/nanopolish/round_{round}/lr/merged/{barcode}/methylation_frequencies.tsv"), assembler="flye", barcode=BARCODES, round="01") output: "annotate.done" shell: "touch {output}" rule POLISH_AND_COVERAGE: input: # short read-polished contigs from OPERA-MS #expand(os.path.join(RESULTS_DIR, "assembly/{assembler}/lr_{barcode}-sr_{sample}/contigs.fna"), sample=SAMPLES, barcode=BARCODES, assembler="operams"), #long read polished contigs #expand(os.path.join(RESULTS_DIR, "assembly/{assembler}/polished/{polisher}/round_{round}/lr/merged/{barcode}/assembly_polished.fna"), assembler="flye", polisher="medaka", round="01", barcode=BARCODES), # map long reads to long read polished contigs expand(os.path.join(RESULTS_DIR, "genomecov/polished/{polisher}/round_{round}/lr/merged/{barcode}/{barcode}_reads-x-{barcode}-{assembler}_polished_contigs.avg_cov.txt"), assembler="flye", polisher="medaka", round="01", barcode=BARCODES), # create nanonoplish index #expand(os.path.join(RESULTS_DIR, "basecalled/merged/{barcode}.fastq.gz.index.readdb"), barcode=BARCODES) # polish flye contigs with nanopolish #expand(os.path.join(RESULTS_DIR, "assembly/{assembler}/polished/nanopolish/round_{round}/lr/merged/{barcode}/assembly_polished.fna"), assembler="flye", round="01", barcode=BARCODES), # polish using racon expand(os.path.join(RESULTS_DIR, "assembly/{assembler}/polished/racon/round_{round}/lr/merged/{barcode}/assembly_polished.fna"), assembler="flye", round="01", barcode=BARCODES)