From e4c8ebf06bf267002d16d56e3772cfe1d6845e72 Mon Sep 17 00:00:00 2001 From: Valentina Galata <valentina.galata@uni.lu> Date: Sun, 16 Aug 2020 19:03:10 +0200 Subject: [PATCH] updated tax rules: kaiju for all proteins, kraken2 for filtered contigs only --- workflow/rules/taxonomy.smk | 64 +++++++++++-------------------------- workflow/steps/taxonomy.smk | 29 ++++------------- 2 files changed, 25 insertions(+), 68 deletions(-) diff --git a/workflow/rules/taxonomy.smk b/workflow/rules/taxonomy.smk index 87b7e12..b21c3d4 100644 --- a/workflow/rules/taxonomy.smk +++ b/workflow/rules/taxonomy.smk @@ -5,17 +5,16 @@ rule tax_kraken2_contigs: input: - contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{atype}.fasta"), + contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta"), db=lambda wildcards: config["kraken2"]["db"][wildcards.db] output: - labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{atype}.{db}.labels.txt"), - report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{atype}.{db}.report.txt") + labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{db}.labels.txt"), + report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{db}.report.txt") log: - "logs/kraken2.{rtype}.{tool}.{atype}.{db}.log" + "logs/kraken2.{rtype}.{tool}.{db}.log" wildcard_constraints: rtype="|".join(READ_TYPES), tool="|".join(ASSEMBLERS), - atype="ASSEMBLY|ASSEMBLY.FILTERED", db="|".join(config["kraken2"]["db"].keys()) threads: config["kraken2"]["threads"] @@ -85,68 +84,43 @@ rule tax_kraken2_lr: ################################################## # Kaiju -rule tax_kaiju_cdhit: +rule tax_kaiju: input: - faa=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"), + faa=os.path.join(RESULTS_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa"), nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"), names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"), fmi=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "%s.fmi" % wildcards.db), output: - out=temp(os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.tsv.tmp")), - names=os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.tsv") + out=temp(os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv.tmp")), + names=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv") log: - "logs/kaiju.cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.log" + "logs/kaiju.{rtype}.{tool}.{db}.log" wildcard_constraints: - rtype1="|".join(READ_TYPES), - rtype2="|".join(READ_TYPES), - tool1="|".join(ASSEMBLERS), - tool2="|".join(ASSEMBLERS), + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS), db="|".join(config["kaiju"]["db"].keys()) threads: config["kaiju"]["threads"] conda: "../envs/kaiju.yaml" message: - "Tax. classification w/ Kaiju ({wildcards.db}, CD-HIT)" + "Tax. classification w/ Kaiju ({wildcards.db}, proteins)" shell: "(date && " "kaiju -t {input.nodes} -f {input.fmi} -i {input.faa} -o {output.out} -z {threads} -v -p && " "kaiju-addTaxonNames -p -t {input.nodes} -n {input.names} -i {output.out} -o {output.names} && " "date) &> {log}" -# rule tax_kaiju_summary: -# input: -# out=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{bname}.{db}.tsv"), -# nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"), -# names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"), -# output: -# os.path.join(RESULTS_DIR, "taxonomy/kaiju/{bname}.{db}.{rank}") -# wildcard_constraints: -# db="|".join(config["kaiju"]["db"].keys()), -# rank="|".join(config["kaiju"]["ranks"]) -# threads: -# 1 -# conda: -# "../envs/kaiju.yaml" -# message: -# "Tax. classification summary w/ Kaiju ({wildcards.db}, {wildcards.rank})" -# shell: -# "kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v" - -rule tax_kaiju_cdhit_summary: +rule tax_kaiju_summary: input: - out=expand( - os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{{db}}.tsv"), - combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS] - ) + expand( - os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{{db}}.tsv"), - combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS] - ), + out=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv"), nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"), - names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp") + names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"), output: - os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{db}.summary.{rank}.tsv") + os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.summary.{rank}.tsv") wildcard_constraints: + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS), db="|".join(config["kaiju"]["db"].keys()), rank="|".join(config["kaiju"]["ranks"]) threads: @@ -156,4 +130,4 @@ rule tax_kaiju_cdhit_summary: message: "Tax. classification summary w/ Kaiju ({wildcards.db}, {wildcards.rank})" shell: - "kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v" \ No newline at end of file + "kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v" diff --git a/workflow/steps/taxonomy.smk b/workflow/steps/taxonomy.smk index 008c989..eb39f66 100644 --- a/workflow/steps/taxonomy.smk +++ b/workflow/steps/taxonomy.smk @@ -7,9 +7,8 @@ rule TAXONOMY: input: # Kraken2 expand( - os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype_tool}.{atype}.{db}.{otype}.txt"), + os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype_tool}.{db}.{otype}.txt"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS], - atype=["ASSEMBLY", "ASSEMBLY.FILTERED"], db=config["kraken2"]["db"].keys(), otype=["labels", "report"] ) if "kraken2" in config["steps_taxonomy"] else [], @@ -26,31 +25,15 @@ rule TAXONOMY: ) if "kraken2" in config["steps_taxonomy"] else [], # Kaiju expand( - os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.tsv"), - combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS], - db=config["kaiju"]["db"].keys() - ) if "kaiju" in config["steps_taxonomy"] else [], - expand( - os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.tsv"), - combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS], + os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype_tool}.{db}.tsv"), + rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS], db=config["kaiju"]["db"].keys() ) if "kaiju" in config["steps_taxonomy"] else [], expand( - os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{db}.summary.{rank}.tsv"), + os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype_tool}.{db}.summary.{rank}.tsv"), + rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS], db=config["kaiju"]["db"].keys(), rank=config["kaiju"]["ranks"] - ) if "kaiju" in config["steps_taxonomy"] else [] - # expand( - # os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.{rank}"), - # combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS], - # db=config["kaiju"]["db"].keys(), - # rank=config["kaiju"]["ranks"] - # ) if "kaiju" in config["steps_taxonomy"] else [], - # expand( - # os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.{rank}"), - # combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS], - # db=config["kaiju"]["db"].keys(), - # rank=config["kaiju"]["ranks"] - # ) if "kaiju" in config["steps_taxonomy"] else [], + ) if "kaiju" in config["steps_taxonomy"] else [], output: touch("status/taxonomy.done") -- GitLab