From e4c8ebf06bf267002d16d56e3772cfe1d6845e72 Mon Sep 17 00:00:00 2001
From: Valentina Galata <valentina.galata@uni.lu>
Date: Sun, 16 Aug 2020 19:03:10 +0200
Subject: [PATCH] Update taxonomy rules: Kaiju for all proteins, Kraken2 for
 filtered contigs only

---
 workflow/rules/taxonomy.smk | 64 +++++++++++--------------------------
 workflow/steps/taxonomy.smk | 29 ++++-------------
 2 files changed, 25 insertions(+), 68 deletions(-)

diff --git a/workflow/rules/taxonomy.smk b/workflow/rules/taxonomy.smk
index 87b7e12..b21c3d4 100644
--- a/workflow/rules/taxonomy.smk
+++ b/workflow/rules/taxonomy.smk
@@ -5,17 +5,16 @@
 
 rule tax_kraken2_contigs:
     input:
-        contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{atype}.fasta"),
+        contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta"),
         db=lambda wildcards: config["kraken2"]["db"][wildcards.db]
     output:
-        labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{atype}.{db}.labels.txt"),
-        report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{atype}.{db}.report.txt")
+        labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{db}.labels.txt"),
+        report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{db}.report.txt")
     log:
-        "logs/kraken2.{rtype}.{tool}.{atype}.{db}.log"
+        "logs/kraken2.{rtype}.{tool}.{db}.log"
     wildcard_constraints:
         rtype="|".join(READ_TYPES),
         tool="|".join(ASSEMBLERS),
-        atype="ASSEMBLY|ASSEMBLY.FILTERED",
         db="|".join(config["kraken2"]["db"].keys())
     threads:
         config["kraken2"]["threads"]
@@ -85,68 +84,43 @@ rule tax_kraken2_lr:
 ##################################################
 # Kaiju
 
-rule tax_kaiju_cdhit:
+rule tax_kaiju:
     input:
-        faa=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"),
+        faa=os.path.join(RESULTS_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa"),
         nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"),
         names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"),
         fmi=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "%s.fmi" % wildcards.db),
     output:
-        out=temp(os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.tsv.tmp")),
-        names=os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.tsv")
+        out=temp(os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv.tmp")),
+        names=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv")
     log:
-        "logs/kaiju.cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.log"
+        "logs/kaiju.{rtype}.{tool}.{db}.log"
     wildcard_constraints:
-        rtype1="|".join(READ_TYPES),
-        rtype2="|".join(READ_TYPES),
-        tool1="|".join(ASSEMBLERS),
-        tool2="|".join(ASSEMBLERS),
+        rtype="|".join(READ_TYPES),
+        tool="|".join(ASSEMBLERS),
         db="|".join(config["kaiju"]["db"].keys())
     threads:
         config["kaiju"]["threads"]
     conda:
         "../envs/kaiju.yaml"
     message:
-        "Tax. classification w/ Kaiju ({wildcards.db}, CD-HIT)"
+        "Tax. classification w/ Kaiju ({wildcards.db}, proteins)"
     shell:
         "(date && "
         "kaiju -t {input.nodes} -f {input.fmi} -i {input.faa} -o {output.out} -z {threads} -v -p && "
         "kaiju-addTaxonNames -p -t {input.nodes} -n {input.names} -i {output.out} -o {output.names} && "
         "date) &> {log}"
 
-# rule tax_kaiju_summary:
-#     input:
-#         out=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{bname}.{db}.tsv"),
-#         nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"),
-#         names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"),
-#     output:
-#         os.path.join(RESULTS_DIR, "taxonomy/kaiju/{bname}.{db}.{rank}")
-#     wildcard_constraints:
-#         db="|".join(config["kaiju"]["db"].keys()),
-#         rank="|".join(config["kaiju"]["ranks"])
-#     threads:
-#         1
-#     conda:
-#         "../envs/kaiju.yaml"
-#     message:
-#         "Tax. classification summary w/ Kaiju ({wildcards.db}, {wildcards.rank})"
-#     shell:
-#         "kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v"
-
-rule tax_kaiju_cdhit_summary:
+rule tax_kaiju_summary:
     input:
-        out=expand(
-            os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{{db}}.tsv"),
-            combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS]
-        ) + expand(
-            os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{{db}}.tsv"),
-            combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]
-        ),
+        out=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv"),
         nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"),
-        names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp")
+        names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"),
     output:
-        os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{db}.summary.{rank}.tsv")
+        os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.summary.{rank}.tsv")
     wildcard_constraints:
+        rtype="|".join(READ_TYPES),
+        tool="|".join(ASSEMBLERS),
         db="|".join(config["kaiju"]["db"].keys()),
         rank="|".join(config["kaiju"]["ranks"])
     threads:
@@ -156,4 +130,4 @@ rule tax_kaiju_cdhit_summary:
     message:
         "Tax. classification summary w/ Kaiju ({wildcards.db}, {wildcards.rank})"
     shell:
-        "kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v"
\ No newline at end of file
+        "kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v"
diff --git a/workflow/steps/taxonomy.smk b/workflow/steps/taxonomy.smk
index 008c989..eb39f66 100644
--- a/workflow/steps/taxonomy.smk
+++ b/workflow/steps/taxonomy.smk
@@ -7,9 +7,8 @@ rule TAXONOMY:
     input:
         # Kraken2
         expand(
-            os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype_tool}.{atype}.{db}.{otype}.txt"),
+            os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype_tool}.{db}.{otype}.txt"),
             rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
-            atype=["ASSEMBLY", "ASSEMBLY.FILTERED"],
             db=config["kraken2"]["db"].keys(),
             otype=["labels", "report"]
         ) if "kraken2" in config["steps_taxonomy"] else [],
@@ -26,31 +25,15 @@ rule TAXONOMY:
         ) if "kraken2" in config["steps_taxonomy"] else [],
         # Kaiju
         expand(
-            os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.tsv"),
-            combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS],
-            db=config["kaiju"]["db"].keys()
-        ) if "kaiju" in config["steps_taxonomy"] else [],
-        expand(
-            os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.tsv"),
-            combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS],
+            os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype_tool}.{db}.tsv"),
+            rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
             db=config["kaiju"]["db"].keys()
         ) if "kaiju" in config["steps_taxonomy"] else [],
         expand(
-            os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{db}.summary.{rank}.tsv"),
+            os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype_tool}.{db}.summary.{rank}.tsv"),
+            rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
             db=config["kaiju"]["db"].keys(),
             rank=config["kaiju"]["ranks"]
-        ) if "kaiju" in config["steps_taxonomy"] else []
-        # expand(
-        #     os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.{rank}"),
-        #     combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS],
-        #     db=config["kaiju"]["db"].keys(),
-        #     rank=config["kaiju"]["ranks"]
-        # ) if "kaiju" in config["steps_taxonomy"] else [],
-        # expand(
-        #     os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.{rank}"),
-        #     combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS],
-        #     db=config["kaiju"]["db"].keys(),
-        #     rank=config["kaiju"]["ranks"]
-        # ) if "kaiju" in config["steps_taxonomy"] else [],
+        ) if "kaiju" in config["steps_taxonomy"] else [],
     output:
         touch("status/taxonomy.done")
-- 
GitLab