Skip to content
Snippets Groups Projects
Commit e4c8ebf0 authored by Valentina Galata
Browse files

updated tax rules: kaiju for all proteins, kraken2 for filtered contigs only

parent 7bca601e
No related branches found
No related tags found
No related merge requests found
......@@ -5,17 +5,16 @@
rule tax_kraken2_contigs:
input:
contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/{atype}.fasta"),
contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta"),
db=lambda wildcards: config["kraken2"]["db"][wildcards.db]
output:
labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{atype}.{db}.labels.txt"),
report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{atype}.{db}.report.txt")
labels=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{db}.labels.txt"),
report=os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype}.{tool}.{db}.report.txt")
log:
"logs/kraken2.{rtype}.{tool}.{atype}.{db}.log"
"logs/kraken2.{rtype}.{tool}.{db}.log"
wildcard_constraints:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS),
atype="ASSEMBLY|ASSEMBLY.FILTERED",
db="|".join(config["kraken2"]["db"].keys())
threads:
config["kraken2"]["threads"]
......@@ -85,68 +84,43 @@ rule tax_kraken2_lr:
##################################################
# Kaiju
rule tax_kaiju_cdhit:
rule tax_kaiju:
input:
faa=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"),
faa=os.path.join(RESULTS_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa"),
nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"),
names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"),
fmi=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "%s.fmi" % wildcards.db),
output:
out=temp(os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.tsv.tmp")),
names=os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.tsv")
out=temp(os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv.tmp")),
names=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv")
log:
"logs/kaiju.cdhit.{rtype1}_{tool1}__{rtype2}_{tool2}.{db}.log"
"logs/kaiju.{rtype}.{tool}.{db}.log"
wildcard_constraints:
rtype1="|".join(READ_TYPES),
rtype2="|".join(READ_TYPES),
tool1="|".join(ASSEMBLERS),
tool2="|".join(ASSEMBLERS),
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS),
db="|".join(config["kaiju"]["db"].keys())
threads:
config["kaiju"]["threads"]
conda:
"../envs/kaiju.yaml"
message:
"Tax. classification w/ Kaiju ({wildcards.db}, CD-HIT)"
"Tax. classification w/ Kaiju ({wildcards.db}, proteins)"
shell:
"(date && "
"kaiju -t {input.nodes} -f {input.fmi} -i {input.faa} -o {output.out} -z {threads} -v -p && "
"kaiju-addTaxonNames -p -t {input.nodes} -n {input.names} -i {output.out} -o {output.names} && "
"date) &> {log}"
# rule tax_kaiju_summary:
# input:
# out=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{bname}.{db}.tsv"),
# nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"),
# names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"),
# output:
# os.path.join(RESULTS_DIR, "taxonomy/kaiju/{bname}.{db}.{rank}")
# wildcard_constraints:
# db="|".join(config["kaiju"]["db"].keys()),
# rank="|".join(config["kaiju"]["ranks"])
# threads:
# 1
# conda:
# "../envs/kaiju.yaml"
# message:
# "Tax. classification summary w/ Kaiju ({wildcards.db}, {wildcards.rank})"
# shell:
# "kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v"
rule tax_kaiju_cdhit_summary:
rule tax_kaiju_summary:
input:
out=expand(
os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{{db}}.tsv"),
combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS]
) + expand(
os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{{db}}.tsv"),
combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]
),
out=os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.tsv"),
nodes=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "nodes.dmp"),
names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp")
names=lambda wildcards: os.path.join(config["kaiju"]["db"][wildcards.db], "names.dmp"),
output:
os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{db}.summary.{rank}.tsv")
os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype}.{tool}.{db}.summary.{rank}.tsv")
wildcard_constraints:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS),
db="|".join(config["kaiju"]["db"].keys()),
rank="|".join(config["kaiju"]["ranks"])
threads:
......@@ -156,4 +130,4 @@ rule tax_kaiju_cdhit_summary:
message:
"Tax. classification summary w/ Kaiju ({wildcards.db}, {wildcards.rank})"
shell:
"kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v"
\ No newline at end of file
"kaiju2table -p -t {input.nodes} -n {input.names} -r {wildcards.rank} -o {output} {input.out} -v"
......@@ -7,9 +7,8 @@ rule TAXONOMY:
input:
# Kraken2
expand(
os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype_tool}.{atype}.{db}.{otype}.txt"),
os.path.join(RESULTS_DIR, "taxonomy/kraken2/{rtype_tool}.{db}.{otype}.txt"),
rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
atype=["ASSEMBLY", "ASSEMBLY.FILTERED"],
db=config["kraken2"]["db"].keys(),
otype=["labels", "report"]
) if "kraken2" in config["steps_taxonomy"] else [],
......@@ -26,31 +25,15 @@ rule TAXONOMY:
) if "kraken2" in config["steps_taxonomy"] else [],
# Kaiju
expand(
os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.tsv"),
combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS],
db=config["kaiju"]["db"].keys()
) if "kaiju" in config["steps_taxonomy"] else [],
expand(
os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.tsv"),
combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS],
os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype_tool}.{db}.tsv"),
rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
db=config["kaiju"]["db"].keys()
) if "kaiju" in config["steps_taxonomy"] else [],
expand(
os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{db}.summary.{rank}.tsv"),
os.path.join(RESULTS_DIR, "taxonomy/kaiju/{rtype_tool}.{db}.summary.{rank}.tsv"),
rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
db=config["kaiju"]["db"].keys(),
rank=config["kaiju"]["ranks"]
) if "kaiju" in config["steps_taxonomy"] else []
# expand(
# os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.{rank}"),
# combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS],
# db=config["kaiju"]["db"].keys(),
# rank=config["kaiju"]["ranks"]
# ) if "kaiju" in config["steps_taxonomy"] else [],
# expand(
# os.path.join(RESULTS_DIR, "taxonomy/kaiju/cdhit.{combi}.{db}.{rank}"),
# combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS],
# db=config["kaiju"]["db"].keys(),
# rank=config["kaiju"]["ranks"]
# ) if "kaiju" in config["steps_taxonomy"] else [],
) if "kaiju" in config["steps_taxonomy"] else [],
output:
touch("status/taxonomy.done")
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment