diff --git a/workflow/rules/annotation.smk b/workflow/rules/annotation.smk index e72e5ecd703b13006b01a43c8c79839a40916b34..246cf89ea18123cfadae4716f4ab7f1d7a04431b 100644 --- a/workflow/rules/annotation.smk +++ b/workflow/rules/annotation.smk @@ -9,11 +9,11 @@ rule annotation_prodigal: output: os.path.join(RESULTS_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa") log: - out="logs/annotation_prodigal_{rtype}.{tool}.out.log", - err="logs/annotation_prodigal_{rtype}.{tool}.err.log" + out="logs/prodigal.{rtype}.{tool}.out.log", + err="logs/prodigal.{rtype}.{tool}.err.log" wildcard_constraints: - rtype="|".join(config["assemblers"].keys()), - tool="|".join(["|".join(a) for a in config["assemblers"].values()]) + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) threads: 1 conda: os.path.join(ENV_DIR, "prodigal.yaml") @@ -34,11 +34,11 @@ rule annotation_diamond_daa: # NOTE: use uncompressed DAA format; other formats can be derived using `diamond view` os.path.join(RESULTS_DIR, "annotation/diamond/{rtype}/{tool}/proteins.daa") log: - out="logs/annotation_diamond_{rtype}.{tool}.out.log", - err="logs/annotation_diamond_{rtype}.{tool}.err.log" + out="logs/diamond_daa.{rtype}.{tool}.out.log", + err="logs/diamond_daa.{rtype}.{tool}.err.log" wildcard_constraints: - rtype="|".join(config["assemblers"].keys()), - tool="|".join(["|".join(a) for a in config["assemblers"].values()]) + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) threads: config["diamond"]["threads"] params: @@ -58,11 +58,11 @@ rule annotation_diamond_tsv: output: os.path.join(RESULTS_DIR, "annotation/diamond/{rtype}/{tool}/proteins.tsv") log: - out="logs/annotation_diamond_tsv_{rtype}.{tool}.out.log", - err="logs/annotation_diamond_tsv_{rtype}.{tool}.err.log" + out="logs/diamond_tsv.{rtype}.{tool}.out.log", + err="logs/diamond_tsv.{rtype}.{tool}.err.log" wildcard_constraints: - rtype="|".join(config["assemblers"].keys()), - tool="|".join(["|".join(a) for a in config["assemblers"].values()]) + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) params: # NOTE: For the used conda version, need to specify the entire column layout (outfmt) explicitly outfmt="6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen", @@ -85,8 +85,8 @@ rule annotation_rgi_input: output: temp(os.path.join(RESULTS_DIR, "annotation/rgi/{rtype}/{tool}/input.faa")) wildcard_constraints: - rtype="|".join(config["assemblers"].keys()), - tool="|".join(["|".join(a) for a in config["assemblers"].values()]) + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) message: "RGI: create input FAA: {input}" shell: @@ -99,8 +99,8 @@ rule annotation_rgi_db: archive=temp(os.path.join(DB_DIR, "rgi/card-data.tar.bz2")), json=os.path.join(DB_DIR, "rgi/card.json") log: - out="logs/annotation_rgi_db.out.log", - err="logs/annotation_rgi_db.err.log" + out="logs/rgi_db.out.log", + err="logs/rgi_db.err.log" params: db_url=config["rgi"]["db_url"] message: @@ -119,11 +119,11 @@ rule annotation_rgi: output: os.path.join(RESULTS_DIR, "annotation/rgi/{rtype}/{tool}/rgi.txt") log: - out="logs/annotation_rgi_{rtype}.{tool}.out.log", - err="logs/annotation_rgi_{rtype}.{tool}.err.log" + out="logs/rgi.{rtype}.{tool}.out.log", + err="logs/rgi.{rtype}.{tool}.err.log" wildcard_constraints: - rtype="|".join(config["assemblers"].keys()), - tool="|".join(["|".join(a) for a in config["assemblers"].values()]) + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) threads: config["rgi"]["threads"] params: @@ -154,8 +154,11 @@ rule casc: output: os.path.join(RESULTS_DIR, "annotation/casc/{rtype}/{tool}/ASSEMBLY.results.txt") log: - out="logs/annotation_casc_{rtype}.{tool}.out.log", - err="logs/annotation_casc_{rtype}.{tool}.err.log" + out="logs/casc.{rtype}.{tool}.out.log", + err="logs/casc.{rtype}.{tool}.err.log" + wildcard_constraints: + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) threads: config["casc"]["threads"] conda: @@ -174,8 +177,11 @@ rule minced: txt=os.path.join(RESULTS_DIR, "annotation/minced/{rtype}/{tool}/ASSEMBLY.results.txt"), gff=os.path.join(RESULTS_DIR, "annotation/minced/{rtype}/{tool}/ASSEMBLY.results.gff") log: - out="logs/annotation_minced_{rtype}.{tool}.out.log", - err="logs/annotation_minced_{rtype}.{tool}.err.log" + out="logs/minced.{rtype}.{tool}.out.log", + err="logs/minced.{rtype}.{tool}.err.log" + wildcard_constraints: + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) conda: "../envs/analysis.yaml" shell: "(date && " @@ -184,7 +190,7 @@ rule minced: "date) 2> {log.err} > {log.out}" ################################################## -# PlasFlow +# Plasmids rule annotation_plasflow_input: input: @@ -192,8 +198,8 @@ rule annotation_plasflow_input: output: temp(os.path.join(RESULTS_DIR, "annotation/plasflow/{rtype}/{tool}/input.fasta")) wildcard_constraints: - rtype="|".join(config["assemblers"].keys()), - tool="|".join(["|".join(a) for a in config["assemblers"].values()]) + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) threads: 1 params: script=os.path.join(SRC_DIR, "filter_fasta_by_length.pl"), @@ -210,11 +216,11 @@ rule annotation_plasflow: tmp=os.path.join(RESULTS_DIR, "annotation/plasflow/{rtype}/{tool}/plasflow.tsv.tmp"), tsv=os.path.join(RESULTS_DIR, "annotation/plasflow/{rtype}/{tool}/plasflow.tsv") log: - out="logs/annotation_plasflow_{rtype}.{tool}.out.log", - err="logs/annotation_plasflow_{rtype}.{tool}.err.log" + out="logs/plasflow.{rtype}.{tool}.out.log", + err="logs/plasflow.{rtype}.{tool}.err.log" wildcard_constraints: - rtype="|".join(config["assemblers"].keys()), - tool="|".join(["|".join(a) for a in config["assemblers"].values()]) + rtype="|".join(READ_TYPES), + tool="|".join(ASSEMBLERS) threads: 1 params: threshold=config["plasflow"]["threshold"]