From 57bd993461b1d87944a004bdbfa9052e7259d75c Mon Sep 17 00:00:00 2001
From: Valentina Galata <valentina.galata@uni.lu>
Date: Thu, 23 Jul 2020 09:59:41 +0200
Subject: [PATCH] cleanup: replace out/err log by log; changed params for
 metaquast

---
 workflow/rules/analysis.smk | 66 ++++++++++++++++---------------------
 workflow/steps/analysis.smk | 45 +++++++------------------
 2 files changed, 40 insertions(+), 71 deletions(-)

diff --git a/workflow/rules/analysis.smk b/workflow/rules/analysis.smk
index e05cd28..239f7a8 100644
--- a/workflow/rules/analysis.smk
+++ b/workflow/rules/analysis.smk
@@ -5,12 +5,11 @@
 
 rule analysis_quast:
     input:
-        os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
+        os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
     output:
         os.path.join(RESULTS_DIR, "analysis/quast/{rtype}/{tool}/report.tsv")
     log:
-        out="logs/quast.{rtype}.{tool}.out.log",
-        err="logs/quast.{rtype}.{tool}.err.log"
+        "logs/quast.{rtype}.{tool}.log"
     wildcard_constraints:
         rtype="|".join(READ_TYPES),
         tool="|".join(ASSEMBLERS)
@@ -19,9 +18,11 @@ rule analysis_quast:
     conda:
         os.path.join(ENV_DIR, "quast.yaml")
     message:
-        "Assess assembly quality w/ QUAST: {input}"
+        "metaQUAST: {input}"
     shell:
-        "(date && metaquast.py --max-ref-number 0 --threads {threads} {input} -o $(dirname {output}) && date) 2> {log.err} > {log.out}"
+        "(date && "
+        "metaquast.py --max-ref-number 0 --min-contig 0 --contig-thresholds 0,1000,2000,5000 --threads {threads} {input} -o $(dirname {output}) && "
+        "date) &> {log}"
 
 ##################################################
 # Proteins
@@ -33,8 +34,7 @@ rule analysis_bbmap_rename:
     output:
         temp(os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype}_{tool}.faa"))
     log:
-        out="logs/analysis_bbmap_rename_{rtype}.{tool}.out.log",
-        err="logs/analysis_bbmap_rename_{rtype}.{tool}.err.log"
+        "logs/analysis_bbmap_rename_{rtype}.{tool}.log"
     wildcard_constraints:
         rtype="|".join(READ_TYPES),
         tool="|".join(ASSEMBLERS)
@@ -48,7 +48,7 @@ rule analysis_bbmap_rename:
     message:
         "BBMAP: rename FASTA entries in {input}"
     shell:
-        "(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) 2> {log.err} > {log.out}"
+        "(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) &> {log}"
 
 rule analysis_cdhit:
     input:
@@ -58,8 +58,7 @@ rule analysis_cdhit:
         faa12=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"),
         faa21=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype2}_{tool2}__{rtype1}_{tool1}.faa")
     log:
-        out="logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",
-        err="logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
+        "logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
     wildcard_constraints:
         rtype1="|".join(READ_TYPES),
         rtype2="|".join(READ_TYPES),
@@ -74,14 +73,14 @@ rule analysis_cdhit:
         "(date && "
         "cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && "
         "cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && "
-        "date) 2> {log.err} > {log.out}"
+        "date) &> {log}"
 
 ##################################################
 # Circular contigs
 
 rule circ_contigs_fasta:
     input:  
-        os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
+        os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
     output: 
         split1=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.1.fasta")),
         split2=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.2.fasta"))
@@ -123,8 +122,7 @@ rule circ_contigs_blastn:
     output:
         os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.tsv")
     log:
-        out="logs/circ_blast.{rtype}.{tool}.out.log",
-        err="logs/circ_blast.{rtype}.{tool}.err.log"
+        "logs/circ_blast.{rtype}.{tool}.log"
     wildcard_constraints:
         rtype="|".join(READ_TYPES),
         tool="|".join(ASSEMBLERS)
@@ -136,7 +134,7 @@ rule circ_contigs_blastn:
     shell: 
         "(date && "
         "blastn -task megablast -num_alignments 50 -perc_identity 95 {params.outfmt} -query {input.fasta} -db {params.db} -out {output} && "
-        "date) 2> {log.err} > {log.out}"
+        "date) &> {log}"
 
 rule circ_contigs_filter:
     input:
@@ -174,18 +172,17 @@ rule circ_contigs_filter:
 # Assemblies
 rule mash_sketch:
     input:
-        os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
+        os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
     output:
         os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.msh")
     log:
-        out="logs/mash.sketch.{rtype}.{tool}.out.log",
-        err="logs/mash.sketch.{rtype}.{tool}.err.log"
+        "logs/mash.sketch.{rtype}.{tool}.log"
     threads:
         1
     conda:
         os.path.join(ENV_DIR, "mash.yaml")
     shell:
-        "(date && ofile={output} && mash sketch -k 31 -s 10000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}"
+        "(date && ofile={output} && mash sketch -k 31 -s 10000 -S 42 -o ${{ofile%.*}} {input} && date) &> {log}"
 
 rule mash_sketch_paste:
     input:
@@ -207,8 +204,7 @@ rule mash_dist:
     output:
         os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")
     log:
-        out="logs/mash.dist.contigs.out.log",
-        err="logs/mash.dist.contigs.err.log"
+        "logs/mash.dist.contigs.log"
     threads:
         config["mash"]["threads"]
     conda:
@@ -216,27 +212,26 @@ rule mash_dist:
     shell:
         "(date && "
         "mash dist -t -p {threads} {input} {input} > {output} && "
-        "date) 2> {log.err} > {log.out}"
+        "date) &> {log}"
 
 # Contigs
 rule mash_sketch_contigs:
     input:
-        os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
+        os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
     output:
         os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.contigs.msh")
     log:
-        out="logs/mash.sketch.{rtype}.{tool}.contigs.out.log",
-        err="logs/mash.sketch.{rtype}.{tool}.contigs.err.log"
+        "logs/mash.sketch.{rtype}.{tool}.contigs.log"
     threads:
         1
     conda:
         os.path.join(ENV_DIR, "mash.yaml")
     shell:
-        "(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}"
+        "(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) &> {log}"
 
 rule mash_screen_contigs:
     input:
-        fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]),
+        fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.FILTERED.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]),
         msh=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.contigs.msh"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
     output:
         expand(os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
@@ -252,12 +247,11 @@ rule mash_screen_contigs:
 
 # rule analysis_mmseqs2_db:
 #     input:
-#         os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
+#         os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
 #     output:
 #         os.path.join(RESULTS_DIR, "analysis/mmseqs2/{rtype}/{tool}/ASSEMBLY_db")
 #     log:
-#         out="logs/mmseqs2_db.{rtype}.{tool}.out.log",
-#         err="logs/mmseqs2_db.{rtype}.{tool}.err.log"
+#         "logs/mmseqs2_db.{rtype}.{tool}.log"
 #     wildcard_constraints:
 #         rtype="|".join(READ_TYPES),
 #         tool="|".join(ASSEMBLERS)
@@ -266,7 +260,7 @@ rule mash_screen_contigs:
 #     message:
 #         "Create MMseqs2 DB from {input}"
 #     shell:
-#         "(date && mmseqs createdb {input} {output} && date) 2> {log.err} > {log.out}"
+#         "(date && mmseqs createdb {input} {output} && date) &> {log}"
 
 # rule analysis_mmseqs2_compare:
 #     input:
@@ -275,8 +269,7 @@ rule mash_screen_contigs:
 #     output:
 #         os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}")
 #     log:
-#         out="logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",
-#         err="logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
+#         "logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
 #     wildcard_constraints:
 #         rtype1="|".join(READ_TYPES),
 #         rtype2="|".join(READ_TYPES),
@@ -290,7 +283,7 @@ rule mash_screen_contigs:
 #         "Create MMseqs2 compare: {input}"
 #     shell:
 #         # TODO: "mmseqs2_tmp" ??? (see old files)
-#         "(date && mmseqs rbh {input.db1} {input.db2} {output} --min-seq-id 0.9 --threads {threads} && date) 2> {log.err} > {log.out}"
+#         "(date && mmseqs rbh {input.db1} {input.db2} {output} --min-seq-id 0.9 --threads {threads} && date) &> {log}"
 
 # rule analysis_mmseqs2_m8_convert:
 #     input:
@@ -300,11 +293,10 @@ rule mash_screen_contigs:
 #     output:
 #         os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}.m8")
 #     log:
-#         out="logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",
-#         err="logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
+#         "logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
 #     conda:
 #         os.path.join(ENV_DIR, "cd-hit.yaml")
 #     message:
 #         "Create MMseqs2 compare: {input}"
 #     shell:
-#         "(date && mmseqs convertalis {input.db1} {input.db2} {input.rbh} {output} && date) 2> {log.err} > {log.out}"
+#         "(date && mmseqs convertalis {input.db1} {input.db2} {input.rbh} {output} && date) &> {log}"
diff --git a/workflow/steps/analysis.smk b/workflow/steps/analysis.smk
index 4f07f73..afe736d 100644
--- a/workflow/steps/analysis.smk
+++ b/workflow/steps/analysis.smk
@@ -3,56 +3,33 @@
 include:
     "../rules/analysis.smk"
 
-# NOTE: Using "shell: touch ..." to avoid the rule from being autodetected as `localrule`.
-#       This is needed so that an email can be sent upon event changes for this rule.
-
 rule ANALYSIS:
     input:
-        "status/analysis_assembly.done",
-        "status/analysis_proteins.done",
-        "status/analysis_circ.done",
-        "status/analysis_mash.done"
-    output:
-        touch("status/analysis.done")
-
-rule ANALYSIS_ASSEMBLY:
-    input:
+        # quast
         expand(
             os.path.join(RESULTS_DIR, "analysis/quast/{rtype_tool}/report.tsv"),
             rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
-        )
-    output:
-        touch("status/analysis_assembly.done")
-
-rule ANALYSIS_PROTEINS:
-    input:
+        ) if "quast" in config["steps_analysis"] else [],
+        # cdhit
         expand(
             os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"),
             combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS]
-        ),
+        ) if "cdhit" in config["steps_analysis"] else [],
         expand(
             os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"),
             combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]
-        )
-    output:
-        touch("status/analysis_proteins.done")
-
-rule ANALYSIS_CIRC:
-    input:
+        ) if "cdhit" in config["steps_analysis"] else [],
+        # blast, circ. contigs
         expand(
             os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype_tool}/ASSEMBLY.{ctype}.tsv"),
             rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
             ctype=["circ", "compl"]
-        )
-    output:
-        touch("status/analysis_circ.done")
-
-rule ANALYSIS_MASH:
-    input:
-        os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist"),
+        ) if "blast_circ" in config["steps_analysis"] else [],
+        # mash
+        [os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")] if "mash_dist" in config["steps_analysis"] else [],
         expand(
             os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"),
             rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
-        )
+        ) if "mash_screen" in config["steps_analysis"] else [],
     output:
-        touch("status/analysis_mash.done")
+        touch("status/analysis.done")
-- 
GitLab