From 57bd993461b1d87944a004bdbfa9052e7259d75c Mon Sep 17 00:00:00 2001 From: Valentina Galata <valentina.galata@uni.lu> Date: Thu, 23 Jul 2020 09:59:41 +0200 Subject: [PATCH] cleanup: replace out/err log by log; changed params for metaquast --- workflow/rules/analysis.smk | 66 ++++++++++++++++--------------------- workflow/steps/analysis.smk | 45 +++++++------------------ 2 files changed, 40 insertions(+), 71 deletions(-) diff --git a/workflow/rules/analysis.smk b/workflow/rules/analysis.smk index e05cd28..239f7a8 100644 --- a/workflow/rules/analysis.smk +++ b/workflow/rules/analysis.smk @@ -5,12 +5,11 @@ rule analysis_quast: input: - os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") + os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta") output: os.path.join(RESULTS_DIR, "analysis/quast/{rtype}/{tool}/report.tsv") log: - out="logs/quast.{rtype}.{tool}.out.log", - err="logs/quast.{rtype}.{tool}.err.log" + "logs/quast.{rtype}.{tool}.log" wildcard_constraints: rtype="|".join(READ_TYPES), tool="|".join(ASSEMBLERS) @@ -19,9 +18,11 @@ rule analysis_quast: conda: os.path.join(ENV_DIR, "quast.yaml") message: - "Assess assembly quality w/ QUAST: {input}" + "metaQUAST: {input}" shell: - "(date && metaquast.py --max-ref-number 0 --threads {threads} {input} -o $(dirname {output}) && date) 2> {log.err} > {log.out}" + "(date && " + "metaquast.py --max-ref-number 0 --min-contig 0 --contig-thresholds 0,1000,2000,5000 --threads {threads} {input} -o $(dirname {output}) && " + "date) &> {log}" ################################################## # Proteins @@ -33,8 +34,7 @@ rule analysis_bbmap_rename: output: temp(os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype}_{tool}.faa")) log: - out="logs/analysis_bbmap_rename_{rtype}.{tool}.out.log", - err="logs/analysis_bbmap_rename_{rtype}.{tool}.err.log" + "logs/analysis_bbmap_rename_{rtype}.{tool}.log" wildcard_constraints: rtype="|".join(READ_TYPES), tool="|".join(ASSEMBLERS) @@ -48,7 +48,7 @@ rule analysis_bbmap_rename: message: "BBMAP: rename FASTA entries in {input}" shell: - "(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) 2> {log.err} > {log.out}" + "(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) &> {log}" rule analysis_cdhit: input: @@ -58,8 +58,7 @@ rule analysis_cdhit: faa12=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"), faa21=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype2}_{tool2}__{rtype1}_{tool1}.faa") log: - out="logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log", - err="logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log" + "logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.log" wildcard_constraints: rtype1="|".join(READ_TYPES), rtype2="|".join(READ_TYPES), @@ -74,14 +73,14 @@ rule analysis_cdhit: "(date && " "cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && " "cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && " - "date) 2> {log.err} > {log.out}" + "date) &> {log}" ################################################## # Circular contigs rule circ_contigs_fasta: input: - os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") + os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta") output: split1=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.1.fasta")), split2=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.2.fasta")) @@ -123,8 +122,7 @@ rule circ_contigs_blastn: output: os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.tsv") log: - out="logs/circ_blast.{rtype}.{tool}.out.log", - err="logs/circ_blast.{rtype}.{tool}.err.log" + "logs/circ_blast.{rtype}.{tool}.log" wildcard_constraints: rtype="|".join(READ_TYPES), tool="|".join(ASSEMBLERS) @@ -136,7 +134,7 @@ rule circ_contigs_blastn: shell: "(date && " "blastn -task megablast -num_alignments 50 -perc_identity 95 {params.outfmt} -query {input.fasta} -db {params.db} -out {output} && " - "date) 2> {log.err} > {log.out}" + "date) &> {log}" rule circ_contigs_filter: input: @@ -174,18 +172,17 @@ rule circ_contigs_filter: # Assemblies rule mash_sketch: input: - os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") + os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta") output: os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.msh") log: - out="logs/mash.sketch.{rtype}.{tool}.out.log", - err="logs/mash.sketch.{rtype}.{tool}.err.log" + "logs/mash.sketch.{rtype}.{tool}.log" threads: 1 conda: os.path.join(ENV_DIR, "mash.yaml") shell: - "(date && ofile={output} && mash sketch -k 31 -s 10000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}" + "(date && ofile={output} && mash sketch -k 31 -s 10000 -S 42 -o ${{ofile%.*}} {input} && date) &> {log}" rule mash_sketch_paste: input: @@ -207,8 +204,7 @@ rule mash_dist: output: os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist") log: - out="logs/mash.dist.contigs.out.log", - err="logs/mash.dist.contigs.err.log" + "logs/mash.dist.contigs.log" threads: config["mash"]["threads"] conda: @@ -216,27 +212,26 @@ rule mash_dist: shell: "(date && " "mash dist -t -p {threads} {input} {input} > {output} && " - "date) 2> {log.err} > {log.out}" + "date) &> {log}" # Contigs rule mash_sketch_contigs: input: - os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") + os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta") output: os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.contigs.msh") log: - out="logs/mash.sketch.{rtype}.{tool}.contigs.out.log", - err="logs/mash.sketch.{rtype}.{tool}.contigs.err.log" + "logs/mash.sketch.{rtype}.{tool}.contigs.log" threads: 1 conda: os.path.join(ENV_DIR, "mash.yaml") shell: - "(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}" + "(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) &> {log}" rule mash_screen_contigs: input: - fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]), + fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.FILTERED.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]), msh=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.contigs.msh"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]) output: expand(os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]) @@ -252,12 +247,11 @@ rule mash_screen_contigs: # rule analysis_mmseqs2_db: # input: -# os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") +# os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta") # output: # os.path.join(RESULTS_DIR, "analysis/mmseqs2/{rtype}/{tool}/ASSEMBLY_db") # log: -# out="logs/mmseqs2_db.{rtype}.{tool}.out.log", -# err="logs/mmseqs2_db.{rtype}.{tool}.err.log" +# "logs/mmseqs2_db.{rtype}.{tool}.log" # wildcard_constraints: # rtype="|".join(READ_TYPES), # tool="|".join(ASSEMBLERS) @@ -266,7 +260,7 @@ rule mash_screen_contigs: # message: # "Create MMseqs2 DB from {input}" # shell: -# "(date && mmseqs createdb {input} {output} && date) 2> {log.err} > {log.out}" +# "(date && mmseqs createdb {input} {output} && date) &> {log}" # rule analysis_mmseqs2_compare: # input: @@ -275,8 +269,7 @@ rule mash_screen_contigs: # output: # os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}") # log: -# out="logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log", -# err="logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log" +# "logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.log" # wildcard_constraints: # rtype1="|".join(READ_TYPES), # rtype2="|".join(READ_TYPES), @@ -290,7 +283,7 @@ rule mash_screen_contigs: # "Create MMseqs2 compare: {input}" # shell: # # TODO: "mmseqs2_tmp" ??? (see old files) -# "(date && mmseqs rbh {input.db1} {input.db2} {output} --min-seq-id 0.9 --threads {threads} && date) 2> {log.err} > {log.out}" +# "(date && mmseqs rbh {input.db1} {input.db2} {output} --min-seq-id 0.9 --threads {threads} && date) &> {log}" # rule analysis_mmseqs2_m8_convert: # input: @@ -300,11 +293,10 @@ rule mash_screen_contigs: # output: # os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}.m8") # log: -# out="logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log", -# err="logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log" +# "logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.log" # conda: # os.path.join(ENV_DIR, "cd-hit.yaml") # message: # "Create MMseqs2 compare: {input}" # shell: -# "(date && mmseqs convertalis {input.db1} {input.db2} {input.rbh} {output} && date) 2> {log.err} > {log.out}" +# "(date && mmseqs convertalis {input.db1} {input.db2} {input.rbh} {output} && date) &> {log}" diff --git a/workflow/steps/analysis.smk b/workflow/steps/analysis.smk index 4f07f73..afe736d 100644 --- a/workflow/steps/analysis.smk +++ b/workflow/steps/analysis.smk @@ -3,56 +3,33 @@ include: "../rules/analysis.smk" -# NOTE: Using "shell: touch ..." to avoid the rule from being autodetected as `localrule`. -# This is needed so that an email can be sent upon event changes for this rule. - rule ANALYSIS: input: - "status/analysis_assembly.done", - "status/analysis_proteins.done", - "status/analysis_circ.done", - "status/analysis_mash.done" - output: - touch("status/analysis.done") - -rule ANALYSIS_ASSEMBLY: - input: + # quast expand( os.path.join(RESULTS_DIR, "analysis/quast/{rtype_tool}/report.tsv"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS] - ) - output: - touch("status/analysis_assembly.done") - -rule ANALYSIS_PROTEINS: - input: + ) if "quast" in config["steps_analysis"] else [], + # cdhit expand( os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"), combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS] - ), + ) if "cdhit" in config["steps_analysis"] else [], expand( os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"), combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS] - ) - output: - touch("status/analysis_proteins.done") - -rule ANALYSIS_CIRC: - input: + ) if "cdhit" in config["steps_analysis"] else [], + # blast, circ. contigs expand( os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype_tool}/ASSEMBLY.{ctype}.tsv"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS], ctype=["circ", "compl"] - ) - output: - touch("status/analysis_circ.done") - -rule ANALYSIS_MASH: - input: - os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist"), + ) if "blast_circ" in config["steps_analysis"] else [], + # mash + [os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")] if "mash_dist" in config["steps_analysis"] else [], expand( os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS] - ) + ) if "mash_screen" in config["steps_analysis"] else [], output: - touch("status/analysis_mash.done") + touch("status/analysis.done") -- GitLab