Skip to content
Snippets Groups Projects
Commit 57bd9934 authored by Valentina Galata
Browse files

cleanup: replace out/err log by log; changed params for metaquast

parent 5fbb942a
No related branches found
No related tags found
1 merge request: !76 Merge "cleanup" branch with "master" branch
...@@ -5,12 +5,11 @@ ...@@ -5,12 +5,11 @@
rule analysis_quast: rule analysis_quast:
input: input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
output: output:
os.path.join(RESULTS_DIR, "analysis/quast/{rtype}/{tool}/report.tsv") os.path.join(RESULTS_DIR, "analysis/quast/{rtype}/{tool}/report.tsv")
log: log:
out="logs/quast.{rtype}.{tool}.out.log", "logs/quast.{rtype}.{tool}.log"
err="logs/quast.{rtype}.{tool}.err.log"
wildcard_constraints: wildcard_constraints:
rtype="|".join(READ_TYPES), rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS) tool="|".join(ASSEMBLERS)
...@@ -19,9 +18,11 @@ rule analysis_quast: ...@@ -19,9 +18,11 @@ rule analysis_quast:
conda: conda:
os.path.join(ENV_DIR, "quast.yaml") os.path.join(ENV_DIR, "quast.yaml")
message: message:
"Assess assembly quality w/ QUAST: {input}" "metaQUAST: {input}"
shell: shell:
"(date && metaquast.py --max-ref-number 0 --threads {threads} {input} -o $(dirname {output}) && date) 2> {log.err} > {log.out}" "(date && "
"metaquast.py --max-ref-number 0 --min-contig 0 --contig-thresholds 0,1000,2000,5000 --threads {threads} {input} -o $(dirname {output}) && "
"date) &> {log}"
################################################## ##################################################
# Proteins # Proteins
...@@ -33,8 +34,7 @@ rule analysis_bbmap_rename: ...@@ -33,8 +34,7 @@ rule analysis_bbmap_rename:
output: output:
temp(os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype}_{tool}.faa")) temp(os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype}_{tool}.faa"))
log: log:
out="logs/analysis_bbmap_rename_{rtype}.{tool}.out.log", "logs/analysis_bbmap_rename_{rtype}.{tool}.log"
err="logs/analysis_bbmap_rename_{rtype}.{tool}.err.log"
wildcard_constraints: wildcard_constraints:
rtype="|".join(READ_TYPES), rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS) tool="|".join(ASSEMBLERS)
...@@ -48,7 +48,7 @@ rule analysis_bbmap_rename: ...@@ -48,7 +48,7 @@ rule analysis_bbmap_rename:
message: message:
"BBMAP: rename FASTA entries in {input}" "BBMAP: rename FASTA entries in {input}"
shell: shell:
"(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) 2> {log.err} > {log.out}" "(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) &> {log}"
rule analysis_cdhit: rule analysis_cdhit:
input: input:
...@@ -58,8 +58,7 @@ rule analysis_cdhit: ...@@ -58,8 +58,7 @@ rule analysis_cdhit:
faa12=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"), faa12=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"),
faa21=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype2}_{tool2}__{rtype1}_{tool1}.faa") faa21=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype2}_{tool2}__{rtype1}_{tool1}.faa")
log: log:
out="logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log", "logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
err="logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
wildcard_constraints: wildcard_constraints:
rtype1="|".join(READ_TYPES), rtype1="|".join(READ_TYPES),
rtype2="|".join(READ_TYPES), rtype2="|".join(READ_TYPES),
...@@ -74,14 +73,14 @@ rule analysis_cdhit: ...@@ -74,14 +73,14 @@ rule analysis_cdhit:
"(date && " "(date && "
"cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && " "cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && "
"cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && " "cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && "
"date) 2> {log.err} > {log.out}" "date) &> {log}"
################################################## ##################################################
# Circular contigs # Circular contigs
rule circ_contigs_fasta: rule circ_contigs_fasta:
input: input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
output: output:
split1=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.1.fasta")), split1=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.1.fasta")),
split2=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.2.fasta")) split2=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.2.fasta"))
...@@ -123,8 +122,7 @@ rule circ_contigs_blastn: ...@@ -123,8 +122,7 @@ rule circ_contigs_blastn:
output: output:
os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.tsv") os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.tsv")
log: log:
out="logs/circ_blast.{rtype}.{tool}.out.log", "logs/circ_blast.{rtype}.{tool}.log"
err="logs/circ_blast.{rtype}.{tool}.err.log"
wildcard_constraints: wildcard_constraints:
rtype="|".join(READ_TYPES), rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS) tool="|".join(ASSEMBLERS)
...@@ -136,7 +134,7 @@ rule circ_contigs_blastn: ...@@ -136,7 +134,7 @@ rule circ_contigs_blastn:
shell: shell:
"(date && " "(date && "
"blastn -task megablast -num_alignments 50 -perc_identity 95 {params.outfmt} -query {input.fasta} -db {params.db} -out {output} && " "blastn -task megablast -num_alignments 50 -perc_identity 95 {params.outfmt} -query {input.fasta} -db {params.db} -out {output} && "
"date) 2> {log.err} > {log.out}" "date) &> {log}"
rule circ_contigs_filter: rule circ_contigs_filter:
input: input:
...@@ -174,18 +172,17 @@ rule circ_contigs_filter: ...@@ -174,18 +172,17 @@ rule circ_contigs_filter:
# Assemblies # Assemblies
rule mash_sketch: rule mash_sketch:
input: input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
output: output:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.msh") os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.msh")
log: log:
out="logs/mash.sketch.{rtype}.{tool}.out.log", "logs/mash.sketch.{rtype}.{tool}.log"
err="logs/mash.sketch.{rtype}.{tool}.err.log"
threads: threads:
1 1
conda: conda:
os.path.join(ENV_DIR, "mash.yaml") os.path.join(ENV_DIR, "mash.yaml")
shell: shell:
"(date && ofile={output} && mash sketch -k 31 -s 10000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}" "(date && ofile={output} && mash sketch -k 31 -s 10000 -S 42 -o ${{ofile%.*}} {input} && date) &> {log}"
rule mash_sketch_paste: rule mash_sketch_paste:
input: input:
...@@ -207,8 +204,7 @@ rule mash_dist: ...@@ -207,8 +204,7 @@ rule mash_dist:
output: output:
os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist") os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")
log: log:
out="logs/mash.dist.contigs.out.log", "logs/mash.dist.contigs.log"
err="logs/mash.dist.contigs.err.log"
threads: threads:
config["mash"]["threads"] config["mash"]["threads"]
conda: conda:
...@@ -216,27 +212,26 @@ rule mash_dist: ...@@ -216,27 +212,26 @@ rule mash_dist:
shell: shell:
"(date && " "(date && "
"mash dist -t -p {threads} {input} {input} > {output} && " "mash dist -t -p {threads} {input} {input} > {output} && "
"date) 2> {log.err} > {log.out}" "date) &> {log}"
# Contigs # Contigs
rule mash_sketch_contigs: rule mash_sketch_contigs:
input: input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
output: output:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.contigs.msh") os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.contigs.msh")
log: log:
out="logs/mash.sketch.{rtype}.{tool}.contigs.out.log", "logs/mash.sketch.{rtype}.{tool}.contigs.log"
err="logs/mash.sketch.{rtype}.{tool}.contigs.err.log"
threads: threads:
1 1
conda: conda:
os.path.join(ENV_DIR, "mash.yaml") os.path.join(ENV_DIR, "mash.yaml")
shell: shell:
"(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}" "(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) &> {log}"
rule mash_screen_contigs: rule mash_screen_contigs:
input: input:
fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]), fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.FILTERED.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]),
msh=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.contigs.msh"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]) msh=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.contigs.msh"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
output: output:
expand(os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]) expand(os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
...@@ -252,12 +247,11 @@ rule mash_screen_contigs: ...@@ -252,12 +247,11 @@ rule mash_screen_contigs:
# rule analysis_mmseqs2_db: # rule analysis_mmseqs2_db:
# input: # input:
# os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta") # os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
# output: # output:
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/{rtype}/{tool}/ASSEMBLY_db") # os.path.join(RESULTS_DIR, "analysis/mmseqs2/{rtype}/{tool}/ASSEMBLY_db")
# log: # log:
# out="logs/mmseqs2_db.{rtype}.{tool}.out.log", # "logs/mmseqs2_db.{rtype}.{tool}.log"
# err="logs/mmseqs2_db.{rtype}.{tool}.err.log"
# wildcard_constraints: # wildcard_constraints:
# rtype="|".join(READ_TYPES), # rtype="|".join(READ_TYPES),
# tool="|".join(ASSEMBLERS) # tool="|".join(ASSEMBLERS)
...@@ -266,7 +260,7 @@ rule mash_screen_contigs: ...@@ -266,7 +260,7 @@ rule mash_screen_contigs:
# message: # message:
# "Create MMseqs2 DB from {input}" # "Create MMseqs2 DB from {input}"
# shell: # shell:
# "(date && mmseqs createdb {input} {output} && date) 2> {log.err} > {log.out}" # "(date && mmseqs createdb {input} {output} && date) &> {log}"
# rule analysis_mmseqs2_compare: # rule analysis_mmseqs2_compare:
# input: # input:
...@@ -275,8 +269,7 @@ rule mash_screen_contigs: ...@@ -275,8 +269,7 @@ rule mash_screen_contigs:
# output: # output:
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}") # os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}")
# log: # log:
# out="logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log", # "logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
# err="logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
# wildcard_constraints: # wildcard_constraints:
# rtype1="|".join(READ_TYPES), # rtype1="|".join(READ_TYPES),
# rtype2="|".join(READ_TYPES), # rtype2="|".join(READ_TYPES),
...@@ -290,7 +283,7 @@ rule mash_screen_contigs: ...@@ -290,7 +283,7 @@ rule mash_screen_contigs:
# "Create MMseqs2 compare: {input}" # "Create MMseqs2 compare: {input}"
# shell: # shell:
# # TODO: "mmseqs2_tmp" ??? (see old files) # # TODO: "mmseqs2_tmp" ??? (see old files)
# "(date && mmseqs rbh {input.db1} {input.db2} {output} --min-seq-id 0.9 --threads {threads} && date) 2> {log.err} > {log.out}" # "(date && mmseqs rbh {input.db1} {input.db2} {output} --min-seq-id 0.9 --threads {threads} && date) &> {log}"
# rule analysis_mmseqs2_m8_convert: # rule analysis_mmseqs2_m8_convert:
# input: # input:
...@@ -300,11 +293,10 @@ rule mash_screen_contigs: ...@@ -300,11 +293,10 @@ rule mash_screen_contigs:
# output: # output:
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}.m8") # os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}.m8")
# log: # log:
# out="logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log", # "logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
# err="logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
# conda: # conda:
# os.path.join(ENV_DIR, "cd-hit.yaml") # os.path.join(ENV_DIR, "cd-hit.yaml")
# message: # message:
# "Create MMseqs2 compare: {input}" # "Create MMseqs2 compare: {input}"
# shell: # shell:
# "(date && mmseqs convertalis {input.db1} {input.db2} {input.rbh} {output} && date) 2> {log.err} > {log.out}" # "(date && mmseqs convertalis {input.db1} {input.db2} {input.rbh} {output} && date) &> {log}"
...@@ -3,56 +3,33 @@ ...@@ -3,56 +3,33 @@
include: include:
"../rules/analysis.smk" "../rules/analysis.smk"
# NOTE: Using "shell: touch ..." to avoid the rule from being autodetected as `localrule`.
# This is needed so that an email can be sent upon event changes for this rule.
rule ANALYSIS: rule ANALYSIS:
input: input:
"status/analysis_assembly.done", # quast
"status/analysis_proteins.done",
"status/analysis_circ.done",
"status/analysis_mash.done"
output:
touch("status/analysis.done")
rule ANALYSIS_ASSEMBLY:
input:
expand( expand(
os.path.join(RESULTS_DIR, "analysis/quast/{rtype_tool}/report.tsv"), os.path.join(RESULTS_DIR, "analysis/quast/{rtype_tool}/report.tsv"),
rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS] rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
) ) if "quast" in config["steps_analysis"] else [],
output: # cdhit
touch("status/analysis_assembly.done")
rule ANALYSIS_PROTEINS:
input:
expand( expand(
os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"), os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"),
combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS] combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS]
), ) if "cdhit" in config["steps_analysis"] else [],
expand( expand(
os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"), os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"),
combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS] combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]
) ) if "cdhit" in config["steps_analysis"] else [],
output: # blast, circ. contigs
touch("status/analysis_proteins.done")
rule ANALYSIS_CIRC:
input:
expand( expand(
os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype_tool}/ASSEMBLY.{ctype}.tsv"), os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype_tool}/ASSEMBLY.{ctype}.tsv"),
rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS], rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
ctype=["circ", "compl"] ctype=["circ", "compl"]
) ) if "blast_circ" in config["steps_analysis"] else [],
output: # mash
touch("status/analysis_circ.done") [os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")] if "mash_dist" in config["steps_analysis"] else [],
rule ANALYSIS_MASH:
input:
os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist"),
expand( expand(
os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"),
rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS] rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
) ) if "mash_screen" in config["steps_analysis"] else [],
output: output:
touch("status/analysis_mash.done") touch("status/analysis.done")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment