Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
ONT_pilot_gitlab
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Contributor analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ESB
ONT_pilot_gitlab
Commits
57bd9934
Commit
57bd9934
authored
4 years ago
by
Valentina Galata
Browse files
Options
Downloads
Patches
Plain Diff
cleanup: replace out/err log by log; changed params for metaquast
parent
5fbb942a
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!76
Merge "cleanup" branch with "master" branch
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
workflow/rules/analysis.smk
+29
-37
29 additions, 37 deletions
workflow/rules/analysis.smk
workflow/steps/analysis.smk
+11
-34
11 additions, 34 deletions
workflow/steps/analysis.smk
with
40 additions
and
71 deletions
workflow/rules/analysis.smk
+
29
−
37
View file @
57bd9934
...
@@ -5,12 +5,11 @@
...
@@ -5,12 +5,11 @@
rule analysis_quast:
rule analysis_quast:
input:
input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
output:
output:
os.path.join(RESULTS_DIR, "analysis/quast/{rtype}/{tool}/report.tsv")
os.path.join(RESULTS_DIR, "analysis/quast/{rtype}/{tool}/report.tsv")
log:
log:
out="logs/quast.{rtype}.{tool}.out.log",
"logs/quast.{rtype}.{tool}.log"
err="logs/quast.{rtype}.{tool}.err.log"
wildcard_constraints:
wildcard_constraints:
rtype="|".join(READ_TYPES),
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
tool="|".join(ASSEMBLERS)
...
@@ -19,9 +18,11 @@ rule analysis_quast:
...
@@ -19,9 +18,11 @@ rule analysis_quast:
conda:
conda:
os.path.join(ENV_DIR, "quast.yaml")
os.path.join(ENV_DIR, "quast.yaml")
message:
message:
"Assess assembly quality w/ QUAST: {input}"
"metaQUAST: {input}"
shell:
shell:
"(date && metaquast.py --max-ref-number 0 --threads {threads} {input} -o $(dirname {output}) && date) 2> {log.err} > {log.out}"
"(date && "
"metaquast.py --max-ref-number 0 --min-contig 0 --contig-thresholds 0,1000,2000,5000 --threads {threads} {input} -o $(dirname {output}) && "
"date) &> {log}"
##################################################
##################################################
# Proteins
# Proteins
...
@@ -33,8 +34,7 @@ rule analysis_bbmap_rename:
...
@@ -33,8 +34,7 @@ rule analysis_bbmap_rename:
output:
output:
temp(os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype}_{tool}.faa"))
temp(os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype}_{tool}.faa"))
log:
log:
out="logs/analysis_bbmap_rename_{rtype}.{tool}.out.log",
"logs/analysis_bbmap_rename_{rtype}.{tool}.log"
err="logs/analysis_bbmap_rename_{rtype}.{tool}.err.log"
wildcard_constraints:
wildcard_constraints:
rtype="|".join(READ_TYPES),
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
tool="|".join(ASSEMBLERS)
...
@@ -48,7 +48,7 @@ rule analysis_bbmap_rename:
...
@@ -48,7 +48,7 @@ rule analysis_bbmap_rename:
message:
message:
"BBMAP: rename FASTA entries in {input}"
"BBMAP: rename FASTA entries in {input}"
shell:
shell:
"(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) 2> {log.err} > {log.out}"
"(date && rename.sh in={input} out={output} prefix={params.prefix} ignorejunk=t && date) &> {log}"
rule analysis_cdhit:
rule analysis_cdhit:
input:
input:
...
@@ -58,8 +58,7 @@ rule analysis_cdhit:
...
@@ -58,8 +58,7 @@ rule analysis_cdhit:
faa12=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"),
faa12=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1}_{tool1}__{rtype2}_{tool2}.faa"),
faa21=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype2}_{tool2}__{rtype1}_{tool1}.faa")
faa21=os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype2}_{tool2}__{rtype1}_{tool1}.faa")
log:
log:
out="logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",
"logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
err="logs/analysis_cdhit.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
wildcard_constraints:
wildcard_constraints:
rtype1="|".join(READ_TYPES),
rtype1="|".join(READ_TYPES),
rtype2="|".join(READ_TYPES),
rtype2="|".join(READ_TYPES),
...
@@ -74,14 +73,14 @@ rule analysis_cdhit:
...
@@ -74,14 +73,14 @@ rule analysis_cdhit:
"(date && "
"(date && "
"cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && "
"cd-hit-2d -i {input.faa1} -i2 {input.faa2} -o {output.faa12} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && "
"cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && "
"cd-hit-2d -i {input.faa2} -i2 {input.faa1} -o {output.faa21} -c 0.9 -n 5 -d 0 -M 16000 -T 8 && "
"date) 2> {log.err} > {log.out}"
"date) &> {log}"
##################################################
##################################################
# Circular contigs
# Circular contigs
rule circ_contigs_fasta:
rule circ_contigs_fasta:
input:
input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
output:
output:
split1=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.1.fasta")),
split1=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.1.fasta")),
split2=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.2.fasta"))
split2=temp(os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.split.2.fasta"))
...
@@ -123,8 +122,7 @@ rule circ_contigs_blastn:
...
@@ -123,8 +122,7 @@ rule circ_contigs_blastn:
output:
output:
os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.tsv")
os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype}/{tool}/ASSEMBLY.tsv")
log:
log:
out="logs/circ_blast.{rtype}.{tool}.out.log",
"logs/circ_blast.{rtype}.{tool}.log"
err="logs/circ_blast.{rtype}.{tool}.err.log"
wildcard_constraints:
wildcard_constraints:
rtype="|".join(READ_TYPES),
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
tool="|".join(ASSEMBLERS)
...
@@ -136,7 +134,7 @@ rule circ_contigs_blastn:
...
@@ -136,7 +134,7 @@ rule circ_contigs_blastn:
shell:
shell:
"(date && "
"(date && "
"blastn -task megablast -num_alignments 50 -perc_identity 95 {params.outfmt} -query {input.fasta} -db {params.db} -out {output} && "
"blastn -task megablast -num_alignments 50 -perc_identity 95 {params.outfmt} -query {input.fasta} -db {params.db} -out {output} && "
"date) 2> {log.err} > {log.out}"
"date) &> {log}"
rule circ_contigs_filter:
rule circ_contigs_filter:
input:
input:
...
@@ -174,18 +172,17 @@ rule circ_contigs_filter:
...
@@ -174,18 +172,17 @@ rule circ_contigs_filter:
# Assemblies
# Assemblies
rule mash_sketch:
rule mash_sketch:
input:
input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
output:
output:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.msh")
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.msh")
log:
log:
out="logs/mash.sketch.{rtype}.{tool}.out.log",
"logs/mash.sketch.{rtype}.{tool}.log"
err="logs/mash.sketch.{rtype}.{tool}.err.log"
threads:
threads:
1
1
conda:
conda:
os.path.join(ENV_DIR, "mash.yaml")
os.path.join(ENV_DIR, "mash.yaml")
shell:
shell:
"(date && ofile={output} && mash sketch -k 31 -s 10000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}"
"(date && ofile={output} && mash sketch -k 31 -s 10000 -S 42 -o ${{ofile%.*}} {input} && date) &> {log}"
rule mash_sketch_paste:
rule mash_sketch_paste:
input:
input:
...
@@ -207,8 +204,7 @@ rule mash_dist:
...
@@ -207,8 +204,7 @@ rule mash_dist:
output:
output:
os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")
os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")
log:
log:
out="logs/mash.dist.contigs.out.log",
"logs/mash.dist.contigs.log"
err="logs/mash.dist.contigs.err.log"
threads:
threads:
config["mash"]["threads"]
config["mash"]["threads"]
conda:
conda:
...
@@ -216,27 +212,26 @@ rule mash_dist:
...
@@ -216,27 +212,26 @@ rule mash_dist:
shell:
shell:
"(date && "
"(date && "
"mash dist -t -p {threads} {input} {input} > {output} && "
"mash dist -t -p {threads} {input} {input} > {output} && "
"date) 2> {log.err} > {log.out}"
"date) &> {log}"
# Contigs
# Contigs
rule mash_sketch_contigs:
rule mash_sketch_contigs:
input:
input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
output:
output:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.contigs.msh")
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.contigs.msh")
log:
log:
out="logs/mash.sketch.{rtype}.{tool}.contigs.out.log",
"logs/mash.sketch.{rtype}.{tool}.contigs.log"
err="logs/mash.sketch.{rtype}.{tool}.contigs.err.log"
threads:
threads:
1
1
conda:
conda:
os.path.join(ENV_DIR, "mash.yaml")
os.path.join(ENV_DIR, "mash.yaml")
shell:
shell:
"(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}"
"(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) &> {log}"
rule mash_screen_contigs:
rule mash_screen_contigs:
input:
input:
fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]),
fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.FILTERED.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]),
msh=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.contigs.msh"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
msh=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.contigs.msh"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
output:
output:
expand(os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
expand(os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
...
@@ -252,12 +247,11 @@ rule mash_screen_contigs:
...
@@ -252,12 +247,11 @@ rule mash_screen_contigs:
# rule analysis_mmseqs2_db:
# rule analysis_mmseqs2_db:
# input:
# input:
# os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
# os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.FILTERED.fasta")
# output:
# output:
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/{rtype}/{tool}/ASSEMBLY_db")
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/{rtype}/{tool}/ASSEMBLY_db")
# log:
# log:
# out="logs/mmseqs2_db.{rtype}.{tool}.out.log",
# "logs/mmseqs2_db.{rtype}.{tool}.log"
# err="logs/mmseqs2_db.{rtype}.{tool}.err.log"
# wildcard_constraints:
# wildcard_constraints:
# rtype="|".join(READ_TYPES),
# rtype="|".join(READ_TYPES),
# tool="|".join(ASSEMBLERS)
# tool="|".join(ASSEMBLERS)
...
@@ -266,7 +260,7 @@ rule mash_screen_contigs:
...
@@ -266,7 +260,7 @@ rule mash_screen_contigs:
# message:
# message:
# "Create MMseqs2 DB from {input}"
# "Create MMseqs2 DB from {input}"
# shell:
# shell:
# "(date && mmseqs createdb {input} {output} && date) 2> {log.err} > {log.out}"
# "(date && mmseqs createdb {input} {output} && date) &> {log}"
# rule analysis_mmseqs2_compare:
# rule analysis_mmseqs2_compare:
# input:
# input:
...
@@ -275,8 +269,7 @@ rule mash_screen_contigs:
...
@@ -275,8 +269,7 @@ rule mash_screen_contigs:
# output:
# output:
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}")
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}")
# log:
# log:
# out="logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",
# "logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
# err="logs/mmseqs2.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
# wildcard_constraints:
# wildcard_constraints:
# rtype1="|".join(READ_TYPES),
# rtype1="|".join(READ_TYPES),
# rtype2="|".join(READ_TYPES),
# rtype2="|".join(READ_TYPES),
...
@@ -290,7 +283,7 @@ rule mash_screen_contigs:
...
@@ -290,7 +283,7 @@ rule mash_screen_contigs:
# "Create MMseqs2 compare: {input}"
# "Create MMseqs2 compare: {input}"
# shell:
# shell:
# # TODO: "mmseqs2_tmp" ??? (see old files)
# # TODO: "mmseqs2_tmp" ??? (see old files)
# "(date && mmseqs rbh {input.db1} {input.db2} {output} --min-seq-id 0.9 --threads {threads} && date) 2> {log.err} > {log.out}"
# "(date && mmseqs rbh {input.db1} {input.db2} {output} --min-seq-id 0.9 --threads {threads} && date) &> {log}"
# rule analysis_mmseqs2_m8_convert:
# rule analysis_mmseqs2_m8_convert:
# input:
# input:
...
@@ -300,11 +293,10 @@ rule mash_screen_contigs:
...
@@ -300,11 +293,10 @@ rule mash_screen_contigs:
# output:
# output:
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}.m8")
# os.path.join(RESULTS_DIR, "analysis/mmseqs2/comparison/{rtype1}_{tool1}__{rtype2}_{tool2}.m8")
# log:
# log:
# out="logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",
# "logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.log"
# err="logs/mmseqs2_convert.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
# conda:
# conda:
# os.path.join(ENV_DIR, "cd-hit.yaml")
# os.path.join(ENV_DIR, "cd-hit.yaml")
# message:
# message:
# "Create MMseqs2 compare: {input}"
# "Create MMseqs2 compare: {input}"
# shell:
# shell:
# "(date && mmseqs convertalis {input.db1} {input.db2} {input.rbh} {output} && date) 2> {log.err} > {log.out}"
# "(date && mmseqs convertalis {input.db1} {input.db2} {input.rbh} {output} && date) &> {log}"
This diff is collapsed.
Click to expand it.
workflow/steps/analysis.smk
+
11
−
34
View file @
57bd9934
...
@@ -3,56 +3,33 @@
...
@@ -3,56 +3,33 @@
include:
include:
"../rules/analysis.smk"
"../rules/analysis.smk"
# NOTE: Using "shell: touch ..." to avoid the rule from being autodetected as `localrule`.
# This is needed so that an email can be sent upon event changes for this rule.
rule ANALYSIS:
rule ANALYSIS:
input:
input:
"status/analysis_assembly.done",
# quast
"status/analysis_proteins.done",
"status/analysis_circ.done",
"status/analysis_mash.done"
output:
touch("status/analysis.done")
rule ANALYSIS_ASSEMBLY:
input:
expand(
expand(
os.path.join(RESULTS_DIR, "analysis/quast/{rtype_tool}/report.tsv"),
os.path.join(RESULTS_DIR, "analysis/quast/{rtype_tool}/report.tsv"),
rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
)
) if "quast" in config["steps_analysis"] else [],
output:
# cdhit
touch("status/analysis_assembly.done")
rule ANALYSIS_PROTEINS:
input:
expand(
expand(
os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"),
os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"),
combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS]
combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS]
),
) if "cdhit" in config["steps_analysis"] else [],
expand(
expand(
os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"),
os.path.join(RESULTS_DIR, "analysis/cdhit/{combi}.faa"),
combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]
combi=["%s_%s__%s_%s" % (p[1][0], p[1][1], p[0][0], p[0][1]) for p in READ_ASSEMBLER_PAIRS]
)
) if "cdhit" in config["steps_analysis"] else [],
output:
# blast, circ. contigs
touch("status/analysis_proteins.done")
rule ANALYSIS_CIRC:
input:
expand(
expand(
os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype_tool}/ASSEMBLY.{ctype}.tsv"),
os.path.join(RESULTS_DIR, "analysis/circ_blast/{rtype_tool}/ASSEMBLY.{ctype}.tsv"),
rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
ctype=["circ", "compl"]
ctype=["circ", "compl"]
)
) if "blast_circ" in config["steps_analysis"] else [],
output:
# mash
touch("status/analysis_circ.done")
[os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")] if "mash_dist" in config["steps_analysis"] else [],
rule ANALYSIS_MASH:
input:
os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist"),
expand(
expand(
os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"),
os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"),
rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
)
) if "mash_screen" in config["steps_analysis"] else [],
output:
output:
touch("status/analysis_mash.done")
touch("status/analysis.done")
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment