Skip to content
Snippets Groups Projects
Commit 04c4ddd4 authored by Valentina Galata's avatar Valentina Galata
Browse files

added mash screen analysis (issue #64)

parent e12bfc99
No related branches found
No related tags found
1 merge request!76Merge "cleanup" branch with "master" branch
......@@ -171,6 +171,7 @@ rule circ_contigs_filter:
##################################################
# Mash
# Assemblies
rule mash_sketch:
input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
......@@ -217,6 +218,35 @@ rule mash_dist:
"mash dist -t -p {threads} {input} {input} > {output} && "
"date) 2> {log.err} > {log.out}"
# Contigs
rule mash_sketch_contigs:
input:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.fasta")
output:
os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.contigs.msh")
log:
out="logs/mash.sketch.{rtype}.{tool}.contigs.out.log",
err="logs/mash.sketch.{rtype}.{tool}.contigs.err.log"
threads:
1
conda:
os.path.join(ENV_DIR, "mash.yaml")
shell:
"(date && ofile={output} && mash sketch -i -k 31 -s 1000 -S 42 -o ${{ofile%.*}} {input} && date) 2> {log.err} > {log.out}"
rule mash_screen_contigs:
input:
fasta=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.fasta"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]),
msh=expand(os.path.join(RESULTS_DIR, "assembly/{rtype_tool}/ASSEMBLY.contigs.msh"), rtype_tool=["%s/%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
output:
expand(os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"), rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
threads:
config["mash"]["threads"]
conda:
os.path.join(ENV_DIR, "mash.yaml")
script:
os.path.join(SRC_DIR, "mash_screen_contigs.py")
##################################################
# MMseqs2
......
#!/usr/bin/env python
# NOTE:
# mash screen tutorial: https://mash.readthedocs.io/en/latest/tutorials.html#screening-a-read-set-for-containment-of-refseq-genomes
# columns: identity, shared-hashes, median-multiplicity, p-value, query-ID, query-comment
import subprocess
for i in range(0, len(snakemake.input.fasta)):
fa = snakemake.input.fasta[:i] + snakemake.input.fasta[i+1:]
print("mash screen: {} in {}".format(fa, snakemake.input.msh[i]))
subprocess.check_call(
"mash screen -p {p} -i -1 {m} {f} > {o}".format(
p=snakemake.threads,
m=snakemake.input.msh[i],
f=" ".join(fa),
o=snakemake.output[i]
),
shell=True
)
\ No newline at end of file
......@@ -49,6 +49,10 @@ rule ANALYSIS_CIRC:
rule ANALYSIS_MASH:
input:
os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist")
os.path.join(RESULTS_DIR, "analysis/mash/contigs.dist"),
expand(
os.path.join(RESULTS_DIR, "analysis/mash/screen.{rtype_tool}.tsv"),
rtype_tool=["%s.%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
)
output:
touch("status/analysis_mash.done")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment