Add zymo workflow

Merged Valentina Galata requested to merge zymo into bright_future
4 files  +431 −376
# Pipeline for extra analysis of the zymo data:
# to be used after the main workflow (i.e. workflow/) and the report workflow (i.e. workflow_report/)
#
# Example call:
# snakemake -s workflow_zymo/Snakefile --configfile config/zymo/config.yaml --use-conda --conda-prefix /scratch/users/vgalata/miniconda3/ONT_pilot --cores 1 -rpn
##############################
# MODULES
import os
import re
import pandas
from scripts.utils import assembler_pairs
##############################
# CONFIG
include: "rules/init.smk"
##################################################
# RULES
##################################################
rule all:
    input:
        refs=os.path.join(RESULTS_DIR, "ref_genomes", "zymo.dmnd"),
        kegg=os.path.join(DB_DIR, "kegg.tsv"),
        mummer=expand(
            os.path.join(RESULTS_DIR, "extra/zymo/mummer/{rtype_tool}.dnadiff.report"),
            rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
        ),
        diamond=expand(
            os.path.join(RESULTS_DIR, "extra/zymo/diamond/{rtype_tool}.tsv"),
            rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
        ),
        summary=expand(
            os.path.join(RESULTS_DIR, "extra/{rtype_tool}.genes.tsv"),
            rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
        ),
        summary_pdf=expand(
            os.path.join(RESULTS_DIR, "extra/{rtype_tool}.genes.pdf"),
            rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS]
        )
        #####
        # expand(os.path.join(RESULTS_DIR, "analysis/sibeliaz/{combi}_output/blocks_coords.gff"),
        #        combi=["%s_%s__%s_%s" % (p[0][0], p[0][1], p[1][0], p[1][1]) for p in READ_ASSEMBLER_PAIRS]),
        # expand(os.path.join(RESULTS_DIR, "analysis/zymo_sibeliaz/zymo_{rtype_tool}_output/blocks_coords.gff"),
        #        rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS])
rule zymo_download:
    output:
        os.path.join(RESULTS_DIR, "ref_genomes", "{org}.fna")
    log:
        "logs/zymo.download.{org}.log"
    params:
        url=lambda wildcards: config["ref_genomes"][wildcards.org]
    message:
        "Zymo: download reference: {wildcards.org}"
    shell:
        "(date && wget {params.url} -O {output} && date) &> {log}"
rule zymo_prodigal:
    input:
        rules.zymo_download.output
    output:
        faa=os.path.join(RESULTS_DIR, "ref_genomes", "{org}.faa"),
        gbk=os.path.join(RESULTS_DIR, "ref_genomes", "{org}.gbk")
    log:
        "logs/zymo.prodigal.{org}.log"
    conda:
        os.path.join(ENV_DIR, "prodigal.yaml")
    message:
        "Zymo: predict proteins: {wildcards.org}"
    shell:
        "(date && prodigal -i {input} -o {output.gbk} -a {output.faa} && sed -i 's/^>/>{wildcards.org}__/' {output.faa} && date) &> {log}"
rule zymo_concat_fna:
    input:
        expand(os.path.join(RESULTS_DIR, "ref_genomes", "{org}.fna"), org=config["ref_genomes"].keys())
    output:
        os.path.join(RESULTS_DIR, "ref_genomes", "zymo.fna")
    message:
        "Zymo: concat references"
    shell:
        "for f in {input}; do sed \"s/^>/>$(basename -s '.fna' ${{f}})__/\" ${{f}}; done > {output}"
rule zymo_concat_faa:
    input:
        expand(os.path.join(RESULTS_DIR, "ref_genomes", "{org}.faa"), org=config["ref_genomes"].keys())
    output:
        os.path.join(RESULTS_DIR, "ref_genomes", "zymo.faa")
    message:
        "Zymo: concat reference proteins"
    shell:
        "cat {input} > {output}"
rule zymo_diamondDB:
    input:
        rules.zymo_concat_faa.output
    output:
        os.path.join(RESULTS_DIR, "ref_genomes", "zymo.dmnd")
    log:
        "logs/zymo.diamondDB.log"
    threads:
        config["diamond"]["threads"]
    conda:
        os.path.join(ENV_DIR, "diamond.yaml")
    message:
        "Zymo: reference DIAMOND DB"
    shell:
        "(date && db={output} && diamond makedb --in {input} --db ${{db%.*}} --threads {threads} && date) &> {log}"
rule zymo_diamond:
    input:
        faa=os.path.join(RESULTS_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa"),
        db=rules.zymo_diamondDB.output
    output:
        daa=os.path.join(RESULTS_DIR, "extra/zymo/diamond/{rtype}_{tool}.daa"),
        tsv=os.path.join(RESULTS_DIR, "extra/zymo/diamond/{rtype}_{tool}.tsv")
    log:
        "logs/zymo.diamond.{rtype}.{tool}.log"
    wildcard_constraints:
        rtype="|".join(READ_TYPES),
        tool="|".join(ASSEMBLERS)
    threads:
        config["diamond"]["threads"]
    params:
        blastp="--id 50 --query-cover 50 --subject-cover 50 --more-sensitive",
        outfmt="6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen"
    conda:
        os.path.join(ENV_DIR, "diamond.yaml")
    message:
        "Zymo: DIAMOND search: {input}"
    shell:
        "(date && "
        "db={input.db} && "
        "daa={output.daa} && "
        "diamond blastp -q {input.faa} --db ${{db%.*}} --out ${{daa}} --threads {threads} --outfmt 100 {params.blastp} && "
        # --max-target-seqs 1: best N matches that pass the e-value threshold
        "diamond view --daa ${{daa%.*}} --max-target-seqs 1 --threads {threads} --outfmt {params.outfmt} --out {output.tsv} && "
        "date) &> {log}"
rule zymo_mummer_dnadiff:
    input:
        ref=rules.zymo_concat_fna.output,
        asm=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.POLISHED.fasta")
    output:
        out=os.path.join(RESULTS_DIR, "extra/zymo/mummer/{rtype}_{tool}.dnadiff.report")
    log:
        "logs/zymo.mummer.dnadiff.{rtype}.{tool}.log"
    wildcard_constraints:
        rtype="|".join(READ_TYPES),
        tool="|".join(ASSEMBLERS)
    threads: 1
    conda:
        os.path.join(ENV_DIR, "mummer.yaml")
    message:
        "Zymo: MUMMER: dnadiff: {input}"
    shell:
        "(date && "
        "pout={output.out} && "
        "dnadiff -p ${{pout%.*}} {input.ref} {input.asm} && "
        "date) &> {log}"
rule proc_hmmer_tblout:
    input:
        os.path.join(RESULTS_DIR, "annotation/hmm/kegg/{rtype}/{tool}/hmm.tblout")
    output:
        temp(os.path.join(RESULTS_DIR, "annotation/hmm/kegg/{rtype}/{tool}/hmm.tsv"))
    shell:
        "grep -v '^#' {input} | sed 's/\\s\\+/\\t/g' | cut -f1-18 > {output}"
rule kegg_ko_ids:
    input:
        os.path.join(DB_DIR, config["hmm"]["kegg"])
    output:
        os.path.join(DB_DIR, "kegg.ids")
    shell:
        "grep '^NAME' {input} | sed 's/^NAME\\s\\+//' > {output}"
rule kegg_api_ko:
    input:
        os.path.join(DB_DIR, "kegg.ids")
    output:
        os.path.join(DB_DIR, "kegg.tsv")
    run:
        def query_ko(kos, concat=False):
            # TODO: move to utils.py
            # https://www.kegg.jp/kegg/rest/keggapi.html
            # GET: the input is limited to at most 10 entries
            import re
            import requests
            from time import sleep
            from random import random
            assert 0 < len(kos) <= 10, "Number of KOs must be between 1 and 10, i.e. not {}".format(len(kos))
            for ko in kos:
                assert re.match("K", ko), f"KO does not start with a K: {ko}"
            kegg_info = dict()
            for ko in kos:
                kegg_info[ko] = {"kegg_acc": ko, "kegg_name": "NA", "kegg_def": "NA", "kegg_pathways": set()}
            s = requests.Session()
            s_get = s.get("http://rest.kegg.jp/get/{kos}".format(kos="+".join(kos)))
            assert s_get.status_code == 200, f"Could not get data: HTTP {s_get.status_code}"
            s_pws = s.get("http://rest.kegg.jp/link/pathway/{kos}".format(kos="+".join(kos)))
            assert s_pws.status_code == 200, f"Could not get linked pathways: HTTP {s_pws.status_code}"
            ko = None
            for line in s_get.text.splitlines():
                line = line.strip()
                # if re.match("///", line): # new entry
                #     ko = None
                if re.match("ENTRY", line):
                    ko = re.sub(r"\s+", "\t", line).split("\t")[1]
                    assert ko in kegg_info
                if re.match("NAME", line):
                    assert kegg_info[ko]["kegg_name"] == "NA"
                    kegg_info[ko]["kegg_name"] = re.sub(r"^NAME\s+", "", line)
                elif re.match("DEFINITION", line):
                    assert kegg_info[ko]["kegg_def"] == "NA"
                    kegg_info[ko]["kegg_def"] = re.sub(r"^DEFINITION\s+", "", line)
            for line in s_pws.text.split("\n"):
                if line and re.search("path:map", line):
                    ko, pathway = line.split("\t")
                    ko = re.sub("^ko:", "", ko)
                    assert ko in kegg_info
                    kegg_info[ko]["kegg_pathways"].add(pathway)
            if concat:
                for ko in kos:
                    kegg_info[ko]["kegg_pathways"] = ";".join(list(kegg_info[ko]["kegg_pathways"])) if len(kegg_info[ko]["kegg_pathways"]) > 0 else "NA"
            # sleep for at least 1 sec between API calls
            sleep(1 + random())
            return kegg_info
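        # Hedged example of the structure returned by query_ko(..., concat=True);
        # the actual names/definitions come from the KEGG API:
        # {"K00001": {"kegg_acc": "K00001", "kegg_name": "E1.1.1.1, adh",
        #             "kegg_def": "alcohol dehydrogenase [EC:1.1.1.1]",
        #             "kegg_pathways": "path:map00010;path:map00071"}}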
        with open(input[0], "r") as ifile, open(output[0], "w") as ofile:
            header = ["kegg_acc", "kegg_name", "kegg_def", "kegg_pathways"]
            ofile.write("\t".join(header) + "\n")
            kos = []
            for line in ifile:
                if len(kos) == 10:
                    q = query_ko(kos=kos, concat=True)
                    for v in q.values():
                        ofile.write("\t".join([v[k] for k in header]) + "\n")
                        ofile.flush()
                    kos = []
                ko = line.strip().split("_")[0]
                kos.append(ko)
            if len(kos) > 0:
                q = query_ko(kos=kos, concat=True)
                for v in q.values():
                    ofile.write("\t".join([v[k] for k in header]) + "\n")
                    ofile.flush()
rule assembly_gene_summary:
    input:
        # FASTAs
        contigs=os.path.join(RESULTS_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.POLISHED.fasta"),
        proteins=os.path.join(RESULTS_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa"),
        # taxonomy
        kraken2=expand(
            os.path.join(RESULTS_DIR, "taxonomy/kraken2/{{rtype}}.{{tool}}.{db}.labels.txt"),
            db=config["kraken2"]["db"].keys()
        ),
        kaiju=expand(
            os.path.join(RESULTS_DIR, "taxonomy/kaiju/{{rtype}}.{{tool}}.{db}.tsv"),
            db=config["kaiju"]["db"].keys()
        ),
        # coverage
        genecov=expand(
            os.path.join(RESULTS_DIR, "mapping/{mtype}/{{rtype}}/{{tool}}/ASSEMBLY.POLISHED.sr.cov.pergene"),
            mtype=META_TYPES
        ),
        # reference
        refhits=os.path.join(RESULTS_DIR, "extra/zymo/diamond/{rtype}_{tool}.tsv"),
        # unique/matched proteins
        cdhit=lambda wildcards: expand(
            os.path.join(RESULTS_DIR, "analysis/cdhit/{rtype1_tool1}__{{rtype}}_{{tool}}.faa"),
            rtype1_tool1=["%s_%s" % (r, t) for r, t in READ_ASSEMBLERS if r != wildcards.rtype or t != wildcards.tool]
        ),
        diamond=lambda wildcards: expand(
            os.path.join(RESULTS_DIR, "analysis/diamond/{rtype1_tool1}__{{rtype}}_{{tool}}.tsv"),
            rtype1_tool1=["%s_%s" % (r, t) for r, t in READ_ASSEMBLERS if r != wildcards.rtype or t != wildcards.tool]
        ),
        kegg=os.path.join(RESULTS_DIR, "annotation/hmm/kegg/{rtype}/{tool}/hmm.tsv"),
        kegg_info=os.path.join(DB_DIR, "kegg.tsv")
    output:
        os.path.join(RESULTS_DIR, "extra/{rtype}_{tool}.genes.tsv")
    wildcard_constraints:
        rtype="|".join(READ_TYPES),
        tool="|".join(ASSEMBLERS)
    message:
        "Zymo: Gene summary: {wildcards.rtype}, {wildcards.tool}"
    run:
        from scripts.utils import parse_diamond_tsv, parse_prodigal_faa_headers

        def parse_assembly_fna(ifile):
            # TODO: move to utils.py
            import re
            import pandas
            contigs = {}
            with open(ifile, "r") as fh:
                cid = None
                for line in fh:
                    line = line.strip()
                    if re.match(">", line):
                        cid = re.sub("^>", "", line.split(" ")[0])
                        assert cid not in contigs
                        contigs[cid] = {"length": 0}
                    else:
                        contigs[cid]["length"] += len(line)
            contigs = pandas.DataFrame.from_dict(contigs, orient="index")
            return contigs
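        # NB: parse_assembly_fna returns a DataFrame indexed by contig ID with a
        # single "length" column, e.g. contigs.loc["contig_1", "length"]
        # ("contig_1" being a made-up ID for illustration).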
        def parse_kaiju_labels(ifile, columns=["class_flag", "seq_id", "tax_id", "best_match_length", "best_match_tax_ids", "best_match_accs", "best_match_seq", "tax_lineage"]):
            # TODO: move to utils.py
            # https://github.com/bioinformatics-centre/kaiju#output-format
            # - either C or U, indicating whether the read is classified or unclassified
            # - name of the read
            # - NCBI taxon identifier of the assigned taxon
            # - the length or score of the best match used for classification
            # - the taxon identifiers of all database sequences with the best match
            # - the accession numbers of all database sequences with the best match
            # - matching fragment sequence(s)
            import pandas
            df = pandas.read_csv(ifile, sep="\t", header=None, names=columns)
            df = df[["class_flag", "seq_id", "tax_id", "tax_lineage"]]
            df.set_index(["seq_id"], drop=True, inplace=True, verify_integrity=True)
            return df

        def parse_kraken2_labels(ifile, columns=["class_flag", "seq_id", "taxon", "seq_length", "matches"]):
            # TODO: move to utils.py
            # https://github.com/DerrickWood/kraken2/wiki/Manual#standard-kraken-output-format
            # - "C"/"U": a one-letter code indicating that the sequence was either classified or unclassified
            # - the sequence ID, obtained from the FASTA/FASTQ header
            # - the taxonomy ID Kraken 2 used to label the sequence; this is 0 if the sequence is unclassified
            # - the length of the sequence in bp
            # - a space-delimited list indicating the LCA mapping of each k-mer in the sequence(s)
            import pandas
            df = pandas.read_csv(ifile, sep="\t", header=None, names=columns)
            df = df.assign(tax_name=df["taxon"].apply(lambda x: re.search(r"(?P<tname>^.*) \(taxid (?P<tid>[0-9]+)\)$", x).group("tname")))
            df = df.assign(tax_id=df["taxon"].apply(lambda x: re.search(r"(?P<tname>^.*) \(taxid (?P<tid>[0-9]+)\)$", x).group("tid")))
            df.drop(columns=["matches", "taxon"], inplace=True)
            df.set_index(["seq_id"], drop=True, inplace=True, verify_integrity=True)
            return df
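        # NB: the regex above assumes kraken2 labels were produced with --use-names,
        # i.e. the taxon field looks like "Bacillus subtilis (taxid 1423)" (made-up example).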
        def parse_genecov_table(ifile):
            # TODO: move to utils.py
            import pandas
            df = pandas.read_csv(ifile, sep="\t", header=0)
            df.set_index(["prot_id"], drop=True, inplace=True, verify_integrity=True)
            return df

        def parse_hmmer_tblout(ifile, columns=["tname", "tacc", "qname", "qacc", "fs_evalue", "fs_score", "fs_bias", "b1d_evalue", "b1d_score", "b1d_bias", "exp", "reg", "clu", "ov", "env", "dom", "rep", "inc"]):
            # TODO: move to utils.py
            import pandas
            df = pandas.read_csv(ifile, sep="\t", header=None, names=columns)
            df.drop(columns=["tacc", "qacc", "exp", "reg", "clu", "ov", "env", "dom", "rep", "inc"], inplace=True)
            # keep only the best hit per target (lowest best-1-domain e-value)
            df = df.sort_values(by=["tname", "b1d_evalue"], axis=0, ascending=[True, True]).groupby("tname").head(1).reset_index(drop=True)
            df.set_index(["tname"], drop=True, inplace=True, verify_integrity=True)
            return df

        # contig IDs and their length
        contigs = parse_assembly_fna(input.contigs)
        # print(contigs.head())
        # genes/proteins
        genes = parse_prodigal_faa_headers(input.proteins)
        genes["gene_length"] = abs(genes["end"] - genes["start"]) + 1
        genes = genes.assign(partial=genes["info"].apply(lambda x: not re.search(";partial=00;", x)))
        # print(genes.head())
        # gene coverage
        genecov = {}
        for ifile in input.genecov:
            mtype = os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(ifile))))
            genecov[mtype] = parse_genecov_table(ifile)[["ave_cov"]]
            # print(genecov[mtype].head())
        # kraken2
        kraken2 = {}
        for ifile in input.kraken2:
            db = os.path.basename(ifile).split(".")[2]
            kraken2[db] = parse_kraken2_labels(ifile)
            # print(kraken2[db].head())
        # kaiju
        kaiju = {}
        for ifile in input.kaiju:
            db = os.path.basename(ifile).split(".")[2]
            kaiju[db] = parse_kaiju_labels(ifile)
            # print(kaiju[db].head())
        # hits to reference genomes
        refhits = parse_diamond_tsv(input.refhits)
        refhits.set_index(["qseqid"], drop=True, inplace=True, verify_integrity=True)
        # print(refhits.head())
        # unique proteins from CD-HIT
        unique = {}
        for ifile in input.cdhit:
            tool2 = os.path.basename(ifile).split("__")[0].split("_")[1]
            unique[tool2] = list(parse_prodigal_faa_headers(ifile)["prot_id"].apply(lambda x: "__".join(x.split("__")[2:])))
            # print("cdhit: {}: {}: {}".format(tool2, len(unique[tool2]), unique[tool2][0:5]))
        # matched proteins from DIAMOND hits
        matched = {}
        for ifile in input.diamond:
            tool2 = os.path.basename(ifile).split("__")[0].split("_")[1]
            matched[tool2] = parse_diamond_tsv(ifile)
            matched[tool2].set_index(["qseqid"], drop=True, inplace=True, verify_integrity=True)
            # print(matched[tool2].head())
        # KEGG
        kegg = parse_hmmer_tblout(input.kegg)
        kegg_info = pandas.read_csv(input.kegg_info, sep="\t", header=0, index_col=0)
        # print(kegg.head())
        # summary: one row per predicted protein
        summary = genes.copy()
        summary.set_index(["prot_id"], drop=False, inplace=True, verify_integrity=True)
        summary["contig_length"] = summary["contig_id"].apply(lambda x: contigs.loc[x, "length"])
        for k, v in genecov.items():
            summary["ave_cov_%s" % k] = summary["prot_id"].apply(lambda x: v.loc[x, "ave_cov"])
        for k, v in kraken2.items():
            summary["kraken2_%s_name" % k] = summary["contig_id"].apply(lambda x: v.loc[x, "tax_name"])
            summary["kraken2_%s_id" % k] = summary["contig_id"].apply(lambda x: v.loc[x, "tax_id"])
        for k, v in kaiju.items():
            summary["kaiju_%s_lineage" % k] = summary["prot_id"].apply(lambda x: v.loc[x, "tax_lineage"])
            summary["kaiju_%s_id" % k] = summary["prot_id"].apply(lambda x: v.loc[x, "tax_id"])
        summary["ref_sseqid"] = summary["prot_id"].apply(lambda x: refhits.loc[x, "sseqid"] if x in refhits.index else None)
        summary["ref_pident"] = summary["prot_id"].apply(lambda x: refhits.loc[x, "pident"] if x in refhits.index else None)
        summary["ref_qslratio"] = summary["prot_id"].apply(lambda x: refhits.loc[x, "qs_len_ratio"] if x in refhits.index else None)
        summary["ref_qcov"] = summary["prot_id"].apply(lambda x: refhits.loc[x, "qcov"] if x in refhits.index else None)
        summary["ref_scov"] = summary["prot_id"].apply(lambda x: refhits.loc[x, "scov"] if x in refhits.index else None)
        for k, v in unique.items():
            summary["cdhit_unique_%s" % k] = False
            summary.loc[v, "cdhit_unique_%s" % k] = True
        for k, v in matched.items():
            summary["diamond_unique_%s" % k] = True
            summary.loc[v.index, "diamond_unique_%s" % k] = False
        summary["kegg"] = summary["prot_id"].apply(lambda x: kegg.loc[x, "qname"] if x in kegg.index else None)
        summary["kegg_evalue"] = summary["prot_id"].apply(lambda x: kegg.loc[x, "b1d_evalue"] if x in kegg.index else None)
        summary["kegg_name"] = summary["kegg"].apply(lambda x: kegg_info.loc[x.split("_")[0], "kegg_name"] if x is not None else None)
        summary["kegg_def"] = summary["kegg"].apply(lambda x: kegg_info.loc[x.split("_")[0], "kegg_def"] if x is not None else None)
        summary["kegg_pathways"] = summary["kegg"].apply(lambda x: kegg_info.loc[x.split("_")[0], "kegg_pathways"] if x is not None else None)
        # print(summary.head())
        # save
        summary.to_csv(output[0], sep="\t", header=True, index=False, index_label=False, na_rep="NA")
rule assembly_gene_summary_plots:
    input:
        genes=os.path.join(RESULTS_DIR, "extra/{rtype}_{tool}.genes.tsv")
    output:
        pdf=os.path.join(RESULTS_DIR, "extra/{rtype}_{tool}.genes.pdf")
    log:
        out=os.path.join(RESULTS_DIR, "extra/{rtype}_{tool}.genes.pdf.log")
    params:
        const=os.path.join(SRC_DIR, "const.R"),
        utils=os.path.join(SRC_DIR, "utils.R")
    conda:
        os.path.join(ENV_DIR, "r.yaml")
    script:
        os.path.join(SRC_DIR, "genes.R")
# #####################
# # Genome comparison #
# #####################
# rule sibeliaz:
#     input:
#         asm1=os.path.join(DATA_DIR, "assembly/{rtype1}/{tool1}/ASSEMBLY.POLISHED.fasta"),
#         asm2=os.path.join(DATA_DIR, "assembly/{rtype2}/{tool2}/ASSEMBLY.POLISHED.fasta")
#     output:
#         os.path.join(RESULTS_DIR, "analysis/sibeliaz/{rtype1}_{tool1}__{rtype2}_{tool2}_output/blocks_coords.gff")
#     log:
#         out="logs/sibeliaz.{rtype1}.{tool1}.{rtype2}.{tool2}.out.log",
#         err="logs/sibeliaz.{rtype1}.{tool1}.{rtype2}.{tool2}.err.log"
#     threads:
#         config["sibeliaz"]["threads"]
#     params:
#         kmers=config["sibeliaz"]["kmer"],
#         memory=config["sibeliaz"]["mem"]
#     wildcard_constraints:
#         rtype1="|".join(READ_TYPES),
#         rtype2="|".join(READ_TYPES),
#         tool1="|".join(ASSEMBLERS),
#         tool2="|".join(ASSEMBLERS)
#     conda:
#         os.path.join(ENV_DIR, "sibeliaz.yaml")
#     message:
#         "Genome comparison: SibeliaZ: {input.asm1} {input.asm2}"
#     shell:
#         "(date && "
#         "sibeliaz -n -t {threads} -k {params.kmers} -f {params.memory} -o $(dirname {output}) {input.asm1} {input.asm2} && "
#         "date) 2> {log.err} > {log.out}"
# rule concatenate:
#     input:
#         expand(os.path.join(REF_GENOMES_DIR, "{org}.fna"), org=config["ref_genomes"].keys())
#     output:
#         os.path.join(RESULTS_DIR, "reference/mock_genomes.fasta")
#     log:
#         out="logs/concat.out.log",
#         err="logs/concat.err.log"
#     shell:
#         "(date && "
#         "cat {input} > {output} && "
#         "date) 2> {log.err} > {log.out}"
# rule zymo_sibeliaz:
#     input:
#         zymo=rules.concatenate.output,
#         asm2=os.path.join(DATA_DIR, "assembly/{rtype}/{tool}/ASSEMBLY.POLISHED.fasta")
#     output:
#         os.path.join(RESULTS_DIR, "analysis/zymo_sibeliaz/zymo_{rtype}_{tool}_output/blocks_coords.gff")
#     log:
#         out="logs/sibeliaz.{rtype}.{tool}.out.log",
#         err="logs/sibeliaz.{rtype}.{tool}.err.log"
#     threads:
#         config["sibeliaz"]["threads"]
#     params:
#         kmers=config["sibeliaz"]["kmer"],
#         memory=config["sibeliaz"]["mem"]
#     wildcard_constraints:
#         rtype="|".join(READ_TYPES),
#         tool="|".join(ASSEMBLERS)
#     conda:
#         os.path.join(ENV_DIR, "sibeliaz.yaml")
#     message:
#         "Genome comparison: SibeliaZ: {input.zymo} {input.asm2}"
#     shell:
#         "(date && "
#         "sibeliaz -n -t {threads} -k {params.kmers} -f {params.memory} -o $(dirname {output}) {input.zymo} {input.asm2} && "
#         "date) 2> {log.err} > {log.out}"