Skip to content
Snippets Groups Projects
Commit 10e83e92 authored by Valentina Galata
Browse files

updated/added messages to rules; rgi db is loaded in sep. rule

parent ecd585c8
No related branches found
No related tags found
1 merge request: !76 Merge "cleanup" branch with "master" branch
......@@ -18,7 +18,7 @@ rule annotation_prodigal:
conda:
os.path.join(ENV_DIR, "prodigal.yaml")
message:
"Call genes w/ Prodigal: {input}"
"Annotation: call genes w/ Prodigal"
shell:
"(date && prodigal -a {output} -p meta -i {input} && date) 2> {log.err} > {log.out}"
......@@ -42,7 +42,7 @@ rule annotation_diamond_lr_daa:
conda:
os.path.join(ENV_DIR, "diamond.yaml")
message:
"DIAMOND: blastp proteins {input.faa} to {input.db}"
"Annotation: protein search w/ DIAMOND in long reads"
shell:
"(date && diamond blastx -q {input.reads} --db {input.db} --out {output} -p {threads} --long-reads --sensitive --outfmt 100 && date) 2> {log.err} > {log.out}"
......@@ -65,7 +65,7 @@ rule annotation_diamond_daa:
conda:
os.path.join(ENV_DIR, "diamond.yaml")
message:
"DIAMOND: blastp proteins {input.faa} to {input.db}"
"Annotation: protein search w/ DIAMOND"
shell:
"(date && diamond blastp -q {input.faa} --db {input.db} --out {output} -p {threads} --outfmt 100 && date) 2> {log.err} > {log.out}"
......@@ -87,7 +87,7 @@ rule annotation_diamond_tsv:
conda:
os.path.join(ENV_DIR, "diamond.yaml")
message:
"DIAMOND: reformat {input} to {output}"
"Annotation: reformat DIAMOND output"
shell:
"(date && "
"diamond view --daa {params.ibname} --max-target-seqs 1 --outfmt {params.outfmt} --out {output} && "
......@@ -105,7 +105,7 @@ rule annotation_rgi_input:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
message:
"RGI: create input FAA: {input}"
"Annotation: RGI input"
shell:
# NOTE: remove stop codon symbol "*"
"sed 's/\*$//' {input} > {output}"
......@@ -114,7 +114,9 @@ rule annotation_rgi_input:
rule annotation_rgi:
input:
faa=os.path.join(RESULTS_DIR, "annotation/rgi/{rtype}/{tool}/input.faa"),
db=os.path.join(DB_DIR, "rgi/card.json")
db=os.path.join(DB_DIR, "rgi/card.json"),
# NOTE: to make sure that the same DB is used for all targets
setup="status/rgi_setup.done"
output:
os.path.join(RESULTS_DIR, "annotation/rgi/{rtype}/{tool}/rgi.txt")
log:
......@@ -131,12 +133,9 @@ rule annotation_rgi:
conda:
os.path.join(ENV_DIR, "rgi.yaml")
message:
"RGI: AMR prediction: {input}"
"Annotation: AMR prediction w/ RGI"
shell:
"(date && "
# NOTE: to make sure that the correct DB is used
"rgi clean --local && "
"rgi load --card_json {input.db} --local && "
"rgi database --version --local && "
# NOTE: https://github.com/arpcard/rgi/issues/93: KeyError: 'snp'
# need to run the CMD twice
......@@ -163,10 +162,10 @@ rule annotation_casc:
config["casc"]["threads"]
conda:
os.path.join(ENV_DIR, "casc.yaml")
message:
"Annotation: CRISPR detection w/ CASC"
shell:
"(date && "
# "export PATH={config[casc][path]} && "
# "export PERL5LIB={config[casc][perl5lib]} && "
"export PATH=$PATH:$(dirname {input.bin}) && "
"casc -i {input.asm} -o $(dirname {output}) -n {threads} --conservative && "
"date) 2> {log.err} > {log.out}"
......@@ -186,9 +185,10 @@ rule annotation_minced:
tool="|".join(ASSEMBLERS)
conda:
os.path.join(ENV_DIR, "minced.yaml")
message:
"Annotation: CRISPR detection w/ MinCED"
shell:
"(date && "
# "export PATH={config[minced][path]} && "
"export PATH=$PATH:$(dirname {input.jar}) && "
"minced {input.asm} {output.txt} {output.gff} && "
"date) 2> {log.err} > {log.out}"
......@@ -211,7 +211,7 @@ rule annotation_plasflow_input:
script=os.path.join(SRC_DIR, "filter_fasta_by_length.pl"),
minlen=config["plasflow"]["minlen"]
message:
"PlasFlow: create input FASTA: {input}"
"Annotation: PlasFlow input"
shell:
"{params.script} {params.minlen} {input} > {output} 2> {log}"
......@@ -233,7 +233,7 @@ rule annotation_plasflow:
conda:
os.path.join(ENV_DIR, "plasflow.yaml")
message:
"PlasFlow: predict: {input}"
"Annotation: plasmid prediction w/ PlasFlow"
shell:
"(date && "
"PlasFlow.py --input {input} --output {output.tmp} --threshold {params.threshold} && "
......
......@@ -39,6 +39,8 @@ rule polishing_lr_racon:
config["racon"]["threads"]
conda:
os.path.join(ENV_DIR, "racon.yaml")
message:
"Assembly: long reads: polishing w/ Racon"
shell:
"(date && "
"samtools view -h {input.bam} > {output.sam} && "
......@@ -61,6 +63,8 @@ rule polishing_lr_medaka:
config["medaka"]["threads"]
conda:
os.path.join(ENV_DIR, "medaka.yaml")
message:
"Assembly: long reads: polishing w/ Medaka"
shell:
"(date && "
"medaka_consensus -i {input.lr} -d {input.asm} -o $(dirname {output}) -t {threads} -m {config[medaka][model]} && "
......
......@@ -18,6 +18,8 @@ rule mapping_bwa_idx_polishing:
idx_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Mapping: BWA index for assembly polishing"
shell:
"(date && bwa index {input} -p {params.idx_prefix} && date) 2> {log.err} > {log.out}"
......@@ -33,13 +35,15 @@ rule mapping_bwa_mem_polishing:
err="logs/bwa_mem.polishing.metag.lr.{tool}.err.log"
wildcard_constraints:
tool="|".join(config["assemblers"]["lr"])
threads:
config["bwa"]["threads"]
params:
idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0],
bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
threads:
config["bwa"]["threads"]
message:
"Mapping long reads to assembly w/ BWA for polishing"
shell:
"(date && "
"bwa mem -x ont2d -t {threads} {params.idx_prefix} {input.lr} | "
......@@ -67,6 +71,8 @@ rule mapping_bwa_idx_assembly:
idx_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Mapping: BWA index for assembly mapping"
shell:
"(date && bwa index {input} -p {params.idx_prefix} && date) 2> {log.err} > {log.out}"
......@@ -89,13 +95,15 @@ rule mapping_bwa_mem_assembly_sr:
mtype="|".join(META_TYPES),
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
threads:
config["bwa"]["threads"]
params:
idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0],
bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
threads:
config["bwa"]["threads"]
message:
"Mapping short reads to assembly w/ BWA"
shell:
"(date && "
"bwa mem -t {threads} {params.idx_prefix} {input.r1} {input.r2} | "
......@@ -120,13 +128,15 @@ rule mapping_bwa_mem_assembly_lr:
wildcard_constraints:
rtype="|".join(["lr", "hy"]),
tool="|".join(config["assemblers"]["lr"] + config["assemblers"]["hy"])
threads:
config["bwa"]["threads"]
params:
idx_prefix=lambda wildcards, input: os.path.splitext(input.idx[0])[0],
bam_prefix=lambda wildcards, output: os.path.splitext(output[0])[0]
conda:
os.path.join(ENV_DIR, "bwa.yaml")
threads:
config["bwa"]["threads"]
message:
"Mapping long reads to assembly w/ BWA"
shell:
"(date && "
"bwa mem -x ont2d -t {threads} {params.idx_prefix} {input.lr} | "
......@@ -149,6 +159,8 @@ rule mapping_bwa_mem_assembly_hy:
threads: 1
conda:
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Mapping: merging short-reads and long-reads mapping results"
shell:
"(date && samtools merge {output} {input.sr} {input.lr} && date) 2> {log.err} > {log.out}"
......@@ -172,6 +184,8 @@ rule mapping_assembly_genomecov:
threads: 1
conda:
os.path.join(ENV_DIR, "bedtools.yaml")
message:
"Mapping: compute assembly coverage"
shell:
"(date && bedtools genomecov -ibam {input} > {output} && date) 2> {log.err} > {log.out}"
......@@ -188,9 +202,11 @@ rule mapping_assembly_genomecov_average:
rtype1="|".join(READ_TYPES),
rtype2="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
threads: 1
params:
script=os.path.join(SRC_DIR, "coverage.awk")
threads: 1
message:
"Mapping: compute average assembly coverage"
shell:
"(date && cat {input} | awk -f {params.script} | tail -n+2 > {output} && date) 2> {log.err} > {log.out}"
......@@ -212,10 +228,9 @@ rule mapping_assembly_flagstat:
rtype2="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
conda:
# os.path.join(ENV_DIR, "analysis.yaml")
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Assembly mapping: samtools flagstat: {input}"
"Mapping: assembly coverage stats w/ samtools flagstat"
shell:
"(date && samtools flagstat {input} > {output} && date) 2> {log.err} > {log.out}"
......@@ -234,10 +249,9 @@ rule mapping_assembly_idxstats:
rtype2="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
conda:
# os.path.join(ENV_DIR, "analysis.yaml")
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Assembly mapping: samtools idxstats: {input}"
"Mapping: assembly coverage stats w/ samtools idxstats"
shell:
"(date && samtools idxstats {input} > {output} && date) 2> {log.err} > {log.out}"
......@@ -256,8 +270,9 @@ rule mapping_assembly_uniq:
rtype2="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
conda:
# os.path.join(ENV_DIR, "analysis.yaml")
os.path.join(ENV_DIR, "bwa.yaml")
message:
"Mapping: assembly coverage stats: number of uniquely mapped reads"
message:
"Assembly mapping: unique: {input}"
shell:
......
......@@ -9,6 +9,8 @@ rule install_casc:
path=os.path.join(MOD_DIR, "casc")
conda:
os.path.join(ENV_DIR, "casc.yaml")
message:
"Setup: install CASC"
shell:
"(cd {params.path} && "
"perl Makefile.PL PREFIX=\"$(realpath .)\" && "
......@@ -16,7 +18,7 @@ rule install_casc:
"make test && "
"make install) 2> {log.err} > {log.out}"
# minced
# MinCED
rule install_minced:
output:
os.path.join(MOD_DIR, "minced/minced.jar")
......@@ -27,13 +29,15 @@ rule install_minced:
path=os.path.join(MOD_DIR, "minced")
conda:
os.path.join(ENV_DIR, "minced.yaml")
message:
"Setup: install MinCED"
shell:
"(cd {params.path} && "
"make && "
"make test) 2> {log.err} > {log.out}"
# RGI DBs
rule annotation_rgi_db:
# Download RGI data
rule download_rgi_db:
output:
archive=temp(os.path.join(DB_DIR, "rgi/card-data.tar.bz2")),
json=os.path.join(DB_DIR, "rgi/card.json")
......@@ -43,9 +47,28 @@ rule annotation_rgi_db:
params:
db_url=config["rgi"]["db_url"]
message:
"RGI: Download DB data"
"Setup: download RGI data"
shell:
"(date && "
"wget -O {output.archive} {params.db_url} --no-check-certificate && "
"tar -C $(dirname {output.archive}) -xvf {output.archive} && "
"date) 2> {log.err} > {log.out}"
# Setup RGI: load the required DB
# NOTE: done once, in its own rule, to make sure that the same DB is used for all targets
rule setup_rgi_db:
    input:
        # CARD JSON file to be loaded into the local RGI database
        os.path.join(DB_DIR, "rgi/card.json")
    output:
        # Flag file marking that the DB was loaded. The shell command below does
        # not write this file itself, so it is declared with touch() to let
        # Snakemake create it once the command has finished successfully.
        touch("status/rgi_setup.done")
    log:
        out="logs/rgi_setup.out.log",
        err="logs/rgi_setup.err.log"
    conda:
        os.path.join(ENV_DIR, "rgi.yaml")
    message:
        "Setup: load RGI DB"
    shell:
        # clean -> load -> print DB version; stderr and stdout go to separate logs
        "(rgi clean --local && "
        "rgi load --card_json {input} --local && "
        "rgi database --version --local) 2> {log.err} > {log.out}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment