Skip to content
Snippets Groups Projects
Commit 17bb9496 authored by Susheel Busi's avatar Susheel Busi
Browse files

added rules for diamond analyses

parent 802dba2d
2 merge requests!78Add zymo workflow,!77Workflow: Zymo - Comparison to Reference Genomes
# Main requirements
work_dir: "/scratch/users/sbusi/zymo"
results_dir: "results"
data_dir: "/scratch/users/sbusi/zymo/Zymo/results"
# Assembly
# List of assemblers for different read types: assembler names MUST be UNIQUE
assemblers:
sr: ["megahit", "metaspades"]
lr: ["flye", "canu"]
hy: ["metaspadeshybrid", "operamsmegahit", "operamsmetaspades"]
# Additional params
single_fast5_dir: "data_single_fast5"
multi_fast5_dir: "data_multi_fast5"
ref_genomes_dir: "data_ref_genomes"
# Annotation tools
prodigal:
threads: 12
diamond:
threads: 20
\ No newline at end of file
......@@ -36,7 +36,12 @@ ONTP_CLUSTER="{cluster.call} -t {cluster.runtime} -n {cluster.threads} -p {clust
# activate the env
conda activate ${ONTP_ENV}
# run the pipeline
# unlock the pipeline
snakemake -s ${ONTP_SMK} -rp --jobs 10 --local-cores 1 \
--configfile ${ONTP_CONFIG} --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
--cluster-config ${ONTP_SLURM} --cluster "${ONTP_CLUSTER}" --unlock
# run the pipeline
snakemake -s ${ONTP_SMK} -rpn --jobs 10 --local-cores 1 \
--configfile ${ONTP_CONFIG} --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
--cluster-config ${ONTP_SLURM} --cluster "${ONTP_CLUSTER}"
......@@ -17,12 +17,17 @@ concatenate:
quality: "normal"
runtime: "0-00:05:00"
annotation_prodigal:
zymo_prodigal:
partition: "batch"
qos: "normal"
runtime: "00-2:00:00"
runtime: "00-1:00:00"
annotation_diamond:
diamond_db:
partition: "batch"
quality: "normal"
runtime: "00-00:30:00"
analysis_diamond:
partition: "batch"
quality: "normal"
runtime: "00-2:00:00"
\ No newline at end of file
......@@ -2,6 +2,7 @@
# Python modules
##################################################
import os
import re
##############################
# CONFIG
......@@ -29,6 +30,26 @@ SINGLE_FAST5_DIR = os.path.abspath(config["single_fast5_dir"])
MULTI_FAST5_DIR = os.path.abspath(config["multi_fast5_dir"])
REF_GENOMES_DIR = os.path.abspath(config["ref_genomes_dir"])
RESULTS_DIR = config["results_dir"]
DATA_DIR = config["data_dir"]
##################################################
# FUNCTIONS
##################################################
def assembler_pairs(assemblers):
"""
Create all possible combinations of two assemblers
"""
from itertools import combinations
return list(combinations(assemblers, 2))
##################################################
# ADDITIONAL PARAMETERS
##################################################
READ_TYPES = list(config["assemblers"].keys()) # list of read types
ASSEMBLERS = [y for x in config["assemblers"].values() for y in x] # list of all assemblers
READ_ASSEMBLERS = [y for x in [[(k, vv) for vv in v] for k, v in config["assemblers"].items()] for y in x] # list of (read type, assembler)
READ_ASSEMBLER_PAIRS = assembler_pairs(READ_ASSEMBLERS)
##################################################
# RULES
##################################################
......@@ -36,22 +57,20 @@ RESULTS_DIR = config["results_dir"]
rule all:
input:
REF_GENOMES_DIR,
expand(os.path.join(RESULTS_DIR,"reference/mock_genomes.fasta"))
expand(os.path.join(RESULTS_DIR,"reference/mock_genomes.fasta")),
expand(os.path.join(RESULTS_DIR,"annotation/zymo_prodigal/proteins.faa")),
expand(os.path.join(RESULTS_DIR,"annotation/diamond/zymo.dmnd")),
expand(os.path.join(RESULTS_DIR, "analysis/diamond/{rtype_tool}.{ext}"),
rtype_tool=["%s_%s" % (rtype, tool) for rtype, tool in READ_ASSEMBLERS],
ext=["daa", "tsv"])
rule download_genomes:
# input:
# SINGLE_FAST5_DIR
output:
directory(REF_GENOMES_DIR)
log:
out="logs/ref_genomes.out.log",
err="logs/ref_genomes.err.log"
# threads:
# config["ont_fast5_api"]["single_to_multi_fast5"]["threads"]
# params:
# batch_size=config["ont_fast5_api"]["single_to_multi_fast5"]["batch_size"]
# conda:
# os.path.join(ENV_DIR, "ont_fast5_api.yaml")
shell:
"(date && "
"wget https://github.com/al-mcintyre/mCaller_analysis_scripts/raw/master/assemblies/bsubtilis_pb.fasta -P {output} && "
......@@ -79,4 +98,65 @@ rule concatenate:
shell:
"(date && "
"cat {input}/*.fasta > {output} && "
"date) 2> {log.err} > {log.out}"
\ No newline at end of file
"date) 2> {log.err} > {log.out}"
rule zymo_prodigal:
input:
rules.concatenate.output
output:
os.path.join(RESULTS_DIR,"annotation/zymo_prodigal/proteins.faa")
log:
out="logs/zymo_prodigal.out.log",
err="logs/zymo_prodigal.err.log"
threads:
config["prodigal"]["threads"]
conda:
os.path.join(ENV_DIR, "prodigal.yaml")
shell:
"(date && "
"prodigal -a {output} -p meta -i {input} && "
"date) 2> {log.err} > {log.out}"
rule diamond_db:
input:
rules.zymo_prodigal.output
output:
os.path.join(RESULTS_DIR,"annotation/diamond/zymo.dmnd")
log:
out="logs/diamond_db.out.log",
err="logs/diamond_db.err.log"
threads:
config["diamond"]["threads"]
conda:
os.path.join(ENV_DIR, "diamond.yaml")
shell:
"(date && "
"diamond makedb --in {input} -d $(echo {output} | sed 's/.dmnd$//') && "
"date) 2> {log.err} > {log.out}"
rule analysis_diamond:
input:
faa=os.path.join(DATA_DIR, "annotation/prodigal/{rtype}/{tool}/proteins.faa"),
db=rules.diamond_db.output
output:
daa=os.path.join(RESULTS_DIR, "analysis/diamond/{rtype}_{tool}.daa"),
tsv=os.path.join(RESULTS_DIR, "analysis/diamond/{rtype}_{tool}.tsv")
log:
"logs/analysis.diamond.{rtype}.{tool}.log",
wildcard_constraints:
rtype="|".join(READ_TYPES),
tool="|".join(ASSEMBLERS)
threads:
config["diamond"]["threads"]
params:
outfmt="6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen"
conda:
os.path.join(ENV_DIR, "diamond.yaml")
message:
"Analysis: DIAMOND search: {input}"
shell:
"(date && "
"daa={output.daa} && "
"diamond blastp -q {input.faa} --db {input.db} --out {output.daa} -p {threads} --outfmt 100 && "
"diamond view --daa ${{daa%.*}} --max-target-seqs 1 -p {threads} --outfmt {params.outfmt} --out {output.tsv} && "
"date) &> {log}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment