Skip to content
Snippets Groups Projects
Commit 14c69148 authored by Valentina Galata
Browse files

zymo configs: updated

parent 29822190
No related branches found
No related tags found
No related merge requests found
work_dir: "/mnt/lscratch/users/vgalata/Zymo-GridION-EVEN-BB-SN"
work_dir: "/scratch/users/vgalata/Zymo"
single_fast5_dir: "data_single_fast5"
multi_fast5_dir: "data_multi_fast5"
......
############################################################
# STEPS
# Pipeline steps to be done: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
# Steps to be done
# steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps_annotation: ["diamond", "rgi", "plasflow", "minced", "barrnap"] # prodigal is run in any case
steps_analysis: ["quast", "cdhit", "mash_dist"]
steps_taxonomy: ["kraken2", "kaiju"]
############################################################
# INPUT
# working directory: will contain the results
work_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/mock"
# working directory: will contain the results (should be writeable)
work_dir: "/scratch/users/vgalata/Zymo"
# Paths WITHIN the working directory
# directory containing required DBs
# directory containing required DBs (should be writeable)
db_dir: "dbs"
# results directory
results_dir: "results"
......@@ -21,11 +25,11 @@ data:
# Meta-genomics
metag:
sr:
r1: "data/raw/short_reads/ERR2984773/ERR2984773_1.fastq.gz"
r2: "data/raw/short_reads/ERR2984773/ERR2984773_2.fastq.gz"
r1: "/scratch/users/vgalata/Zymo/data_sr/ERR2984773_1.fastq.gz"
r2: "/scratch/users/vgalata/Zymo/data_sr/ERR2984773_2.fastq.gz"
ont:
# List of directories containing FAST5 files
dirs: ["/mnt/lscratch/users/vgalata/Zymo-GridION-EVEN-BB-SN/data_multi_fast5/"] # leave empty if no data, i.e. []
dirs: ["/scratch/users/vgalata/Zymo/data_multi_fast5/"] # leave empty if no data, i.e. []
# List of FAST5 files
files: [] # leave empty if no data, i.e. []
# FastQ: if given, NO basecalling will be done !!!
......@@ -36,7 +40,7 @@ data:
r1: "" # leave empty if no data, i.e. ""
r2: "" # leave empty if no data, i.e. ""
# Meta-proteomics
metap:
# metap:
# TODO
############################################################
......@@ -45,7 +49,7 @@ data:
##############################
# Preprocessing
# TODO: installation
# TODO: installation ???
# Preprocessing: LR: Basecalling
# XXX
guppy:
......@@ -63,11 +67,17 @@ guppy:
threads: 20
# Preprocessing: SR
# XXX
# https://github.com/OpenGene/fastp
fastp:
threads: 10
min_length: 40
# rRNA gene filtering
sortmerna:
threads: 20
# References to be used (w/ md5sums)
refs: []
# FastQ QC
# https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
fastqc:
......@@ -77,22 +87,35 @@ fastqc:
##############################
# Assembly
# List of assemblers for different read types
# List of assemblers for different read types: assembler names MUST be UNIQUE
assemblers:
sr: ["megahit", "metaspades"]
lr: ["flye"]
lr: ["flye", "wtdbg2"]
hy: ["metaspadeshybrid", "operams"]
hyhy: []
# XXX
# https://github.com/fenderglass/Flye
flye:
threads: 10
genome_size: "1g"
# XXX
# https://github.com/ruanjue/wtdbg2
wtdbg2:
threads: 10
bin: "/scratch/users/sbusi/tools/wtdbg2/"
genome_size: "1g"
# https://canu.readthedocs.io/en/latest/
canu:
threads: 24
# mem: "64g"
genome_size: "1g"
# https://github.com/ablab/spades
metaspades:
threads: 10
# XXX
# https://github.com/voutcn/megahit
megahit:
threads: 10
......@@ -105,12 +128,12 @@ operams:
##############################
# Long-read assembly polishing
# XXX
# https://nanoporetech.github.io/medaka/index.html
medaka:
threads: 10 # do NOT set to large value (e.g. using 30 did not work)
threads: 10 # NOTE: avoid large values !!! e.g. 30 did not work
model: r941_min_high # the MinION model, high accuracy
# XXX
# https://github.com/isovic/racon
racon:
threads: 30
......@@ -128,33 +151,25 @@ bwa:
# http://www.htslib.org/doc/samtools.html
samtools:
sort:
# threads: 10
chunk_size: "4G"
view:
# threads: 10
##############################
# Annotation
# TODO: data download
# Sequence search
# XXX
# https://github.com/bbuchfink/diamond
diamond:
threads: 20
#db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
db: "/work/projects/ecosystem_biology/local_tools/databases/nr_uniprot_trembl.dmnd"
db: "/work/projects/ecosystem_biology/local_tools/databases/nr_uniprot_trembl.dmnd" # TODO: data download
# CRISPR
# https://github.com/dnasko/CASC
casc:
threads: 10
# path: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/bin"
# perl5lib: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/lib/site_perl"
# CRISPR
# https://github.com/ctSkennerton/minced
minced:
# path: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/minced/"
# minced:
# Plasmid prediction
# https://github.com/smaegol/PlasFlow
......@@ -167,34 +182,42 @@ plasflow:
rgi:
threads: 5
db_url: "https://card.mcmaster.ca/latest/data"
alignment_tool: "DIAMOND" # DIAMOND or BLAST
alignment_tool: "DIAMOND"
# rRNA genes prediction
# https://github.com/tseemann/barrnap
barrnap:
threads: 5
kingdom: ["bac", "arc", "euk", "mito"]
##############################
# Analysis
# https://github.com/weizhongli/cdhit --> wiki
cdhit:
threads: 10
# XXX
bbmap:
threads: 10
# Assembly quality
# XXX
# https://github.com/ablab/quast
quast:
threads: 10
# Sequence search and clustering
# https://github.com/soedinglab/MMseqs2
mmseqs2:
threads: 30
# mmseqs2:
# threads: 30
# createdb: "--dbtype 2 --shuffle -v"
# easycluster: "--kmer-per-seq-scale 0.5 --cov-mode 0 -c 0.5 --min-seq-id 0.9"
# easylinclust: "--kmer-per-seq-scale 0.5 --cov-mode 0 -c 0.5 --min-seq-id 0.9"
# path: "/home/users/sbusi/apps/mmseqs/bin"
# createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
# rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
# convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
# Seq. alignment
# https://mummer4.github.io/
mummer:
archive: "https://github.com/mummer4/mummer/releases/download/v3.9.4alpha/mummer-3.9.4alpha.tar.gz"
##############################
# Taxonomy
......@@ -206,8 +229,17 @@ kraken2:
maxikraken: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
class:
sr: "--gzip-compressed --paired"
lr: "" # TODO
contigs: "" # TODO
lr: ""
contigs: ""
# http://kaiju.binf.ku.dk/
# http://kaiju.binf.ku.dk/server
# https://github.com/bioinformatics-centre/kaiju
kaiju:
threads: 10
db: # key = basename of *.fmi
kaiju_db_nr_euk: "/mnt/isilon/projects/ecosystem_biology/databases/kaiju/kaiju_db_nr_euk_2020-05-25"
ranks: ["phylum", "class", "order", "family", "genus", "species"]
# # XXX
# GTDBTK:
......
#!/bin/bash -l
##############################
# SLURM
# NOTE: used for this script only, NOT for the snakemake call below
# slurm settings if called using sbatch
#SBATCH -J ONT_SMK
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH --time=0-10:00:00
#SBATCH --time=3-00:00:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
##############################
# SNAKEMAKE
# conda env name
# conda env name or path
ONTP_ENV="ONT_pilot"
# number of cores for snakemake
ONTP_CORES=60
# snakemake file
ONTP_SMK="workflow/Snakefile"
# config file
ONTP_CONFIG="config/Zymo-GridION-EVEN-BB-SN/config.yaml" # USER INPUT REQUIRED
# slurm config file
ONTP_SLURM="config/Zymo-GridION-EVEN-BB-SN/slurm.yaml"
# config files
ONTP_CONFIG="config/Zymo/config.yaml"
ONTP_SLURM="config/Zymo/slurm.yaml"
# slurm cluster call
ONTP_CLUSTER="-p {cluster.partition} -q {cluster.qos} {cluster.explicit} -N {cluster.nodes} -n {cluster.n} -c {threads} -t {cluster.time} --job-name={cluster.job-name}"
##############################
# IMP
ONTP_CLUSTER="sbatch -p {cluster.partition} -q {cluster.qos} {cluster.explicit} -N {cluster.nodes} -n {cluster.n} -c {threads} -t {cluster.time} --job-name={cluster.job-name}"
# activate the env
conda activate ${ONTP_ENV}
# run the pipeline
snakemake -s ${ONTP_SMK} -rp --cores ${ONTP_CORES} --configfile ${ONTP_CONFIG} \
--use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
--cluster-config ${ONTP_SLURM} --cluster "sbatch ${ONTP_CLUSTER}"
snakemake -s workflow/Snakefile -rp --jobs 10 --local-cores 1 \
--configfile ${ONTP_CONFIG} --use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
--cluster-config ${ONTP_SLURM} --cluster "${ONTP_CLUSTER}"
......@@ -27,6 +27,14 @@ fastp_sr:
n: 1
explicit: ""
sortmerna_filt:
time: "01-12:00:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
n: 1
explicit: ""
# Assembly
assembly_lr_flye:
time: "00-10:00:00"
......@@ -36,6 +44,22 @@ assembly_lr_flye:
n: 1
explicit: ""
assembly_lr_wtdbg2:
time: "00-10:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
assembly_lr_canu:
time: "05-00:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
assembly_sr_megahit:
time: "01-4:00:00"
partition: "bigmem"
......@@ -77,7 +101,15 @@ mapping_bwa_idx_assembly:
n: 1
explicit: ""
mapping_bwa_mem_assembly_sr:
mapping_bwa_mem_assembly_sr_metag:
time: "00-10:00:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
n: 1
explicit: ""
mapping_bwa_mem_assembly_sr_metat:
time: "00-10:00:00"
partition: "batch"
qos: "qos-batch"
......@@ -101,6 +133,14 @@ mapping_bwa_mem_assembly_hy:
n: 1
explicit: ""
mapping_bwa_mem_assembly_hyhy:
time: "00-10:00:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
n: 1
explicit: ""
# Assembly polishing
mapping_bwa_idx_polishing:
time: "00-10:00:00"
......@@ -134,6 +174,22 @@ polishing_lr_medaka:
n: 1
explicit: ""
mapping_bwa_mem_polishing_metat:
time: "00-10:00:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
n: 1
explicit: ""
polishing_metat_racon:
time: "00-10:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
# Annotation
annotation_prodigal:
time: "01-4:00:00"
......@@ -159,7 +215,8 @@ annotation_plasflow:
n: 1
explicit: ""
kraken2_contigs:
# Taxonomy
tax_kraken2_contigs:
time: "00-02:00:00"
partition: "bigmem"
qos: "qos-bigmem"
......@@ -167,7 +224,7 @@ kraken2_contigs:
n: 1
explicit: ""
kraken2_sr:
tax_kraken2_sr:
time: "00-02:00:00"
partition: "bigmem"
qos: "qos-bigmem"
......@@ -175,10 +232,26 @@ kraken2_sr:
n: 1
explicit: ""
kraken2_lr:
tax_kraken2_lr:
time: "00-02:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
tax_kaiju:
time: "00-01:00:00"
partition: "bigmem"
qos: "qos-bigmem"
nodes: 1
n: 1
explicit: ""
tax_kaiju_summary:
time: "00-00:10:00"
partition: "batch"
qos: "qos-batch"
nodes: 1
n: 1
explicit: ""
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment