Skip to content
Snippets Groups Projects
Commit 1ad79bf6 authored by Susheel Busi's avatar Susheel Busi
Browse files

updated the overall snakefiles, including the ANALYSIS_RULES

parent 0b5d4048
No related branches found
No related tags found
1 merge request!67WIP: Checkpoint snakefile
# File for running ONT analyses
# default configuration file
configfile:"config/CONFIG.yaml"
# default executable for snakmake
shell.executable("bash")
# input settings
RUNS=config['runs']['first']
STEPS=config['steps']
# include rules for the workflows based on "steps" in the CONFIG.yaml file
# ONT analyses workflow
TARGETS = []
if 'assembly_annotation' in STEPS:
include: "workflows/checkpoint_assembly_annotation.smk"
TARGETS += ["assemble_and_coverage.done",
"annotate.done",
"basecall_merge_qc.done",
"coverage_of_references.done",
"prodigal_gene_call.done",
"diamond_proteins.done"]
if 'mmseq' in STEPS:
include: "workflows/mmseq.smk"
TARGETS += ["mmseq_comparison_for_ont.done"]
if 'metaT' in STEPS:
include: "workflows/metat.smk"
TARGETS += ["metaT_mapping_for_ONT.done"]
if 'mapping' in STEPS:
include: "workflows/mapping.smk"
TARGETS += ["mapping_for_binning.done"]
if 'binning' in STEPS:
include: "workflows/binning.smk"
TARGETS += ["binning_for_ont.done"]
if 'taxonomy' in STEPS:
include: "workflows/taxonomy.smk"
TARGETS += ["taxonomy_for_ont.done"]
if 'analysis' in STEPS:
include: "workflows/analysis.smk"
TARGETS += ["analysis.done"]
#else:
# raise Exception('You are not serious. No input data')
# print("No input data provided")
rule all:
input:
TARGETS
# steps: "assembly_annotation mapping metaT mmseq binning taxonomy"
steps: "mapping metaT mmseq binning taxonomy"
# steps: "assembly_annotation mapping metaT mmseq binning taxonomy analysis"
steps: "binning taxonomy"
data_dir: "data"
results_dir: "results"
db_dir: "dbs"
......
......@@ -43,9 +43,13 @@ if 'taxonomy' in STEPS:
include: "workflows/taxonomy.smk"
TARGETS += ["taxonomy_for_ont.done"]
else:
# raise Exception('You are not serious. No input data')
print("No input data provided")
if 'analysis' in STEPS:
include: "workflows/analysis.smk"
TARGETS += ["analysis.done"]
#else:
# raise Exception('You are not serious. No input data')
# print("No input data provided")
rule all:
......
......@@ -43,6 +43,10 @@ if 'taxonomy' in STEPS:
include: "workflows/taxonomy.smk"
TARGETS += ["taxonomy_for_ont.done"]
if 'analysis' in STEPS:
include: "workflows/analysis.smk"
TARGETS += ["analysis.done"]
#else:
# raise Exception('You are not serious. No input data')
# print("No input data provided")
......
# steps: "assembly_annotation mapping metaT mmseq binning taxonomy analysis"
steps: "binning taxonomy"
data_dir: "data"
results_dir: "results"
db_dir: "dbs"
runs:
first: "S1_SizeSelected"
second: "S3_Gtube"
# third: "20181108_0827_test"
assemblers: ["flye"]
p7zip:
bin: "/home/users/claczny/apps/software/p7zip_16.02/bin/7za"
threads: 4
ont_fast5_api:
single_to_multi_fast5:
bin: "single_to_multi_fast5"
batch: 8000
threads: 8
flowcell: "FLO-MIN106"
kit: "SQK-LSK109"
#barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"]
barcodes: ["no_barcode"]
guppy_cpu:
path: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin"
bin: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin/guppy_basecaller"
version: "cpu-3.1.5"
config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
cpu_threads: 28
guppy_gpu:
path: "/home/users/sbusi/apps/ont-guppy/bin"
bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_basecaller"
version: "3.6.0+98ff765"
config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
hac_config: "dna_r9.4.1_450bps_hac.cfg"
records_per_fastq: 8000
chunk_size: 1000
chunks_per_runner: 1000
num_callers: 4
runners_per_device: 2
gpu_device: "cuda:0"
cpu_threads: 28
guppy_barcoder:
path: "/home/users/sbusi/apps/ont-guppy/bin"
bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_barcoder"
version: "3.4.5+fb1fbfb"
records_per_fastq: 8000
threads: 8
nanostats:
short_reads_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/data/raw/short_reads"
#short_reads_prefix: "/mnt/isilon/projects/lcsb_sequencing/transfer/bioecosystem/Rashi/2019/Apr/fastq"
metaT_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/data/metaT"
fastp:
min_length: 40
minimap2:
threads: 16
igc:
uri: "parrot.genomics.cn/gigadb/pub/10.5524/100001_101000/100064/1.GeneCatalogs/IGC.fa.gz"
hg38:
uri: "ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.38_GRCh38.p12/GCF_000001405.38_GRCh38.p12_genomic.fna.gz"
genomecov:
bin: "bedtools genomecov"
compute_avg_coverage:
bin: "scripts/coverage.awk"
bwa:
threads: 24
long_reads_index:
opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
samtools:
sort:
threads: 4
chunk_size: "4G"
view:
threads: 4
flye:
bin: "flye"
threads: 27
genome_size: "1g"
operams:
bin: "set +u; source ~/.bashrc; set -u; ml lang/Perl lang/R && perl /scratch/users/claczny/ont/apps/software/OPERA-MS/OPERA-MS.pl"
threads: 28
megahit:
threads: 28
nonpareil:
memory: 4096
threads: 14
medaka:
threads: 28
racon:
threads: 28
rebaler:
threads: 28
diamond:
threads: 28
db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
metaspades:
threads: 28
mmseq2:
threads: 24
# Define sample names
#samples: ["flye", "megahit", "metaspades_hybrid"]
# samples: ["flye", "megahit"]
# samples: ["metaspades_hybrid"]
binning_samples: ["flye", "megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "bwa_merged_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
# Hybrid assembler
hybrid_assembler: "metaspades_hybrid"
# Directory where fastq files are
#data_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/Binning"
# Directory to save the output to
#results_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/Binning"
# Number of cpus or threads to use
threads: 28
# Path to the 140GB Kraken2 database
kraken2_database: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
# Path to DAS_Tool
DAS_Tool:
path: "/home/users/sbusi/apps/DAS_Tool-master"
bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
# Path to DAS_Tool database
dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
# Mapping options
# NOTE(review): "bwa", "samtools" and "minimap2" are also defined earlier in
# this file (the earlier "minimap2" entry has threads: 16, this one 24). If
# both occurrences are top-level keys they are duplicate keys, and most YAML
# loaders silently keep only the later values given here - confirm the nesting
# and merge the two definitions into one.
bwa:
threads: 24
long_reads_index:
opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
samtools:
sort:
threads: 4
chunk_size: "4G"
view:
threads: 4
minimap2:
threads: 24
# Path to GTDBTK database
GTDBTK:
DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
# Rscript path
Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
mmseqs:
path: "/home/users/sbusi/apps/mmseqs/bin"
createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
steps: "assembly_annotation metaT"
# steps: "assembly_annotation mmseq metaT mapping binning taxonomy"
data_dir: "data"
results_dir: "results"
db_dir: "dbs"
runs:
first: "S1_SizeSelected"
second: "S3_Gtube"
# third: "20181108_0827_test"
barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"]
assemblers: ["flye"]
p7zip:
bin: "/home/users/claczny/apps/software/p7zip_16.02/bin/7za"
threads: 4
ont_fast5_api:
single_to_multi_fast5:
bin: "single_to_multi_fast5"
batch: 8000
threads: 8
flowcell: "FLO-MIN106"
kit: "SQK-LSK109"
#barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"]
# NOTE(review): "barcodes" is also set further up in this file, next to
# "assemblers", with five barcodes. If both are top-level keys this is a
# duplicate key and most YAML loaders keep only this later single-barcode
# value - confirm which list is intended and keep one definition.
barcodes: ["barcode07"]
guppy_cpu:
path: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin"
bin: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin/guppy_basecaller"
version: "cpu-3.1.5"
config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
cpu_threads: 28
guppy_gpu:
path: "/home/users/sbusi/apps/ont-guppy/bin"
bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_basecaller"
version: "3.6.0+98ff765"
config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
hac_config: "dna_r9.4.1_450bps_hac.cfg"
records_per_fastq: 8000
chunk_size: 1000
chunks_per_runner: 1000
num_callers: 4
runners_per_device: 2
gpu_device: "cuda:0"
cpu_threads: 28
guppy_barcoder:
path: "/home/users/sbusi/apps/ont-guppy/bin"
bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_barcoder"
version: "3.4.5+fb1fbfb"
records_per_fastq: 8000
threads: 8
nanostats:
short_reads_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/data/raw/short_reads"
#short_reads_prefix: "/mnt/isilon/projects/lcsb_sequencing/transfer/bioecosystem/Rashi/2019/Apr/fastq"
metaT_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/data/metaT"
fastp:
min_length: 40
minimap2:
threads: 16
igc:
uri: "parrot.genomics.cn/gigadb/pub/10.5524/100001_101000/100064/1.GeneCatalogs/IGC.fa.gz"
hg38:
uri: "ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.38_GRCh38.p12/GCF_000001405.38_GRCh38.p12_genomic.fna.gz"
genomecov:
bin: "bedtools genomecov"
compute_avg_coverage:
bin: "scripts/coverage.awk"
bwa:
threads: 24
long_reads_index:
opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
samtools:
sort:
threads: 4
chunk_size: "4G"
view:
threads: 4
flye:
bin: "flye"
threads: 27
genome_size: "1g"
operams:
bin: "set +u; source ~/.bashrc; set -u; ml lang/Perl lang/R && perl /scratch/users/claczny/ont/apps/software/OPERA-MS/OPERA-MS.pl"
threads: 28
megahit:
threads: 28
nonpareil:
memory: 4096
threads: 14
medaka:
threads: 28
racon:
threads: 28
rebaler:
threads: 28
diamond:
threads: 28
db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
metaspades:
threads: 28
mmseq2:
threads: 24
# Define sample names
#samples: ["flye", "megahit", "metaspades_hybrid"]
# samples: ["flye", "megahit"]
# samples: ["metaspades_hybrid"]
binning_samples: ["flye", "megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "bwa_merged_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
# Hybrid assembler
hybrid_assembler: "metaspades_hybrid"
# Directory where fastq files are
#data_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/Binning"
# Directory to save the output to
#results_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/Binning"
# Number of cpus or threads to use
threads: 28
# Path to the 140GB Kraken2 database
kraken2_database: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
# Path to DAS_Tool
DAS_Tool:
path: "/home/users/sbusi/apps/DAS_Tool-master"
bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
# Path to DAS_Tool database
dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
# Mapping options
# NOTE(review): "bwa", "samtools" and "minimap2" are also defined earlier in
# this file (the earlier "minimap2" entry has threads: 16, this one 24). If
# both occurrences are top-level keys they are duplicate keys, and most YAML
# loaders silently keep only the later values given here - confirm the nesting
# and merge the two definitions into one.
bwa:
threads: 24
long_reads_index:
opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
samtools:
sort:
threads: 4
chunk_size: "4G"
view:
threads: 4
minimap2:
threads: 24
# Path to GTDBTK database
GTDBTK:
DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
# Rscript path
Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
......@@ -43,8 +43,13 @@ if 'taxonomy' in STEPS:
include: "workflows/taxonomy.smk"
TARGETS += ["taxonomy_for_ont.done"]
else:
raise Exception('You are not serious. No input data')
if 'analysis' in STEPS:
include: "workflows/analysis.smk"
TARGETS += ["analysis.done"]
#else:
# raise Exception('You are not serious. No input data')
# print("No input data provided")
rule all:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment