Skip to content
Snippets Groups Projects

WIP: Checkpoint snakefile

Merged Susheel Busi requested to merge checkpoint_snakefile into master
Compare and Show latest version
32 files
+ 854
337
Compare changes
  • Side-by-side
  • Inline
Files
32
+ 48
13
# steps: "assembly_annotation mapping metaT mmseq binning taxonomy analysis"
steps: "binning taxonomy"
steps: ["assembly_annotation", "mapping", "metaT", "mmseq", "binning", "taxonomy", "analysis"]
# analysis_steps: ["cdhit", "mappability", "crispr", "plasmids", "amr"]
analysis_steps: ["cdhit", "mappability", "crispr", "plasmids", "amr"]
# Working directory containing all relevant data,
# i.e. the common prefix for the data, results, and database directories.
work_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB"
data_dir: "data"
results_dir: "results"
db_dir: "dbs"
runs:
first: "S1_SizeSelected"
second: "S3_Gtube"
# third: "20181108_0827_test"
assemblers: ["flye"]
# assemblers: ["flye"]
assemblers: ["flye", "megahit", "metaspades", "metaspades_hybrid"]
p7zip:
bin: "/home/users/claczny/apps/software/p7zip_16.02/bin/7za"
threads: 4
@@ -18,8 +28,10 @@ ont_fast5_api:
threads: 8
flowcell: "FLO-MIN106"
kit: "SQK-LSK109"
#barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"]
barcodes: ["no_barcode"]
guppy_cpu:
path: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin"
bin: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin/guppy_basecaller"
@@ -45,10 +57,13 @@ guppy_barcoder:
version: "3.4.5+fb1fbfb"
records_per_fastq: 8000
threads: 8
nanostats:
short_reads_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/data/raw/short_reads"
#short_reads_prefix: "/mnt/isilon/projects/lcsb_sequencing/transfer/bioecosystem/Rashi/2019/Apr/fastq"
metaT_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/data/metaT"
fastp:
min_length: 40
minimap2:
@@ -91,15 +106,15 @@ rebaler:
threads: 28
diamond:
threads: 28
db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
#db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
db: "/work/projects/ecosystem_biology/local_tools/databases/nr_uniprot_trembl.dmnd"
metaspades:
threads: 28
mmseq2:
threads: 24
# Define sample names
#samples: ["flye", "megahit", "metaspades_hybrid"]
# samples: ["flye", "megahit"]
# samples: ["metaspades_hybrid"]
samples: ["ONT3_MG_xx_Rashi_S11"]
binning_samples: ["flye", "megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "bwa_merged_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
# Hybrid assembler
@@ -119,10 +134,11 @@ kraken2_database: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
# Path to DAS_Tool
DAS_Tool:
path: "/home/users/sbusi/apps/DAS_Tool-master"
bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
path: "/home/users/sbusi/apps/DAS_Tool-master"
bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
# Path to DAS_Tool database
# TODO: move this setting into the DAS_Tool section
dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
# Mapping options
@@ -144,10 +160,29 @@ GTDBTK:
DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
# Rscript path
# TODO: move this setting into the DAS_Tool section
Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
# XXX
mmseqs:
path: "/home/users/sbusi/apps/mmseqs/bin"
createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
path: "/home/users/sbusi/apps/mmseqs/bin"
createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
# CRISPR
CASC:
PATH: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/bin"
PERL5LIB: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/lib/site_perl"
minced:
PATH: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/minced/"
# Plasmid prediction
plasflow:
threshold: 0.7 # classification probability threshold
minlen: 1000 # remove contigs shorter than this length
# AMR prediction
rgi:
db_url: "https://card.mcmaster.ca/latest/data"
alignment_tool: "DIAMOND" # DIAMOND or BLAST
Loading