diff --git a/2019_GDB/config/CONFIG.yaml b/2019_GDB/config/CONFIG.yaml index 0499477e5502c1d1788f5c88267530f5c90403d5..7647f99649656de25490669bb20210bd0216a914 100755 --- a/2019_GDB/config/CONFIG.yaml +++ b/2019_GDB/config/CONFIG.yaml @@ -1,20 +1,23 @@ -# steps: "assembly_annotation mapping metaT mmseq binning taxonomy analysis" -# steps: "binning taxonomy analysis" -steps: "analysis" +steps: ["assembly_annotation", "mapping", "metaT", "mmseq", "binning", "taxonomy", "analysis"] + # analysis_steps: ["cdhit", "mappability", "crispr", "plasmids", "amr"] -analysis_steps: ["plasmids", "amr"] +analysis_steps: ["cdhit", "mappability", "crispr", "plasmids", "amr"] + # working directory containing all relevant data, # i.e. prefix for data, results, DBs etc. work_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB" data_dir: "data" results_dir: "results" db_dir: "dbs" + runs: first: "S1_SizeSelected" second: "S3_Gtube" # third: "20181108_0827_test" + # assemblers: ["flye"] assemblers: ["flye", "megahit", "metaspades", "metaspades_hybrid"] + p7zip: bin: "/home/users/claczny/apps/software/p7zip_16.02/bin/7za" threads: 4 @@ -25,8 +28,10 @@ ont_fast5_api: threads: 8 flowcell: "FLO-MIN106" kit: "SQK-LSK109" + #barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"] barcodes: ["no_barcode"] + guppy_cpu: path: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin" bin: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin/guppy_basecaller" @@ -52,10 +57,13 @@ guppy_barcoder: version: "3.4.5+fb1fbfb" records_per_fastq: 8000 threads: 8 + nanostats: + short_reads_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/data/raw/short_reads" #short_reads_prefix: "/mnt/isilon/projects/lcsb_sequencing/transfer/bioecosystem/Rashi/2019/Apr/fastq" metaT_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/data/metaT" + fastp: min_length: 40 minimap2: @@ -104,6 +112,7 @@ metaspades: threads: 28 
mmseq2: threads: 24 + # Define sample names #samples: ["flye", "megahit", "metaspades_hybrid"] # samples: ["flye", "megahit"] @@ -127,10 +136,11 @@ kraken2_database: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/" # Path to DAS_Tool DAS_Tool: - path: "/home/users/sbusi/apps/DAS_Tool-master" - bin: "/home/users/sbusi/apps/DAS_Tool-master/src/" + path: "/home/users/sbusi/apps/DAS_Tool-master" + bin: "/home/users/sbusi/apps/DAS_Tool-master/src/" # Path to DAS_Tool database +# TODO: mv to DAS_Tool dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/" # Mapping options @@ -152,13 +162,15 @@ GTDBTK: DATA: "/home/users/sbusi/apps/db/gtdbtk/release89" # Rscript path +# TODO: mv to DAS_Tool Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/" +# XXX mmseqs: - path: "/home/users/sbusi/apps/mmseqs/bin" - createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb" - rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh" - convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis" + path: "/home/users/sbusi/apps/mmseqs/bin" + createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb" + rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh" + convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis" # CRISPR CASC: diff --git a/2019_GDB/updated_SNAKEFILE b/2019_GDB/updated_SNAKEFILE index c2d004e0cd876bb0243daa0574d14b30891a4aad..9ab57599bc2ba5958290d74d3ccc113e7b1156f6 100755 --- a/2019_GDB/updated_SNAKEFILE +++ b/2019_GDB/updated_SNAKEFILE @@ -1,5 +1,8 @@ # File for running ONT analyses +import os +from tempfile import TemporaryDirectory + # default configuration file configfile:"config/CONFIG.yaml" @@ -9,16 +12,13 @@ DATA_DIR = config["data_dir"] RESULTS_DIR = config["results_dir"] DB_DIR = config["db_dir"] -RUNS = config['runs']['first'] STEPS = config['steps'] ANALYSIS_STEPS = config["analysis_steps"] -BARCODES = config["barcodes"] -# NOTE: do NOT set SAMPLES (used in some snakemake files) -ASSEMBLERS = config["assemblers"] 
-MAPPERS = ["bwa", "mmi"] -BINNING_SAMPLES = config["binning_samples"] -HYBRID_ASSEMBLER = config["hybrid_assembler"] +BARCODES=config["barcodes"] + +ASSEMBLERS=config["assemblers"] +HYBRID_ASSEMBLER=config["hybrid_assembler"] # default executable for snakemake shell.executable("bash")