Skip to content
Snippets Groups Projects
Commit f286f75b authored by Susheel Busi's avatar Susheel Busi
Browse files

minor changes

parent b143371e
No related branches found
No related tags found
No related merge requests found
steps: "assembly_annotation mmseq metaT mapping binning taxonomy"
data_dir: "data"
results_dir: "results"
db_dir: "dbs"
runs:
first: "20181106_1450_noselection_sizeselection"
second: "20181107_0906_same"
third: "20181108_0827_test"
barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"]
assemblers: ["flye"]
p7zip:
bin: "/home/users/claczny/apps/software/p7zip_16.02/bin/7za"
threads: 4
ont_fast5_api:
single_to_multi_fast5:
bin: "single_to_multi_fast5"
batch: 8000
threads: 8
flowcell: "FLO-MIN106"
kit: "SQK-LSK108"
#barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"]
barcodes: ["barcode07"]
guppy_cpu:
path: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin"
bin: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin/guppy_basecaller"
version: "cpu-3.1.5"
config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
cpu_threads: 28
guppy_gpu:
path: "/home/users/sbusi/apps/ont-guppy/bin"
bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_basecaller"
version: "3.4.5+fb1fbfb"
config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
hac_config: "dna_r9.4.1_450bps_hac.cfg"
records_per_fastq: 8000
chunk_size: 1000
chunks_per_runner: 1000
num_callers: 4
runners_per_device: 2
gpu_device: "cuda:0"
cpu_threads: 28
guppy_barcoder:
path: "/home/users/sbusi/apps/ont-guppy/bin"
bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_barcoder"
version: "3.4.5+fb1fbfb"
records_per_fastq: 8000
threads: 8
nanostats:
#short_reads_prefix: "/scratch/users/claczny/ont/fecal_pilot/data/raw/short_reads"
short_reads_prefix: "/mnt/isilon/projects/lcsb_sequencing/transfer/bioecosystem/Rashi/2019/Apr/fastq"
metaT_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/metaT/2018_GDB"
#samples: ["Kapa1_MG_S18", "Kapa2_MG_S19", "NEB1_MG_S16", "NEB2_MG_S17", "NEBmod1_MG_Rashi_S22", "NEBmod2_MG_Rashi_S23"]
fastp:
min_length: 40
minimap2:
threads: 16
igc:
uri: "parrot.genomics.cn/gigadb/pub/10.5524/100001_101000/100064/1.GeneCatalogs/IGC.fa.gz"
hg38:
uri: "ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.38_GRCh38.p12/GCF_000001405.38_GRCh38.p12_genomic.fna.gz"
genomecov:
bin: "bedtools genomecov"
compute_avg_coverage:
bin: "scripts/coverage.awk"
bwa:
threads: 24
long_reads_index:
opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
samtools:
sort:
threads: 4
chunk_size: "4G"
view:
threads: 4
flye:
bin: "flye"
threads: 27
genome_size: "1g"
operams:
bin: "set +u; source ~/.bashrc; set -u; ml lang/Perl lang/R && perl /scratch/users/claczny/ont/apps/software/OPERA-MS/OPERA-MS.pl"
threads: 28
megahit:
threads: 28
nonpareil:
memory: 4096
threads: 14
medaka:
threads: 28
racon:
threads: 28
rebaler:
threads: 28
diamond:
threads: 28
db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
metaspades:
threads: 28
mmseq2:
threads: 24
# Define sample names
# samples: ["flye", "megahit", "metaspades"]
# samples: ["flye", "megahit"]
samples: ["metaspades_hybrid"]
#binning_samples: ["flye", "megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "bwa_merged_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
binning_samples: ["megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
# Hybrid assembler
hybrid_assembler: "metaspades_hybrid"
# Directory containing the input FASTQ files
#data_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/Binning"
# Directory to save the output to
#results_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/Binning"
# Number of cpus or threads to use
threads: 28
# Path to the 140GB Kraken2 database
kraken2_database: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
# Path to DAS_Tool
DAS_Tool:
path: "/home/users/sbusi/apps/DAS_Tool-master"
bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
# Path to DAS_Tool database
dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
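# Mapping options
# NOTE(review): bwa, samtools and minimap2 are also defined earlier in this
# file (minimap2 there uses threads: 16, here threads: 24). If both sets are
# top-level keys, they are duplicates and only one definition will take
# effect when the file is loaded -- confirm and merge into a single definition.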
bwa:
threads: 24
long_reads_index:
opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
samtools:
sort:
threads: 4
chunk_size: "4G"
view:
threads: 4
minimap2:
threads: 24
# Path to GTDBTK database
GTDBTK:
DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
# Rscript path
Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
# File for running ONT analyses
#
# Top-level Snakefile: for every step named in config['steps'] it includes
# the matching workflow file and appends that workflow's outputs to TARGETS,
# which `rule all` then requests.

# default configuration file
configfile: "config/CONFIG.yaml"

# default executable for snakemake
shell.executable("bash")

# input settings
# Environment overrides are currently disabled. If they are re-enabled, note
# that the mapping is `os.environ` (there is no `os.environment`):
# RUNS=os.environ.get("RUNS", config['runs']['first']).split()
# STEPS=sorted(os.environ.get("STEPS", config['steps']).split())
RUNS = config['runs']['first']
STEPS = config['steps']
# NOTE(review): STEPS is used as read from the config; if it is a single
# string, the `in STEPS` checks below are substring matches, not token matches.

# NOTE(review): RESULTS_DIR, BARCODES, ASSEMBLERS, HYBRID_ASSEMBLER, MAPPERS
# and BINNING_SAMPLES are not defined in this file; presumably the included
# workflow files define them before they are used here -- confirm.

# include rules for the workflows based on "steps" in the CONFIG.yaml file
# ONT analyses workflow
TARGETS = []

if 'assembly_annotation' in STEPS:
    include: "workflows/assembly_annotation.smk"
    TARGETS += [
        "assemble_and_coverage.done",
        "annotate.done",
        "basecall_merge_qc.done",
        "coverage_of_references.done",
        "prodigal_gene_call.done",
        "diamond_proteins.done",
    ]

if 'mmseq' in STEPS:
    include: "workflows/mmseq.smk"
    TARGETS += [
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/{assembler}_db"), assembler=["flye", "megahit", "metaspades_hybrid"]),
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/flye_megahit_rbh")),
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/flye_metaspades_hybrid_rbh")),
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/megahit_metaspades_hybrid_rbh")),
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/flye_megahit.m8")),
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/flye_metaspades_hybrid.m8")),
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/megahit_metaspades_hybrid.m8")),
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/plot_files_ready.done")),
        expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/upset_plots.done")),
    ]

if 'metaT' in STEPS:
    include: "workflows/metat.smk"
    TARGETS += [
        expand(os.path.join(RESULTS_DIR, "preprocessing/metaT/{metaT_sample}.fastp.{report_type}"), metaT_sample="GDB_2018_metaT", report_type=["html", "json"]),
        expand(os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", assembler="megahit"),
        expand(os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", barcode=BARCODES, assembler="metaspades_hybrid"),
        expand(os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}_reads-x-{barcode}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", barcode=BARCODES, assembler=ASSEMBLERS),
        expand(os.path.join(RESULTS_DIR, "genomecov/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", assembler="megahit"),
        expand(os.path.join(RESULTS_DIR, "genomecov/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", barcode=BARCODES, assembler="metaspades_hybrid"),
        expand(os.path.join(RESULTS_DIR, "genomecov/metaT/lr/{metaT_sample}_reads-x-{barcode}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", barcode=BARCODES, assembler=ASSEMBLERS),
    ]

if 'mapping' in STEPS:
    include: "workflows/mapping.smk"
    TARGETS += [
        expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"), hybrid_assembler=HYBRID_ASSEMBLER),
        expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_{reads}_{hybrid_assembler}/{mapper}_{reads}_{hybrid_assembler}.bam"), mapper=MAPPERS, reads=["sr", "lr"], hybrid_assembler=HYBRID_ASSEMBLER),
        expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"), hybrid_assembler=HYBRID_ASSEMBLER),
        expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_merged_{hybrid_assembler}/{mapper}_merged_{hybrid_assembler}.bam"), mapper=MAPPERS, hybrid_assembler=HYBRID_ASSEMBLER),
    ]

if 'binning' in STEPS:
    include: "workflows/binning.smk"
    TARGETS += [
        expand(os.path.join(RESULTS_DIR, "assembly/{sample}.fa"), sample=BINNING_SAMPLES),
        expand(os.path.join(RESULTS_DIR, "Binning/{sample}/dastool_output/{sample}_proteins.faa"), sample=BINNING_SAMPLES),
    ]

if 'taxonomy' in STEPS:
    include: "workflows/taxonomy.smk"
    TARGETS += [
        expand(os.path.join(RESULTS_DIR, "Binning/checkm_output/{sample}_output.txt"), sample=BINNING_SAMPLES),
        expand(os.path.join(RESULTS_DIR, "Binning/gtdbtk_output/{sample}/gtdbtk.bac120.summary.tsv"), sample=BINNING_SAMPLES),
    ]

# Abort only when no step contributed any target. The original `else:` was
# attached to the 'taxonomy' check alone, so every run without the taxonomy
# step raised even though other steps had been selected.
if not TARGETS:
    raise Exception('You are not serious. No input data')

rule all:
    input:
        TARGETS
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment