Commit c6afa55b authored by Valentina Galata

added slurm.yaml for Zymo-GridION-EVEN-BB-SN; rm not used configs

parent 82fd036f
Part of merge request !76: Merge "cleanup" branch with "master" branch
File moved
############################################################
# STEPS
# Pipeline steps
# NOTE: binning and taxonomic analysis steps are not included
# steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis"]
steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis"]
# NOTE: currently not used
# Annotation sub-steps
# annotation_steps: ["plasmids", "crispr", "amr"]
# NOTE: currently not used
# Analysis sub-steps
# analysis_steps: ["quast", "prodigal", "cdhit", "mmseqs2"]
############################################################
# INPUT
# working directory: will contain the results
# work_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB"
work_dir: "/scratch/users/vgalata/ont_pilot"
# Paths WITHIN the working directory
# directory containing required DBs
db_dir: "dbs"
# results directory
results_dir: "results"
# Data paths: Use absolute paths or paths relative to the working directory !!!
data:
  # Meta-genomics
  metag:
    sr:
      r1: "data/raw/short_reads/ONT3_MG_xx_Rashi_S11_R1_001.fastq.gz"
      r2: "data/raw/short_reads/ONT3_MG_xx_Rashi_S11_R2_001.fastq.gz"
    ont:
      # List of directories containing FAST5 files
      dirs: ["data/multifast5"]
      # List of FAST5 files
      files: []
  # Meta-transcriptomics
  metat:
    sr:
      r1: "data/metaT/FastSelectFull1_MT_Rashi_S14_R1_001.fastq.gz" # leave empty if no data, i.e. ""
      r2: "data/metaT/FastSelectFull1_MT_Rashi_S14_R2_001.fastq.gz" # leave empty if no data, i.e. ""
  # Meta-proteomics
  metap:
    # TODO
# binning_samples: ["flye", "megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "bwa_merged_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
############################################################
# TOOLS
##############################
# Preprocessing
# TODO: installation
# Preprocessing: LR: Basecalling
# XXX
guppy:
  config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
  gpu:
    path: "/home/users/sbusi/apps/ont-guppy/bin"
    bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_basecaller"
    version: "3.6.0+98ff765"
  records_per_fastq: 8000
  chunk_size: 1000
  chunks_per_runner: 1000
  num_callers: 4
  runners_per_device: 2
  gpu_device: "cuda:0"
  threads: 20
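# Hedged example (not taken from the workflow rules): with the settings above, the GPU
# basecaller would be invoked roughly as
#   guppy_basecaller -c dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg -x cuda:0 \
#     -i <fast5_dir> -s <out_dir> --records_per_fastq 8000 --chunk_size 1000 \
#     --chunks_per_runner 1000 --num_callers 4 --gpu_runners_per_device 2
# The actual command is built by the pipeline; this comment only documents the parameters.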
# Preprocessing: SR
# XXX
fastp:
  threads: 10
  min_length: 40
# FastQ QC
# https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
fastqc:
  threads: 10
  params: "-q -f fastq"
##############################
# Assembly
# List of assemblers for different read types
assemblers:
  sr: ["megahit", "metaspades"]
  lr: ["flye"]
  hy: ["metaspadeshybrid", "operams"]
# XXX
flye:
  threads: 10
  genome_size: "1g"
# XXX
metaspades:
  threads: 10
# XXX
megahit:
  threads: 10
# TODO: installation
# https://github.com/CSB5/OPERA-MS
operams:
  threads: 10
  bin: "/home/users/sbusi/apps/miniconda3/envs/operams/OPERA-MS/OPERA-MS.pl"
##############################
# Long-read assembly polishing
# XXX
medaka:
  threads: 10 # do NOT set to a large value (e.g. using 30 did not work)
  model: r941_min_high # MinION model, high accuracy
# XXX
racon:
  threads: 30
##############################
# Mapping
# Mapper
# http://bio-bwa.sourceforge.net/
bwa:
  threads: 10
  long_reads_index:
    opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
# SAM utils
# http://www.htslib.org/doc/samtools.html
samtools:
  sort:
    # threads: 10
    chunk_size: "4G"
  view:
    # threads: 10
##############################
# Annotation
# TODO: data download
# Sequence search
# XXX
diamond:
  threads: 20
  # db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
  db: "/work/projects/ecosystem_biology/local_tools/databases/nr_uniprot_trembl.dmnd"
# CRISPR
# https://github.com/dnasko/CASC
casc:
  threads: 10
  # path: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/bin"
  # perl5lib: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/lib/site_perl"
# CRISPR
# https://github.com/ctSkennerton/minced
minced:
  # path: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/minced/"
# Plasmid prediction
# https://github.com/smaegol/PlasFlow
plasflow:
  threshold: 0.7 # classification probability threshold
  minlen: 1000 # remove contigs shorter than this length
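# Hedged example: after dropping contigs shorter than `minlen`, prediction is assumed to run as
#   PlasFlow.py --input contigs_ge1000.fasta --output plasflow_predictions.tsv --threshold 0.7
# (file names are illustrative).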
# AMR prediction
# https://github.com/arpcard/rgi
rgi:
  threads: 5
  db_url: "https://card.mcmaster.ca/latest/data"
  alignment_tool: "DIAMOND" # DIAMOND or BLAST
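# Hedged example: once the CARD data from `db_url` is installed, AMR prediction is assumed
# to follow the standard RGI calls, e.g.
#   rgi load --card_json card.json --local
#   rgi main --input_sequence contigs.fasta --output_file rgi_out \
#     --input_type contig --alignment_tool DIAMOND --num_threads 5 --local
# (file names are illustrative).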
##############################
# Analysis
# XXX
bbmap:
  threads: 10
# Assembly quality
# XXX
quast:
  threads: 10
# Sequence search and clustering
# https://github.com/soedinglab/MMseqs2
mmseqs2:
  threads: 30
  # path: "/home/users/sbusi/apps/mmseqs/bin"
  # createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
  # rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
  # convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
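# Hedged example: comparing gene sets from two assemblies by reciprocal best hits would use
# the sub-commands listed above, e.g.
#   mmseqs createdb flye_genes.fna flyeDB
#   mmseqs createdb megahit_genes.fna megahitDB
#   mmseqs rbh flyeDB megahitDB rbhDB tmp --threads 30
#   mmseqs convertalis flyeDB megahitDB rbhDB rbh.tsv
# (database and file names are illustrative).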
# Seq. alignment
# https://mummer4.github.io/
mummer:
  archive: "https://github.com/mummer4/mummer/releases/download/v3.9.4alpha/mummer-3.9.4alpha.tar.gz"
##############################
# Taxonomy
# XXX
# kraken2:
# db: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
# # XXX
# GTDBTK:
# DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
##############################
# Binning
# DAS_Tool:
# path: "/home/users/sbusi/apps/DAS_Tool-master"
# bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
# db: "/home/users/sbusi/apps/DAS_Tool-master/db/"
# Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
# # Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
# # dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
##############################
# ???
# nonpareil:
# memory: 4096
# threads: 14
# rebaler:
# threads: 28
# general
data_path: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/"
output: "output" # output directory
utils: "src/utils.R" # path to utils.R
assemblers: ["flye", "megahit", "metaspades", "metaspades_hybrid"]
# figures
fig_mmseq_upsetr:
  script: "src/fig_mmseq_upsetr.R"
  input:
    overlap_sizes: "data/overlap_sizes.txt"
  output: "fig_mmseq_upsetr.pdf"
  width: 7
  height: 5
fig_quast:
  script: "src/fig_quast.R"
  input:
    stats: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/quast_methylation_v_non_comparison/quast_comparision_meth_v_non_results/report.tsv"
  output: "fig_quast.pdf"
  width: 7
  height: 10
fig_partial_genes:
  script: "src/fig_partial_genes.R"
  input:
    counts: "data/2019_GDB_protein_partial_genes.txt"
  output: "fig_partial_genes.pdf"
  width: 7
  height: 5
fig_nanostats:
  script: "src/fig_nanostats.R"
  input:
    stats: "data/nanostats_summary.tsv"
  output: "fig_nanostats.pdf"
  width: 7
  height: 9
fig_crispr:
  script: "src/fig_crispr.R"
  input:
    stats: "data/crispr_summary.tsv"
  output: "fig_crispr.pdf"
  width: 7
  height: 5
fig_plasflow:
  script: "src/fig_plasflow.R"
  input:
    stats: "data/plasflow_summary.tsv"
  output: "fig_plasflow.pdf"
  width: 7
  height: 5
fig_rgi:
  script: "src/fig_rgi.R"
  input:
    stats: "data/rgi_summary.tsv"
  output: "fig_rgi.pdf"
  width: 10
  height: 7
#!/bin/bash -l
##############################
# SLURM
# NOTE: used for this script only, NOT for the snakemake call below
#SBATCH -J ONT_pilot
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH --time=2-00:00:00
#SBATCH -p batch
#SBATCH --qos=qos-batch
##############################
# SNAKEMAKE
# conda env name
ONTP_ENV="ONT_pilot"
# number of cores for snakemake
ONTP_CORES=60
# snakemake file
ONTP_SMK="workflow/Snakefile"
# config file
ONTP_CONFIG="config/config.yaml" # USER INPUT REQUIRED
# slurm config file
ONTP_SLURM="config/slurm.yaml"
# slurm cluster call
ONTP_CLUSTER="-p {cluster.partition} -q {cluster.qos} {cluster.explicit} -N {cluster.nodes} -n {cluster.n} -c {threads} -t {cluster.time} --job-name={cluster.job-name}"
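# Minimal sketch (assumption, not the committed file) of the cluster config referenced by
# ONTP_SLURM; each {cluster.*} placeholder above is resolved from entries like:
#   __default__:
#     partition: "batch"
#     qos: "qos-batch"
#     explicit: ""
#     nodes: 1
#     n: 1
#     time: "2-00:00:00"
#     job-name: "ONT_pilot.{rule}"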
##############################
# IMP
# activate the env
conda activate ${ONTP_ENV}
# run the pipeline
snakemake -s ${ONTP_SMK} -rp --cores ${ONTP_CORES} --configfile ${ONTP_CONFIG} \
--use-conda --conda-prefix ${CONDA_PREFIX}/pipeline \
--cluster-config ${ONTP_SLURM} --cluster "sbatch ${ONTP_CLUSTER}"
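# Hedged usage note: this script is itself a SLURM job (see the #SBATCH header above), so it
# is assumed to be submitted as e.g. `sbatch <this_script>.sh`; snakemake then submits one
# sbatch job per rule via the --cluster call.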