updated ganges config.yaml

7d13fd2c · Valentina Galata · ee9d2237 · 7d13fd2c
Commit 7d13fd2c authored 4 years ago by Valentina Galata
--- a/config/ganges/config.yaml
+++ b/config/ganges/config.yaml
+############################################################
+# STEPS
+
+# Pipeline steps to be done: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
+steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
+
+############################################################
+# INPUT
+
+# working directory: will contain the results (should be writeable)
+work_dir: "/scratch/users/sbusi/ONT/ganges"
+
+# Paths WITHIN the working directory
+# directory containing required DBs (should be writeable)
+db_dir: "dbs"
+# results directory (will be created in work_dir)
+results_dir: "results"
+
+# Data paths: Use absolute paths or paths relative to the working directory !!!
+data:
+    # Meta-genomics
+    metag:
+        sr: 
+            r1: "data/sr/ERR2612536/ERR2612536_1.fastq.gz"
+            r2: "data/sr/ERR2612536/ERR2612536_2.fastq.gz"
+        ont:
+            # List of directories containing FAST5 files
+            dirs: ["data/lr/SO_6051_metagenome"] # leave empty if no data, i.e. []
+            # List of FAST5 files
+            files: [] # leave empty if no data, i.e. []
+            # FASTQ file: if given, NO basecalling will be done !!!
+            fastq: "" # leave empty if no data, i.e. ""
+    # Meta-transcriptomics
+    metat:
+        sr:
+            r1: "" # leave empty if no data, i.e. ""
+            r2: "" # leave empty if no data, i.e. ""
+    # Meta-proteomics
+    metap:
+        # TODO
+
+############################################################
+# TOOLS
+
+##############################
+# Preprocessing
+
+# TODO: installation
+# Preprocessing: LR: Basecalling
+# XXX
+guppy:
+    config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
+    gpu:
+        path: "/home/users/sbusi/apps/ont-guppy/bin"
+        bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_basecaller"
+        version: "3.6.0+98ff765"
+        records_per_fastq: 8000
+        chunk_size: 1000
+        chunks_per_runner: 1000
+        num_callers: 4
+        runners_per_device: 2
+        gpu_device: "cuda:0"
+        threads: 20
+
+# Preprocessing: SR
+# https://github.com/OpenGene/fastp
+fastp:
+    threads: 10
+    min_length: 40
+
+# FastQ QC
+# https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+fastqc:
+    threads: 10
+    params: "-q -f fastq"
+
+##############################
+# Assembly
+
+# List of assemblers for different read types
+assemblers:
+    sr: ["megahit", "metaspades"]
+    lr: ["flye"]
+    hy: ["metaspadeshybrid", "operams"]
+
+# https://github.com/fenderglass/Flye
+flye:
+    threads: 10
+    genome_size: "1g"
+
+# https://github.com/ablab/spades
+metaspades:
+    threads: 10
+
+# https://github.com/voutcn/megahit
+megahit:
+    threads: 10
+
+# TODO: installation
+# https://github.com/CSB5/OPERA-MS
+operams:
+    threads: 10
+    bin: "/home/users/sbusi/apps/miniconda3/envs/operams/OPERA-MS/OPERA-MS.pl"
+
+##############################
+# Long-read assembly polishing
+
+# https://github.com/nanoporetech/medaka
+medaka:
+    threads: 10 # do NOT set to a large value (e.g. using 30 did not work)
+    model: r941_min_high # the MinION model, high accuracy
+
+# https://github.com/lbcb-sci/racon
+racon:
+    threads: 30
+
+##############################
+# Mapping
+
+# Mapper
+# http://bio-bwa.sourceforge.net/
+bwa:
+    threads: 10
+    long_reads_index:
+        opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
+
+# SAM utils
+# http://www.htslib.org/doc/samtools.html
+samtools:
+    sort:
+        # threads: 10
+        chunk_size: "4G"
+    view:
+        # threads: 10
+
+##############################
+# Annotation
+
+# TODO: data download
+# Sequence search
+# https://github.com/bbuchfink/diamond
+diamond:
+    threads: 20
+    #db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
+    db: "/work/projects/ecosystem_biology/local_tools/databases/nr_uniprot_trembl.dmnd"
+
+# CRISPR
+# https://github.com/dnasko/CASC
+casc:
+    threads: 10
+    # path: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/bin"
+    # perl5lib: "/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/lib/site_perl"
+
+# CRISPR
+# https://github.com/ctSkennerton/minced
+minced:
+    # path: "$PATH:/mnt/lscratch/users/sbusi/ONT/cedric_ont_basecalling/2019_GDB/crispr/minced/"
+
+# Plasmid prediction
+# https://github.com/smaegol/PlasFlow
+plasflow:
+    threshold: 0.7 # classification probability threshold
+    minlen: 1000 # remove contigs shorter than this length
+
+# AMR prediction
+# https://github.com/arpcard/rgi
+rgi:
+    threads: 5
+    db_url: "https://card.mcmaster.ca/latest/data"
+    alignment_tool: "DIAMOND" # DIAMOND or BLAST
+
+##############################
+# Analysis
+
+# https://sourceforge.net/projects/bbmap/
+bbmap:
+    threads: 10
+
+# Assembly quality
+# http://quast.sourceforge.net/
+quast:
+    threads: 10
+
+# Sequence search and clustering
+# https://github.com/soedinglab/MMseqs2
+mmseqs2:
+    threads: 30
+    # path: "/home/users/sbusi/apps/mmseqs/bin"
+    # createdb: "/home/users/sbusi/apps/mmseqs/bin/mmseqs createdb"
+    # rbh: "/home/users/sbusi/apps/mmseqs/bin/mmseqs rbh"
+    # convertalis: "/home/users/sbusi/apps/mmseqs/bin/mmseqs convertalis"
+
+# Sequence alignment
+# https://mummer4.github.io/
+# mummer:
+    # path: "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/mummer/mummer-3.9.4alpha/bin"
+
+##############################
+# Taxonomy
+
+# https://ccb.jhu.edu/software/kraken2/
+# https://github.com/DerrickWood/kraken2
+kraken2:
+    threads: 10
+    db:
+        maxikraken: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
+    class:
+        sr: "--gzip-compressed --paired"
+        lr: "" # TODO
+        contigs: "" # TODO
+
+# # XXX
+# GTDBTK:
+#     DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
+
+##############################
+# MISC
+
+# https://github.com/marbl/mash
+mash:
+    threads: 10
+
+##############################
+# Binning
+
+# DAS_Tool:
+#     path: "/home/users/sbusi/apps/DAS_Tool-master"
+#     bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
+#     db: "/home/users/sbusi/apps/DAS_Tool-master/db/"
+#     Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
+# # Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
+# # dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
+
+##############################
+# ???
+# nonpareil:
+#     memory: 4096
+#     threads: 14
+
+# rebaler:
+#     threads: 28
\ No newline at end of file