# config.yaml
############################################################
# STEPS

# Steps to be done (one entry per line)
steps:
    - "preprocessing"
    - "assembly"
    - "mapping"
    - "annotation"
    - "analysis"
    - "taxonomy"

# Annotation sub-steps; prodigal is run in any case
steps_annotation:
    - "rgi"
    - "plasflow"
    - "minced"
    - "barrnap"
    - "kegg"

# Analysis sub-steps
steps_analysis:
    - "quast"
    - "mash"
    - "mashmap"
    - "fastani"
    - "mummer"
    - "cdhit"
    - "diamond"
    - "cov"

# Taxonomy sub-steps
steps_taxonomy:
    - "kraken2"
    - "kaiju"

############################################################
# INPUT

# Working directory: will contain the results (should be writable)
work_dir: "/scratch/users/vgalata/GDB"

# Paths WITHIN the working directory
# NOTE(review): db_dir below is an absolute path that is not under work_dir,
# which contradicts this heading - confirm how db_dir is resolved.
# Directory containing the required DBs (should be writable)
db_dir: "/mnt/lscratch/users/vgalata/ONT_pilot_DBs"
# Results directory (will be created in work_dir)
results_dir: "results"

# Data paths: use absolute paths or paths relative to the working directory!
data:
    # Meta-genomics
    metag:
        # sr: read pair given as two FASTQ files (r1, r2)
        sr:
            r1: "/mnt/isilon/projects/ecosystem_biology/ONT_pilot/GDB_2019/metag/sr/ONT3_MG_xx_Rashi_S11_R1_001.fastq.gz"
            r2: "/mnt/isilon/projects/ecosystem_biology/ONT_pilot/GDB_2019/metag/sr/ONT3_MG_xx_Rashi_S11_R2_001.fastq.gz"
        # ont: reads given as a single FASTQ file
        ont:
            fastq: "/scratch/users/vgalata/GDB/basecalling/lr.fastq.gz"
    # Meta-transcriptomics
    metat:
        sr:
            # Leave r1 and r2 empty if there is no data, i.e. ""
            r1: "/mnt/isilon/projects/ecosystem_biology/ONT_pilot/GDB_2019/metat/sr/FastSelectFull1_MT_Rashi_S14_R1_001.fastq.gz"
            r2: "/mnt/isilon/projects/ecosystem_biology/ONT_pilot/GDB_2019/metat/sr/FastSelectFull1_MT_Rashi_S14_R2_001.fastq.gz"
    # Meta-proteomics
    # metap:
        # TODO

############################################################
# TOOLS

##############################
# Preprocessing

# https://github.com/OpenGene/fastp
fastp:
    # Thread count for fastp (presumably passed to its thread option - confirm in the workflow rule)
    threads: 10
    # Minimum read length; presumably maps to fastp's --length_required - TODO confirm
    min_length: 40

# https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
fastqc:
    # Thread count for FastQC (presumably passed to its thread option - confirm in the workflow rule)
    threads: 10

##############################
# Assembly

# Assemblers to use per read type; every assembler name MUST be UNIQUE
assemblers:
    # NOTE(review): sr/lr/hy presumably stand for short-read, long-read and hybrid - confirm
    sr:
        - "megahit"
        - "metaspades"
    lr:
        - "flye"
        - "canu"
    hy:
        - "metaspadeshybrid"
        - "operamsmegahit"
        - "operamsmetaspades"

# https://github.com/fenderglass/Flye
flye:
    # Thread count for Flye (presumably passed to its thread option - confirm in the workflow rule)
    threads: 10

# https://canu.readthedocs.io/en/latest/
canu:
    threads: 24