Skip to content
Snippets Groups Projects
Commit 3370f1b8 authored by Valentina Galata
Browse files

updated GDB config files (issue #54)

parent 3abb643a
No related branches found
No related tags found
No related merge requests found
......@@ -3,7 +3,7 @@
# Steps to be done
# steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
steps: ["preprocessing"]
steps_annotation: ["diamond", "rgi", "plasflow", "minced", "barrnap"] # prodigal is run in any case
steps_analysis: ["quast", "cdhit", "mash_dist"]
steps_taxonomy: ["kraken2", "kaiju"]
......@@ -16,7 +16,7 @@ work_dir: "/scratch/users/vgalata/ont_pilot"
# Paths WITHIN the working directory
# directory containing required DBs (should be writeable)
db_dir: "dbs"
db_dir: "/mnt/lscratch/users/vgalata/ONT_pilot_DBs"
# results directory (will be created in work_dir)
results_dir: "results"
......@@ -161,7 +161,7 @@ samtools:
# https://github.com/bbuchfink/diamond
diamond:
threads: 20
db: "/work/projects/ecosystem_biology/local_tools/databases/nr_uniprot_trembl.dmnd" # TODO: data download
db: "nr_uniprot_trembl.dmnd" # file name in "dbs" folder
# CRISPR
# https://github.com/dnasko/CASC
......@@ -202,25 +202,19 @@ cdhit:
# https://github.com/BioInfoTools/BBMap/
bbmap:
threads: 10
# References to be used (w/ md5sums)
rrna_refs: [
# c0cd2aa2e84e3e3977859c34feb63cd5 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5.8s-database-id98.fasta
# 703e4c270ab0a578deb4800c33b36367 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5s-database-id98.fasta
# 8b4e6c6f17f6f35444a60fdc915e052c /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-16s-id95.fasta
# ca4edcdddb98d7868f93e2308e297704 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-23s-id98.fasta
# db6e72022cf650c4b33bd888b92a0391 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-16s-id90.fasta
# f347d2f8f8ffbfa28c785e3a9fe3db79 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-23s-id98.fasta
# 878a413765d09c3ec75409fb1d1573f1 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-18s-id95.fasta
# cbb973e63f52981bd591de0404df5839 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-28s-id98.fasta
"/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5.8s-database-id98.fasta",
"/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5s-database-id98.fasta",
"/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-16s-id95.fasta",
"/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-23s-id98.fasta",
"/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-16s-id90.fasta",
"/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-23s-id98.fasta",
"/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-18s-id95.fasta",
"/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-28s-id98.fasta"
rrna_refs: [ # file names in "dbs" folder
"sortmerna/rfam-5.8s-database-id98.fasta",
"sortmerna/rfam-5s-database-id98.fasta",
"sortmerna/silva-arc-16s-id95.fasta",
"sortmerna/silva-arc-23s-id98.fasta",
"sortmerna/silva-bac-16s-id90.fasta",
"sortmerna/silva-bac-23s-id98.fasta",
"sortmerna/silva-euk-18s-id95.fasta",
"sortmerna/silva-euk-28s-id98.fasta"
]
host_refs: # keep empty if no host specific reads should be removed
# key: url of GZ archive
GCF_000001405.38_GRCh38.p12: "ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.38_GRCh38.p12/GCF_000001405.38_GRCh38.p12_genomic.fna.gz"
# Assembly quality
# https://github.com/ablab/quast
......@@ -246,8 +240,8 @@ quast:
# https://github.com/DerrickWood/kraken2
kraken2:
threads: 10
db:
maxikraken: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
db: # dir. name in "dbs" folder
maxikraken: "maxikraken2_1903_140GB"
class:
sr: "--gzip-compressed --paired"
lr: ""
......@@ -258,13 +252,14 @@ kraken2:
# https://github.com/bioinformatics-centre/kaiju
kaiju:
threads: 10
db: # key = basename of *.fmi
kaiju_db_nr_euk: "/mnt/isilon/projects/ecosystem_biology/databases/kaiju/kaiju_db_nr_euk_2020-05-25"
db: # dir. name in "dbs" folder
# key = basename of *.fmi
kaiju_db_nr_euk: "kaiju_db_nr_euk_2020-05-25"
ranks: ["phylum", "class", "order", "family", "genus", "species"]
# # XXX
# GTDBTK:
# DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
# XXX
GTDBTK: # dir. name in "dbs" folder
DATA: "gtdbtk_release89"
##############################
# MISC
......
......@@ -35,6 +35,30 @@ rm_rrna_bbmap:
n: 1
explicit: ""
# Cluster job resources for the rm_host_bbmap_sr_metat rule.
# NOTE(review): values look like Slurm settings (time presumably D-HH:MM:SS,
# i.e. 0 days 4 hours) — confirm against the submission wrapper.
rm_host_bbmap_sr_metat:
  time: "00-4:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""  # empty string, not null
# Cluster job resources for the rm_host_bbmap_sr_metag rule.
# NOTE(review): values look like Slurm settings (time presumably D-HH:MM:SS,
# i.e. 0 days 4 hours) — confirm against the submission wrapper.
rm_host_bbmap_sr_metag:
  time: "00-4:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""  # empty string, not null
# Cluster job resources for the rm_host_bbmap_lr_metag rule.
# NOTE(review): values look like Slurm settings (time presumably D-HH:MM:SS,
# i.e. 0 days 4 hours) — confirm against the submission wrapper.
rm_host_bbmap_lr_metag:
  time: "00-4:00:00"
  partition: "bigmem"
  qos: "qos-bigmem"
  nodes: 1
  n: 1
  explicit: ""  # empty string, not null
# Assembly
assembly_lr_flye:
time: "00-8:00:00"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment