# Patch summary (this text precedes the first "diff --git" header and is not part of any hunk):
#   config/GDB/config.yaml
#     - restricts `steps` to ["preprocessing"]
#     - sets `db_dir` to an absolute path
#     - replaces absolute database paths (diamond, bbmap rrna_refs, kraken2, kaiju)
#       with names that the inline comments describe as located in the "dbs" folder
#     - adds `host_refs` (reference name -> archive URL) and un-comments `GTDBTK`
#   config/GDB/slurm.yaml
#     - adds identical SLURM settings for rm_host_bbmap_sr_metat,
#       rm_host_bbmap_sr_metag and rm_host_bbmap_lr_metag
# NOTE(review): line breaks, indentation and blank context lines were rebuilt from
#   the hunk line counts and the YAML structure; whitespace inside '-' and context
#   lines may differ from the pre-image, so verify against the repository before applying.
# NOTE(review): `db_dir` is now absolute although it sits under the comment
#   "Paths WITHIN the working directory" -- confirm the consumer handles this.
# NOTE(review): the `host_refs` value carries no URL scheme (e.g. ftp:// or https://)
#   -- confirm the download rule expects that form.
# NOTE(review): `steps` now runs preprocessing only -- confirm this is intended.
diff --git a/config/GDB/config.yaml b/config/GDB/config.yaml
index 48ffc94c9dca4767660bb2e59e4c27d1650072a0..5bddb151775bb4008a4d14230eb3878240c47722 100644
--- a/config/GDB/config.yaml
+++ b/config/GDB/config.yaml
@@ -3,7 +3,7 @@
 
 # Steps to be done
 # steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
-steps: ["preprocessing", "assembly", "mapping", "annotation", "analysis", "taxonomy"]
+steps: ["preprocessing"]
 steps_annotation: ["diamond", "rgi", "plasflow", "minced", "barrnap"] # prodigal is run in any case
 steps_analysis: ["quast", "cdhit", "mash_dist"]
 steps_taxonomy: ["kraken2", "kaiju"]
@@ -16,7 +16,7 @@ work_dir: "/scratch/users/vgalata/ont_pilot"
 
 # Paths WITHIN the working directory
 # directory containing required DBs (should be writeable)
-db_dir: "dbs"
+db_dir: "/mnt/lscratch/users/vgalata/ONT_pilot_DBs"
 # results directory (will be created in work_dir)
 results_dir: "results"
 
@@ -161,7 +161,7 @@ samtools:
 # https://github.com/bbuchfink/diamond
 diamond:
   threads: 20
-  db: "/work/projects/ecosystem_biology/local_tools/databases/nr_uniprot_trembl.dmnd" # TODO: data download
+  db: "nr_uniprot_trembl.dmnd" # file name in "dbs" folder
 
 # CRISPR
 # https://github.com/dnasko/CASC
@@ -202,25 +202,19 @@ cdhit:
 # https://github.com/BioInfoTools/BBMap/
 bbmap:
   threads: 10
-  # References to be used (w/ md5sums)
-  rrna_refs: [
-    # c0cd2aa2e84e3e3977859c34feb63cd5 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5.8s-database-id98.fasta
-    # 703e4c270ab0a578deb4800c33b36367 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5s-database-id98.fasta
-    # 8b4e6c6f17f6f35444a60fdc915e052c /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-16s-id95.fasta
-    # ca4edcdddb98d7868f93e2308e297704 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-23s-id98.fasta
-    # db6e72022cf650c4b33bd888b92a0391 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-16s-id90.fasta
-    # f347d2f8f8ffbfa28c785e3a9fe3db79 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-23s-id98.fasta
-    # 878a413765d09c3ec75409fb1d1573f1 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-18s-id95.fasta
-    # cbb973e63f52981bd591de0404df5839 /mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-28s-id98.fast
-    "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5.8s-database-id98.fasta",
-    "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/rfam-5s-database-id98.fasta",
-    "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-16s-id95.fasta",
-    "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-arc-23s-id98.fasta",
-    "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-16s-id90.fasta",
-    "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-bac-23s-id98.fasta",
-    "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-18s-id95.fasta",
-    "/mnt/irisgpfs/projects/ecosystem_biology/local_tools/IMP3/databases/sortmerna/silva-euk-28s-id98.fasta"
+  rrna_refs: [ # file names in "dbs" folder
+    "sortmerna/rfam-5.8s-database-id98.fasta",
+    "sortmerna/rfam-5s-database-id98.fasta",
+    "sortmerna/silva-arc-16s-id95.fasta",
+    "sortmerna/silva-arc-23s-id98.fasta",
+    "sortmerna/silva-bac-16s-id90.fasta",
+    "sortmerna/silva-bac-23s-id98.fasta",
+    "sortmerna/silva-euk-18s-id95.fasta",
+    "sortmerna/silva-euk-28s-id98.fasta"
   ]
+  host_refs: # keep empty if no host specific reads should be removed
+    # key: url of GZ archive
+    GCF_000001405.38_GRCh38.p12: "ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.38_GRCh38.p12/GCF_000001405.38_GRCh38.p12_genomic.fna.gz"
 
 # Assembly quality
 # https://github.com/ablab/quast
@@ -246,8 +240,8 @@ quast:
 # https://github.com/DerrickWood/kraken2
 kraken2:
   threads: 10
-  db:
-    maxikraken: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
+  db: # dir. name in "dbs" folder
+    maxikraken: "maxikraken2_1903_140GB"
   class:
     sr: "--gzip-compressed --paired"
     lr: ""
@@ -258,13 +252,14 @@ kraken2:
 # https://github.com/bioinformatics-centre/kaiju
 kaiju:
   threads: 10
-  db: # key = basename of *.fmi
-    kaiju_db_nr_euk: "/mnt/isilon/projects/ecosystem_biology/databases/kaiju/kaiju_db_nr_euk_2020-05-25"
+  db: # dir. name in "dbs" folder
+    # key = basename of *.fmi
+    kaiju_db_nr_euk: "kaiju_db_nr_euk_2020-05-25"
   ranks: ["phylum", "class", "order", "family", "genus", "species"]
 
-# # XXX
-# GTDBTK:
-# DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
+# XXX
+GTDBTK: # dir. name in "dbs" folder
+  DATA: "gtdbtk_release89"
 
 ##############################
 # MISC
diff --git a/config/GDB/slurm.yaml b/config/GDB/slurm.yaml
index 168089e77b1948297f6e806e7e1d7fada49d7fda..b0c31d96b280d3aa319c060f9b234d34bb1e09fb 100644
--- a/config/GDB/slurm.yaml
+++ b/config/GDB/slurm.yaml
@@ -35,6 +35,30 @@ rm_rrna_bbmap:
   n: 1
   explicit: ""
 
+rm_host_bbmap_sr_metat:
+  time: "00-4:00:00"
+  partition: "bigmem"
+  qos: "qos-bigmem"
+  nodes: 1
+  n: 1
+  explicit: ""
+
+rm_host_bbmap_sr_metag:
+  time: "00-4:00:00"
+  partition: "bigmem"
+  qos: "qos-bigmem"
+  nodes: 1
+  n: 1
+  explicit: ""
+
+rm_host_bbmap_lr_metag:
+  time: "00-4:00:00"
+  partition: "bigmem"
+  qos: "qos-bigmem"
+  nodes: 1
+  n: 1
+  explicit: ""
+
 # Assembly
 assembly_lr_flye:
   time: "00-8:00:00"