minor changes

f286f75b · Susheel Busi · b143371e · b143371e · b143371e · b143371e
Commit f286f75b authored 4 years ago by Susheel Busi
--- a/MODULAR_SNAKEFILE/CONFIG.yaml
+++ b/MODULAR_SNAKEFILE/CONFIG.yaml
-steps: "assembly_annotation mmseq metaT mapping binning taxonomy"
-data_dir: "data"
-results_dir: "results"
-db_dir: "dbs"
-runs:
-    first: "20181106_1450_noselection_sizeselection"
-    second: "20181107_0906_same"
-    third: "20181108_0827_test"
-barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"]
-assemblers: ["flye"]
-p7zip:
-    bin: "/home/users/claczny/apps/software/p7zip_16.02/bin/7za"
-    threads: 4
-ont_fast5_api:
-    single_to_multi_fast5:
-        bin: "single_to_multi_fast5"
-        batch: 8000
-        threads: 8
-flowcell: "FLO-MIN106"
-kit: "SQK-LSK108"
-#barcodes: ["barcode06", "barcode07", "barcode08", "barcode09", "barcode10"]
-barcodes: ["barcode07"]
-guppy_cpu:
-    path: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin"
-    bin: "/scratch/users/claczny/ont/apps/software/ont-guppy-cpu-3.1.5_linux64/bin/guppy_basecaller"
-    version: "cpu-3.1.5"
-    config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
-    cpu_threads: 28
-guppy_gpu:
-    path: "/home/users/sbusi/apps/ont-guppy/bin"
-    bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_basecaller"
-    version: "3.4.5+fb1fbfb"
-    config: "dna_r9.4.1_450bps_modbases_dam-dcm-cpg_hac.cfg"
-    hac_config: "dna_r9.4.1_450bps_hac.cfg"
-    records_per_fastq: 8000
-    chunk_size: 1000
-    chunks_per_runner: 1000
-    num_callers: 4
-    runners_per_device: 2
-    gpu_device: "cuda:0"
-    cpu_threads: 28
-guppy_barcoder:
-    path: "/home/users/sbusi/apps/ont-guppy/bin"
-    bin: "set +u; source ~/.bashrc; set -u; ml compiler/LLVM system/CUDA && /home/users/sbusi/apps/ont-guppy/bin/guppy_barcoder"
-    version: "3.4.5+fb1fbfb"
-    records_per_fastq: 8000
-    threads: 8
-nanostats:
-#short_reads_prefix: "/scratch/users/claczny/ont/fecal_pilot/data/raw/short_reads"
-short_reads_prefix: "/mnt/isilon/projects/lcsb_sequencing/transfer/bioecosystem/Rashi/2019/Apr/fastq"
-metaT_prefix: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/metaT/2018_GDB"
-#samples: ["Kapa1_MG_S18", "Kapa2_MG_S19", "NEB1_MG_S16", "NEB2_MG_S17", "NEBmod1_MG_Rashi_S22", "NEBmod2_MG_Rashi_S23"]
-fastp:
-    min_length: 40
-minimap2:
-    threads: 16
-igc:
-    uri: "parrot.genomics.cn/gigadb/pub/10.5524/100001_101000/100064/1.GeneCatalogs/IGC.fa.gz"
-hg38:
-    uri: "ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.38_GRCh38.p12/GCF_000001405.38_GRCh38.p12_genomic.fna.gz"
-genomecov:
-    bin: "bedtools genomecov"
-compute_avg_coverage:
-    bin: "scripts/coverage.awk"
-bwa:
-    threads: 24
-    long_reads_index:
-        opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
-samtools:
-    sort:
-        threads: 4
-        chunk_size: "4G"
-    view:
-        threads: 4
-flye:
-    bin: "flye"
-    threads: 27
-    genome_size: "1g"
-operams:
-    bin: "set +u; source ~/.bashrc; set -u; ml lang/Perl lang/R && perl /scratch/users/claczny/ont/apps/software/OPERA-MS/OPERA-MS.pl"
-    threads: 28
-megahit:
-    threads: 28
-nonpareil:
-    memory: 4096
-    threads: 14
-medaka:
-    threads: 28
-racon:
-    threads: 28 
-rebaler:
-    threads: 28
-diamond:
-    threads: 28
-    db: "/mnt/isilon/projects/ecosystem_biology/NOMIS/DIAMOND/new_nr.dmnd"
-metaspades:
-    threads: 28
-mmseq2:
-    threads: 24
-# Define sample names
-# samples: ["flye", "megahit", "metaspades"]
-# samples: ["flye", "megahit"]
-samples: ["metaspades_hybrid"]
-#binning_samples: ["flye", "megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "bwa_merged_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
-binning_samples: ["megahit", "bwa_sr_metaspades_hybrid", "bwa_lr_metaspades_hybrid", "mmi_sr_metaspades_hybrid", "mmi_lr_metaspades_hybrid", "mmi_merged_metaspades_hybrid"]
-
-# Hybrid assembler
-hybrid_assembler: "metaspades_hybrid"
-
-# Directory where fastq files are
-#data_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/Binning"
-
-# Directory to save the output to
-#results_dir: "/scratch/users/sbusi/ONT/cedric_ont_basecalling/Binning"
-
-# Number of cpus or threads to use
-threads: 28
-
-# Path to the 140GB Kraken2 database
-kraken2_database: "/scratch/users/bkunath/Kraken2/maxikraken2_1903_140GB/"
-
-# Path to DAS_Tool
-DAS_Tool:
-        path: "/home/users/sbusi/apps/DAS_Tool-master"
-        bin: "/home/users/sbusi/apps/DAS_Tool-master/src/"
-
-# Path to DAS_Tool database
-dastool_database: "/home/users/sbusi/apps/DAS_Tool-master/db/"
-
-# Mapping options
-bwa:
-    threads: 24
-    long_reads_index:
-        opts: "-aY -A 5 -B 11 -O 2,1 -E 4,3 -k 8 -W 16 -w 40 -r 1 -D 0 -y 20 -L 30,30 -T 2.5"
-samtools:
-    sort:
-        threads: 4
-        chunk_size: "4G"
-    view:
-        threads: 4
-minimap2:
-    threads: 24
-
-# Path to GTDBTK database
-GTDBTK:
-    DATA: "/home/users/sbusi/apps/db/gtdbtk/release89"
-
-# Rscript path
-Rscript: "/home/users/sbusi/apps/miniconda3/envs/dastool/bin/"
--- a/MODULAR_SNAKEFILE/assemble_and_coverage.done
+++ b/MODULAR_SNAKEFILE/assemble_and_coverage.done
--- a/MODULAR_SNAKEFILE/basecall_merge_qc.done
+++ b/MODULAR_SNAKEFILE/basecall_merge_qc.done
--- a/MODULAR_SNAKEFILE/basecall_merge_qc_NO_MOD.done
+++ b/MODULAR_SNAKEFILE/basecall_merge_qc_NO_MOD.done
--- a/MODULAR_SNAKEFILE/coverage_of_references.done
+++ b/MODULAR_SNAKEFILE/coverage_of_references.done
--- a/MODULAR_SNAKEFILE/test_snakefile
+++ b/MODULAR_SNAKEFILE/test_snakefile
-# File for running ONT analyses
-
-# default configuration file
-configfile:"config/CONFIG.yaml"
-
-# default executable for snakemake
-shell.executable("bash")
-
-# input settings
-# RUNS=os.environment.get("RUNS", config['runs']['first']).split()
-# STEPS=sorted(os.environment.get("STEPS", config['steps']).split())
-RUNS=config['runs']['first']
-STEPS=config['steps']
-
-# include rules for the workflows based on "steps" in the CONFIG.yaml file
-# ONT analyses workflow
-TARGETS = []
-
-if 'assembly_annotation' in STEPS:
-        include: "workflows/assembly_annotation.smk"
-        TARGETS += ["assemble_and_coverage.done",
-                "annotate.done",
-                "basecall_merge_qc.done",
-                "coverage_of_references.done",
-                "prodigal_gene_call.done",
-                "diamond_proteins.done"]
-
-if 'mmseq' in STEPS:
-        include: "workflows/mmseq.smk"
-        TARGETS += [expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/{assembler}_db"), assembler=["flye", "megahit", "metaspades_hybrid"]),
-                expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/flye_megahit_rbh")),
-                expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/flye_metaspades_hybrid_rbh")),
-                expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/megahit_metaspades_hybrid_rbh")),
-                expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/flye_megahit.m8")),
-                expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/flye_metaspades_hybrid.m8")),
-                expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/megahit_metaspades_hybrid.m8")),
-                expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/plot_files_ready.done")),
-                expand(os.path.join(RESULTS_DIR, "annotation/mmseq2/upset_plots.done"))]
-
-if 'metaT' in STEPS:
-        include: "workflows/metat.smk"
-        TARGETS += [expand(os.path.join(RESULTS_DIR, "preprocessing/metaT/{metaT_sample}.fastp.{report_type}"), metaT_sample="GDB_2018_metaT", report_type=["html", "json"]),
-                expand(os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", assembler="megahit"),
-                expand(os.path.join(RESULTS_DIR, "mapping/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", barcode=BARCODES, assembler="metaspades_hybrid"),
-                expand(os.path.join(RESULTS_DIR, "mapping/metaT/lr/{metaT_sample}_reads-x-{barcode}-{assembler}_contigs.bam"), metaT_sample="GDB_2018_metaT", barcode=BARCODES, assembler=ASSEMBLERS),
-                expand(os.path.join(RESULTS_DIR, "genomecov/metaT/sr/{metaT_sample}_reads-x-{sr_sample}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", assembler="megahit"),
-                expand(os.path.join(RESULTS_DIR, "genomecov/metaT/sr/{metaT_sample}_reads-x-lr_{barcode}_sr_{sr_sample}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", sr_sample="NEB2_MG_S17", barcode=BARCODES, assembler="metaspades_hybrid"),
-                expand(os.path.join(RESULTS_DIR, "genomecov/metaT/lr/{metaT_sample}_reads-x-{barcode}-{assembler}_contigs.avg_cov.txt"), metaT_sample="GDB_2018_metaT", barcode=BARCODES, assembler=ASSEMBLERS)]
-
-if 'mapping' in STEPS:
-        include: "workflows/mapping.smk"
-        TARGETS += [expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/metaspades.bwt"), hybrid_assembler=HYBRID_ASSEMBLER),
-                expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_{reads}_{hybrid_assembler}/{mapper}_{reads}_{hybrid_assembler}.bam"), mapper=MAPPERS, reads=["sr", "lr"], hybrid_assembler=HYBRID_ASSEMBLER),
-                expand(os.path.join(RESULTS_DIR, "mapping/{hybrid_assembler}/{hybrid_assembler}.mmi"), hybrid_assembler=HYBRID_ASSEMBLER),
-                expand(os.path.join(RESULTS_DIR, "mapping/{mapper}_merged_{hybrid_assembler}/{mapper}_merged_{hybrid_assembler}.bam"), mapper=MAPPERS, hybrid_assembler=HYBRID_ASSEMBLER)]
-
-if 'binning' in STEPS:
-        include: "workflows/binning.smk"
-        TARGETS += [expand(os.path.join(RESULTS_DIR, "assembly/{sample}.fa"), sample=BINNING_SAMPLES),
-                expand(os.path.join(RESULTS_DIR, "Binning/{sample}/dastool_output/{sample}_proteins.faa"), sample=BINNING_SAMPLES)]
-
-if 'taxonomy' in STEPS:
-        include: "workflows/taxonomy.smk"
-        TARGETS += [expand(os.path.join(RESULTS_DIR, "Binning/checkm_output/{sample}_output.txt"), sample=BINNING_SAMPLES),
-                expand(os.path.join(RESULTS_DIR, "Binning/gtdbtk_output/{sample}/gtdbtk.bac120.summary.tsv"), sample=BINNING_SAMPLES)]
-
-else:
-    raise Exception('You are not serious. No input data')
-
-
-rule all:
-    input:
-        TARGETS