From 88705fab86660ef797d8261c80a7ad77d182dc81 Mon Sep 17 00:00:00 2001 From: Yohan Jarosz <yohanjarosz@yahoo.fr> Date: Tue, 21 Jul 2015 15:46:44 +0200 Subject: [PATCH] rm download db from main rule --- init.rule | 22 ++++++++- rules/Util.rules | 123 ----------------------------------------------- 2 files changed, 21 insertions(+), 124 deletions(-) diff --git a/init.rule b/init.rule index 1aab328..86eb8f5 100644 --- a/init.rule +++ b/init.rule @@ -13,7 +13,9 @@ rule ALL: "{path}/idx/{files}.{ext}", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna", - ext=['bursttrie_0.dat', 'kmer_0.dat', 'pos_0.dat', 'stats']) + ext=['bursttrie_0.dat', 'kmer_0.dat', 'pos_0.dat', 'stats']), + "%s/ec2pathway.txt" % DBPATH, + "%s/pathway2hierarchy.txt" % DBPATH rule _DOWNLOAD_HUMAN_DB: output: @@ -118,3 +120,21 @@ rule _DOWNLOAD_TRIMMOMATIC_ADAPTERS: rm Trimmomatic-Src-0.32.zip && rm -rf trimmomatic-0.32 touch {output} """ + + +rule _DOWNLOAD_KEGG_INFORMATION: + log: + U_LOG + benchmark: + "%s/benchmarks/DOWNLOAD_KEGG_INFORMATION.json" % U_OUT + output: + "%s/ec2pathway.txt" % DBPATH, + "%s/pathway2hierarchy.txt" % DBPATH + shell: + """ + wget --no-check-certificate {config[kegg][db_ec2pthy]} -O {DBPATH}/ec2pathway.txt.tmp + grep "path:ec" {DBPATH}/ec2pathway.txt.tmp | sed -e 's/path:ec//g' | sed -e 's/ec://g' > {output[0]} + rm {DBPATH}/ec2pathway.txt.tmp + + python src/make.pwy.hierarchy.kegg.py > {output[1]} + """ diff --git a/rules/Util.rules b/rules/Util.rules index 82ffbbd..df1da6f 100644 --- a/rules/Util.rules +++ b/rules/Util.rules @@ -1,60 +1,6 @@ # output directory and log U_OUT, U_LOG = prepare_environment('Util') -rule _DOWNLOAD_HUMAN_DB: - output: - expand("{path}/{filter}.{ext}", path=DBPATH + "/human", filter=config["human_filtering"]["filter"], ext=['fa']) - params: - filter = config["human_filtering"]["filter"], outdir = DBPATH + "/human" - shell: - """ - TMPD=$(mktemp -d -t --tmpdir={TMPDIR} "XXXXXX") - wget {config[human_filtering][url]} -O $TMPD/{params.filter}.fa.gz - gunzip $TMPD/{params.filter}.fa.gz - mkdir -p {params.outdir} - mv $TMPD/{params.filter}.fa {params.outdir} - rm -rf $TMPD - """ - - -rule _DOWNLOAD_SORTMERNA_DATABASES: - output: - expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna") - shell: - """ - TMPD=$(mktemp -d -t --tmpdir={tmp} "XXXXXX") - wget {pkg_url} -O $TMPD/sortmerna.tgz - tar -xzf $TMPD/sortmerna.tgz --strip-components=1 -C $TMPD - mkdir -p {path} - mv $TMPD/rRNA_databases/*.fasta {path}/. - rm -rf $TMPD - """.format(pkg_url=config["sortmerna"]["pkg_url"], path=DBPATH + "/sortmerna", tmp=TMPDIR) - -rule _DOWNLOAD_PROKKA_DATABASES: - output: - expand("{path}/{db}", path=DBPATH, db=config["prokka"]["databases"]) - shell: - """ - ### prokka by default will look databases where is located the binary. - ### we have to softlink to put the binary somewhere and the databases somewhere else. - if [[ "{DBPATH}" = /* ]] - then - PP={DBPATH}; - else - PP=$PWD/{DBPATH}; - fi - cd $(dirname $(which prokka))/.. && ln -s $PP db - echo "Softlinking $(dirname $(which prokka))/../db to $PP" - TMPDIR=$(mktemp -d -t "XXXXXX") - wget {config[prokka][pkg_url]} --no-check-certificate -O $TMPDIR/prokka.tgz - tar -xzf $TMPDIR/prokka.tgz --strip-components=1 -C $TMPDIR - mkdir -p {DBPATH} - cp -r $TMPDIR/db/* {DBPATH}/. - rm -rf $TMPDIR - prokka --setupdb - """ - - rule _DOWNLOAD_LIBRARY_FILES: output: "lib/d3.min.js", "lib/d3pie.min.js", "lib/jquery-2.1.1.min.js" @@ -65,29 +11,6 @@ rule _DOWNLOAD_LIBRARY_FILES: wget http://code.jquery.com/jquery-2.1.1.min.js -O lib/jquery-2.1.1.min.js """ -rule INDEX_SORTMERNA_DB: - log: - U_LOG - benchmark: - "%s/benchmarks/INDEX_SORTMERNA_DB.json" % U_OUT - input: - expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna") - output: - expand( - "{path}/idx/{files}.{ext}", - files=config["sortmerna"]["files"], - path=DBPATH + "/sortmerna", - ext=['bursttrie_0.dat', 'kmer_0.dat', 'pos_0.dat', 'stats']) - run: - fastaindexed = expand( - "{path}/idx/{files}", - files=config["sortmerna"]["files"], - path=DBPATH + "/sortmerna") - ref = ':'.join('%s,%s' % (a, b) for a, b in zip(input, fastaindexed)) - shell("mkdir -p {DBPATH}/sortmerna") - shell("indexdb_rna --ref {ref} > {log} 2>&1") - - rule DIAGRAMMS: message: "Creating diagramms." @@ -198,49 +121,3 @@ rule CHECK_TOOL_VERSION: echo "# KronaTools" >> {output} """ -# -# rule _DOWNLOAD_KEGG_INFORMATION: -# log: -# U_LOG -# benchmark: -# "%s/benchmarks/DOWNLOAD_KEGG_INFORMATION.json" % U_OUT -# output: -# "%s/ec2pwy.txt" % U_OUT, -# "%s/pwy2hierarchy.txt" % U_OUT -# shell: -# """ -# echo "[x] DOWNLOAD_KEGG_INFORMATION `date +"%Y/%m/%d %H:%M:%S"`" >> {log} -# echo "Preparing EC to KEGG ID mappings" -# python {SRCDIR}/make.ec.to.pwy.kegg.py -o {output[0]} -# echo "Preparing KEGG ID to pathway mappings" -# python {SRCDIR}/make.pwy.hierarchy.kegg.py -o {output[1]} -# """ - -rule _DOWNLOAD_TRIMMOMATIC_ADAPTERS: - output: - "{DBPATH}/adapters/adapters.done" - shell: - """ - wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-Src-0.32.zip - unzip Trimmomatic-Src-0.32.zip - cp -r trimmomatic-0.32/adapters {DBPATH} - rm Trimmomatic-Src-0.32.zip && rm -rf trimmomatic-0.32 - touch {output} - """ - -rule _DOWNLOAD_KEGG_INFORMATION: - log: - U_LOG - benchmark: - "%s/benchmarks/DOWNLOAD_KEGG_INFORMATION.json" % U_OUT - output: - "%s/ec2pathway.txt" % DBPATH, - "%s/pathway2hierarchy.txt" % DBPATH - shell: - """ - wget --no-check-certificate {config[kegg][db_ec2pthy]} -O {DBPATH}/ec2pathway.txt.tmp - grep "path:ec" {DBPATH}/ec2pathway.txt.tmp | sed -e 's/path:ec//g' | sed -e 's/ec://g' > {output[0]} - rm {DBPATH}/ec2pathway.txt.tmp - - python src/make.pwy.hierarchy.kegg.py > {output[1]} - """ -- GitLab