# Snakemake rules that download and index the reference databases.
include:
"config"
# Aggregate target: requesting ALL asks for every reference file that the
# download and indexing rules in this file declare as outputs.
rule ALL:
    input:
        # FASTA for the configured human filter plus the index files
        # declared by INDEX_FASTA_FILE.
        expand(
            "{path}/{filter}.{ext}",
            path=DBPATH + "/human",
            filter=config["human_filtering"]["filter"],
            ext=['fa', 'fa.amb', 'fa.ann', 'fa.bwt', 'fa.pac', 'fa.sa']
        ),
        # SortMeRNA database FASTA files.
        expand(
            "{path}/{files}.fasta",
            files=config["sortmerna"]["files"],
            path=DBPATH + "/sortmerna"
        ),
        # Prokka databases.
        expand(
            "{path}/{db}",
            path=DBPATH,
            db=config["prokka"]["databases"]
        ),
        # Marker file written once the Trimmomatic adapters are in place.
        "%s/adapters/adapters.done" % DBPATH,
        # SortMeRNA index files, one set per database FASTA.
        expand(
            "{path}/idx/{files}.{ext}",
            files=config["sortmerna"]["files"],
            path=DBPATH + "/sortmerna",
            ext=['bursttrie_0.dat', 'kmer_0.dat', 'pos_0.dat', 'stats']
        ),
        # KEGG lookup tables.
        "%s/ec2pathway.txt" % DBPATH,
        "%s/pathway2hierarchy.txt" % DBPATH
# Download the gzip-compressed FASTA configured under human_filtering.url,
# decompress it in a scratch directory and move the result to
# <DBPATH>/human/<filter>.fa.
#
# NOTE(review): wget runs with --no-check-certificate, so the download is not
# authenticated; kept as in the original, confirm this is still required.
rule _DOWNLOAD_HUMAN_DB:
    output:
        expand("{path}/{filter}.{ext}", path=DBPATH + "/human", filter=config["human_filtering"]["filter"], ext=['fa'])
    params:
        filter = config["human_filtering"]["filter"],
        outdir = DBPATH + "/human"
    shell:
        """
        TMPD=$(mktemp -d -t --tmpdir={TMPDIR} "XXXXXX")
        # Remove the scratch directory on every exit path, including failures.
        trap 'rm -rf "$TMPD"' EXIT
        wget "{config[human_filtering][url]}" --no-check-certificate -O "$TMPD/{params.filter}.fa.gz"
        gunzip "$TMPD/{params.filter}.fa.gz"
        mkdir -p {params.outdir}
        mv "$TMPD/{params.filter}.fa" {params.outdir}
        """
# Download the SortMeRNA package configured under sortmerna.pkg_url and copy
# the FASTA files from its rRNA_databases directory into <DBPATH>/sortmerna.
#
# The shell text uses the same placeholders as the sibling rules instead of
# being pre-formatted with str.format, so it is formatted only once.
#
# NOTE(review): wget runs with --no-check-certificate, so the download is not
# authenticated; kept as in the original, confirm this is still required.
rule _DOWNLOAD_SORTMERNA_DATABASES:
    output:
        expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna")
    shell:
        """
        TMPD=$(mktemp -d -t --tmpdir={TMPDIR} "XXXXXX")
        # Remove the scratch directory on every exit path, including failures.
        trap 'rm -rf "$TMPD"' EXIT
        wget "{config[sortmerna][pkg_url]}" --no-check-certificate -O "$TMPD/sortmerna.tgz"
        tar -xzf "$TMPD/sortmerna.tgz" --strip-components=1 -C "$TMPD"
        mkdir -p {DBPATH}/sortmerna
        mv "$TMPD"/rRNA_databases/*.fasta {DBPATH}/sortmerna/.
        """
# Download the prokka package configured under prokka.pkg_url, copy its db
# directory into DBPATH, link that location into the prokka installation and
# run prokka --setupdb.
#
# Changes from the previous version:
#   * the symlink is created in a subshell, so the working directory is not
#     changed for the later commands, which use the absolute path PP;
#   * the scratch directory is held in TMPD rather than overwriting the
#     shell's TMPDIR, which used to point at a deleted directory by the time
#     prokka --setupdb ran;
#   * ln uses -sfn so a rerun replaces an existing db symlink.
#
# NOTE(review): if db already exists as a real directory in the prokka
# installation, ln places the link inside it (as the original did) - confirm
# the installation layout.
rule _DOWNLOAD_PROKKA_DATABASES:
    output:
        expand("{path}/{db}", path=DBPATH, db=config["prokka"]["databases"])
    shell:
        """
        ### By default prokka looks for its databases next to its binary.
        ### A softlink lets the binary live in one place and the databases in another.
        if [[ "{DBPATH}" = /* ]]
        then
            PP={DBPATH};
        else
            PP=$PWD/{DBPATH};
        fi
        mkdir -p $PP
        PROKKA_HOME=$(dirname $(which prokka))/..
        (cd $PROKKA_HOME && ln -sfn $PP db)
        echo "Softlinking $PROKKA_HOME/db to $PP"
        TMPD=$(mktemp -d -t --tmpdir={TMPDIR} "XXXXXX")
        # Remove the scratch directory on every exit path, including failures.
        trap 'rm -rf "$TMPD"' EXIT
        wget "{config[prokka][pkg_url]}" --no-check-certificate -O "$TMPD/prokka.tgz"
        tar -xzf "$TMPD/prokka.tgz" --strip-components=1 -C "$TMPD"
        cp -r "$TMPD"/db/* $PP/.
        prokka --setupdb
        """
# Build the SortMeRNA index for every configured database FASTA with a single
# indexdb_rna call.
rule INDEX_SORTMERNA_DB:
    input:
        expand(
            "{path}/{files}.fasta",
            files=config["sortmerna"]["files"],
            path=DBPATH + "/sortmerna"
        )
    output:
        expand(
            "{path}/idx/{files}.{ext}",
            files=config["sortmerna"]["files"],
            path=DBPATH + "/sortmerna",
            ext=['bursttrie_0.dat', 'kmer_0.dat', 'pos_0.dat', 'stats']
        )
    run:
        # One index prefix per input FASTA, expanded in the same order as the inputs.
        index_prefixes = expand(
            "{path}/idx/{files}",
            files=config["sortmerna"]["files"],
            path=DBPATH + "/sortmerna"
        )
        # --ref takes "fasta,prefix" pairs separated by colons.
        pairs = ['%s,%s' % (fasta, prefix) for fasta, prefix in zip(input, index_prefixes)]
        ref = ':'.join(pairs)
        shell("mkdir -p {DBPATH}/sortmerna")
        shell("indexdb_rna --ref {ref}")
# Index a FASTA file with BWA.
#
# The shell body used to be empty, so none of the declared outputs were ever
# created. The five declared suffixes are the files that bwa index writes next
# to its input when no prefix is given.
rule INDEX_FASTA_FILE:
    input:
        "{fasta}"
    output:
        "{fasta}.amb",
        "{fasta}.bwt",
        "{fasta}.pac",
        "{fasta}.sa",
        "{fasta}.ann"
    shell:
        """
        bwa index {input}
        """
# Download the Trimmomatic source archive configured under
# trimmomatic.pkg_url and copy its adapters directory into DBPATH. The
# adapters.done marker is touched last, so it only exists after the copy.
#
# Changes from the previous version:
#   * the output path is built from the DBPATH variable, as in the other
#     rules; "{DBPATH}/..." inside an output string declared a wildcard;
#   * the archive is downloaded and unpacked in a scratch directory instead of
#     fixed file names in the working directory;
#   * the extracted top-level directory is matched with a glob, so its name no
#     longer has to be trimmomatic-0.32.
#
# NOTE(review): wget runs with --no-check-certificate, so the download is not
# authenticated; kept as in the original, confirm this is still required.
rule _DOWNLOAD_TRIMMOMATIC_ADAPTERS:
    output:
        "%s/adapters/adapters.done" % DBPATH
    shell:
        """
        TMPD=$(mktemp -d -t --tmpdir={TMPDIR} "XXXXXX")
        # Remove the scratch directory on every exit path, including failures.
        trap 'rm -rf "$TMPD"' EXIT
        wget --no-check-certificate "{config[trimmomatic][pkg_url]}" -O "$TMPD/trimmomatic.zip"
        unzip "$TMPD/trimmomatic.zip" -d "$TMPD"
        mkdir -p {DBPATH}
        cp -r "$TMPD"/*/adapters {DBPATH}
        touch {output}
        """
# Fetch the KEGG EC-to-pathway listing and derive the two lookup tables:
#   output[0]  lines containing "path:ec", with the "path:ec" and "ec:"
#              prefixes removed;
#   output[1]  standard output of src/make.pwy.hierarchy.kegg.py.
rule _DOWNLOAD_KEGG_INFORMATION:
    output:
        "%s/ec2pathway.txt" % DBPATH,
        "%s/pathway2hierarchy.txt" % DBPATH
    shell:
        """
        wget --no-check-certificate {config[kegg][db_ec2pthy]} -O {DBPATH}/ec2pathway.txt.tmp
        grep "path:ec" {DBPATH}/ec2pathway.txt.tmp \
            | sed -e 's/path:ec//g' -e 's/ec://g' \
            > {output[0]}
        rm {DBPATH}/ec2pathway.txt.tmp
        python src/make.pwy.hierarchy.kegg.py > {output[1]}
        """