Skip to content
Snippets Groups Projects
Forked from IMP / IMP
934 commits behind the upstream repository.
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
init.rule 4.57 KiB
include:
    "config"

# Aggregate target: lists every database file this workflow is expected to
# provide under DBPATH. It has no recipe of its own.
rule ALL:
    input:
        # Human filtering reference (.fa) and its index side files
        # (amb/ann/bwt/pac/sa -- presumably a BWA index; confirm against the indexing rule).
        expand(
            "{path}/{filter}.{ext}",
            path=DBPATH + "/human",
            filter=config["human_filtering"]["filter"],
            ext=['fa', 'fa.amb', 'fa.ann', 'fa.bwt', 'fa.pac', 'fa.sa']
        ),
        # SortMeRNA reference FASTA files.
        expand(
            "{path}/{files}.fasta",
            files=config["sortmerna"]["files"],
            path=DBPATH + "/sortmerna"
        ),
        # Prokka databases named in the configuration.
        expand(
            "{path}/{db}",
            path=DBPATH,
            db=config["prokka"]["databases"]
        ),
        # Marker file for the adapter sequences.
        DBPATH + "/adapters/adapters.done",
        # SortMeRNA index files, one set per reference FASTA.
        expand(
            "{path}/idx/{files}.{ext}",
            files=config["sortmerna"]["files"],
            path=DBPATH + "/sortmerna",
            ext=['bursttrie_0.dat', 'kmer_0.dat', 'pos_0.dat', 'stats']
        ),
        # Pathway mapping tables.
        DBPATH + "/ec2pathway.txt",
        DBPATH + "/pathway2hierarchy.txt"

# Download the gzipped human filtering reference from
# config["human_filtering"]["url"], decompress it in a scratch directory
# created under TMPDIR, and move the resulting <filter>.fa into DBPATH/human.
#
# NOTE(review): wget runs with --no-check-certificate, so the TLS certificate
# of the download host is not verified. Kept as-is to preserve behaviour;
# consider removing it once the host serves a valid certificate.
rule _DOWNLOAD_HUMAN_DB:
    output:
        expand("{path}/{filter}.{ext}", path=DBPATH + "/human", filter=config["human_filtering"]["filter"], ext=['fa'])
    params:
        filter = config["human_filtering"]["filter"], outdir = DBPATH + "/human"
    shell:
        """
        TMPD=$(mktemp -d -t --tmpdir={TMPDIR} "XXXXXX")
        # Clean up the scratch directory on every exit path, including a failed download.
        trap 'rm -rf "$TMPD"' EXIT
        # The URL is quoted so that characters such as & or ? are not interpreted by the shell.
        wget "{config[human_filtering][url]}" --no-check-certificate -O "$TMPD/{params.filter}.fa.gz"
        gunzip "$TMPD/{params.filter}.fa.gz"
        mkdir -p "{params.outdir}"
        mv "$TMPD/{params.filter}.fa" "{params.outdir}"
        """


# Download the SortMeRNA package archive from config["sortmerna"]["pkg_url"],
# unpack it in a scratch directory created under TMPDIR, and move the FASTA
# files from its rRNA_databases directory into DBPATH/sortmerna.
#
# The recipe text is filled in with str.format() at parse time, so any literal
# brace added to it later must be doubled.
#
# NOTE(review): wget runs with --no-check-certificate, so the TLS certificate
# of the download host is not verified. Kept as-is to preserve behaviour.
rule _DOWNLOAD_SORTMERNA_DATABASES:
    output:
        expand("{path}/{files}.fasta", files=config["sortmerna"]["files"], path=DBPATH + "/sortmerna")
    shell:
        """
        TMPD=$(mktemp -d -t --tmpdir={tmp} "XXXXXX")
        # Clean up the scratch directory on every exit path, including a failed download.
        trap 'rm -rf "$TMPD"' EXIT
        # The URL is quoted so that characters such as & or ? are not interpreted by the shell.
        wget "{pkg_url}" --no-check-certificate -O "$TMPD/sortmerna.tgz"
        tar -xzf "$TMPD/sortmerna.tgz" --strip-components=1 -C "$TMPD"
        mkdir -p "{path}"
        mv "$TMPD"/rRNA_databases/*.fasta "{path}/."
        """.format(
            pkg_url=config["sortmerna"]["pkg_url"],
            path=DBPATH + "/sortmerna",
            tmp=TMPDIR
        )

rule _DOWNLOAD_PROKKA_DATABASES:
    output:
        expand("{path}/{db}", path=DBPATH, db=config["prokka"]["databases"])
    shell:
        """
        ### prokka by default will look databases where is located the binary.
        ### we have to softlink to put the binary somewhere and the databases somewhere else.
        if [[ "{DBPATH}" = /* ]]
        then
            PP={DBPATH};
        else
            PP=$PWD/{DBPATH};
        fi
        cd $(dirname $(which prokka))/.. && ln -s $PP db
        echo "Softlinking $(dirname $(which prokka))/../db to $PP"
        TMPDIR=$(mktemp -d -t "XXXXXX")