Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • IMP/IMP
  • IMP-dev/IMP
2 results
Show changes
Commits on Source (831)
.*
!.gitignore
!.gitlab-ci.yml
*~
conf/userconfig.imp.json
output/*
db
__pycache__
stages:
  - dag
  - init
  - preprocessing
  - assembly
  - analysis
  - binning
  - manual

# Manual job: build/refresh the IMP databases into this build's directory.
databases:
  stage: manual
  when: manual
  script:
    - pip3.4 install --editable . --user
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d $CI_PROJECT_DIR/$CI_BUILD_ID init

# Runs before every job's script: install impy from the checkout.
before_script:
  - pip3.4 install --editable . --user
# INIT
i-cl:
  stage: init
  script:
    - pytest
i-with-another-filter:
  stage: init
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db init --index $CI_PROJECT_DIR/test/small.fa
# DAG
# see if the snakemake graph is resolving to the end
dag-mgmt-default:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"
dag-mgmt-maxbin:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -b maxbin -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"
dag-mgmt-idba:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -a idba -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"
dag-mgmt-nf:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"
dag-mgmt-nf2:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n" --no-filtering
dag-mg-default:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"
dag-mg-nf:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"
dag-mg-nf2:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n" --no-filtering
dag-mt-default:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db run -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"
dag-mt-nf:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"
dag-mt-nf2:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n" --no-filtering
dag-mgmt-no-binning:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b no run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"
dag-mt-no-binning:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b no run -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"
dag-mg-no-binning:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b no run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"
# PREPROCESSING
p-mgmt-default:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db preprocessing -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step
p-mgmt-nf:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db preprocessing -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step
p-mg-default:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db preprocessing -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics --single-step
p-mg-nf:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db preprocessing -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics --single-step
p-mt-default:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db preprocessing -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics --single-step
p-mt-nf:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db preprocessing -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics --single-step
# Assembly
# idba assemblies are slow, so they are kept manual.
a-mgmt-idba:
  stage: manual
  when: manual
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a idba assembly -m /mnt/data/input/assembly-megahit/mg.r1.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.r2.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.se.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r1.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r2.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step
a-mg-idba:
  stage: manual
  when: manual
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a idba assembly -m /mnt/data/input/assembly-megahit/mg.r1.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.r2.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
a-mt-idba:
  stage: manual
  when: manual
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a idba assembly -t /mnt/data/input/assembly-megahit/mt.r1.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r2.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
a-mgmt-megahit:
  stage: assembly
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a megahit assembly -m /mnt/data/input/assembly-megahit/mg.r1.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.r2.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.se.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r1.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r2.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step
a-mg-megahit:
  stage: assembly
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a megahit assembly -m /mnt/data/input/assembly-megahit/mg.r1.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.r2.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
a-mt-megahit:
  stage: assembly
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a megahit assembly -t /mnt/data/input/assembly-megahit/mt.r1.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r2.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
# ANALYSIS: start from pre-computed fixtures copied into the build directory.
an-mgmt:
  stage: analysis
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/analysis-mgmt/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -e IMP_SUDO=sudo -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db analysis --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step
an-mg:
  stage: analysis
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/analysis-mg/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -e IMP_SUDO=sudo -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db analysis --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
an-mt:
  stage: analysis
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/analysis-mt/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -e IMP_SUDO=sudo -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db analysis --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
# Disabled binny jobs kept for reference.
# binny-mgmt:
#   stage: binning
#   script:
#     - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
#     - cp -r /mnt/data/input/binny-mgmt/* $CI_PROJECT_DIR/$CI_BUILD_ID
#     - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db binning --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step
# binny-mg:
#   stage: binning
#   script:
#     - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
#     - cp -r /mnt/data/input/binny-mg/* $CI_PROJECT_DIR/$CI_BUILD_ID
#     - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db binning --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
maxbin-mgmt:
  stage: manual
  when: manual
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/maxbin-mgmt/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b maxbin binning --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step
maxbin-mg:
  stage: manual
  when: manual
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/maxbin-mg/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b maxbin binning --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
# v 1.4.1
* fix command line bug (check config file exists)
* fix command line bug (loading container from local tarball)
* handling cases where no SE reads after preprocessing
* fix MT "binning" step
* command line handle environment variable
* remove deprecated IMP script (use impy instead)
# v 1.4.0
* Update command line utility: impy
* Integrate more tools into the pipeline (e.g. MaxBin)
* Part of the pipeline can be launched without the others
* Testing pipeline workflow and tools
* Update container to version 1.4
# v 1.2.0
* Upgrade Snakemake to the latest Version
* Current version of the IMP code is shipped inside the docker container
......
#!/usr/bin/env python3
from lib.docopt import docopt
from lib.path import Path
import subprocess
import os
import getpass
import json
import shlex
from copy import deepcopy
import tempfile
import sys
# Version tag of the IMP docker image that this wrapper pulls and runs.
IMP_VERSION = '1.1.1'
# Default URL of the docker-image tarball fetched when the image is not loaded.
IMP_DEFAULT_TAR_REPOSITORY = 'https://webdav-r3lab.uni.lu/public/R3lab/IMP/dist/imp-%s.tar.gz' % IMP_VERSION
# Docker image name (tagged with IMP_VERSION at run time).
IMP_IMAGE_NAME = 'docker-r3lab.uni.lu/imp/imp'
__doc__ = """Integrated Metaomic Pipeline.
____ __ __ ____
(_ _)( \/ )( _ \\
_)(_ ) ( )___/
(____)(_/\/\_)(__)
Usage:
IMP [-m MG1 -m MG2] [-t MT1 -t MT2] -o OUTPUT [--enter] [--norm] [--current] [-r REPO] [-n CONTAINER] [-v VERSION] [-c CONFIGFILE] [-d DBPATH] [-a ASSEMBLER] [-e ENV] ... [COMMANDS ...]
IMP --init [--current] [-d DBPATH] [-n CONTAINER] [-v VERSION] [-r REPO] [-c CONFIGFILE]
IMP (-h | --help)
IMP --version
Options:
-e ENV Environment variable to pass to the container
--enter Enter the container
--init Initialize IMP databases (Take a while).
--norm Don't delete the container after use. Useful for debugging.
--ask Ask to create directory if it doesn't exist.
--current Use the current version of the IMP codebase (what you have pulled).
-c CONFIG Pass a user defined config file. Default: conf/userconfig.imp.json
-h --help Show this help and exit
-m MG Path to the metagenomics paired files (must be 2 files).
-t MT Path to the metatranscriptomic paired files (2 files).
-d DBPATH Path to the databases [default: db]
-n CONTAINER Name of the container. Useful when you want to run your own built container. [default: {name}]
-v VERSION Name of the container. Useful when you want to run your own built container. [default: {version}]
-o OUTPUT Path to the output directory
-r REPO Repository to install IMP Tarball from (it can be a local file). [default: {repo}]
-a ASSEMBLER Name of the assembler for MGMT. Only idba and megahit are supported.
Typical use:
# first run
./IMP --init
# simple run with default options
./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory
# use a different database path
./IMP --init -d /path/to/databases_directory
./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory -d /path/to/databases_directory
# use the IMP code you have pulled instead of the one shipped inside the container.
./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory --current
""".format(
name=IMP_IMAGE_NAME,
version=IMP_VERSION,
repo=IMP_DEFAULT_TAR_REPOSITORY
)
def check_installation():
    """
    Verify that the host has every dependency needed to run IMP.

    Raises:
        Exception: naming the missing dependency (docker, git, or a
            Python 3 interpreter) with a pointer to its install docs.
    """
    # External binaries located via `which`; a non-zero exit means missing.
    required_tools = (
        ('docker', "Docker must be installed. Please see https://docs.docker.com/installation."),
        ('git', "Git must be installed. Please see http://www.git-scm.com."),
    )
    for tool, message in required_tools:
        try:
            subprocess.check_output(['which', tool])
        except subprocess.CalledProcessError:
            raise Exception(message)
    # The wrapper itself relies on Python 3 semantics.
    if sys.version_info < (3, 0, 0):
        raise Exception("Python 3 or later must be installed. Please see https://www.python.org/downloads.")
def check_imp_installed(name, version, repo):
    """
    Ensure the IMP docker image `name`:`version` is available locally,
    installing it from `repo` when it is missing.
    """
    if is_imp_installed(name, version):
        return
    install_imp(repo)
def is_imp_installed(name, version):
    """
    Check whether the IMP docker image `name` is loaded at `version`.

    Returns True when `version` appears in the `docker images <name>`
    listing, False otherwise (including when docker itself fails).

    Note: the previous implementation piped `docker images` into an
    external `grep` through two subprocesses, leaving the intermediate
    pipe open and an unused variable behind; the substring check is now
    done in Python.
    """
    try:
        listing = subprocess.check_output(['docker', 'images', name])
    except (subprocess.CalledProcessError, OSError):
        # docker returned an error or is not runnable: treat as "not installed"
        return False
    return version in listing.decode('utf-8', 'replace')
def install_imp(repo):
    """
    Install the IMP docker image from `repo`.

    `repo` is either an http(s) URL (fetched with wget, certificate checks
    disabled as the upstream host uses a self-signed cert) or a local file
    path (copied). The tarball is loaded into docker, and the temporary
    file is removed afterwards even if the load fails.
    """
    fname = 'imp-tarball.tmp.tgz'
    # `repo[:4].startswith('http')` was a redundant way to say this:
    if repo.startswith('http'):
        # download
        print("[x] Downloading IMP TARBALL at '%s'" % repo)
        subprocess.check_output(['wget', '--no-check-certificate', repo, '-O', fname])
    else:
        # copy
        print("[x] Copying IMP TARBALL '%s'" % repo)
        subprocess.check_output(['cp', repo, fname])
    try:
        # load
        print("[x] Loading IMP TARBALL into docker")
        subprocess.check_output(['docker', 'load', '-i', fname])
    finally:
        # clean, even when `docker load` fails
        print("[x] Removing IMP TARBALL.")
        if os.path.exists(fname):
            os.remove(fname)
def map_user(command, directory):
    """
    Wrap `command` in a shell line that recreates the host user inside
    the container and hands `directory` over to that user.

    The user inside the docker container and outside the container are not
    the same, so ownership/permissions of the mounted `directory` are fixed
    after the command runs. Returns the ` /bin/bash -c "..."` suffix to
    append to a `docker run` command line.
    """
    user = getpass.getuser()
    fixups = ' && '.join([
        'useradd %s' % user,
        'chown -R %s %s' % (user, directory),
        'chmod -R u+Xrw,g+rw,o+r %s' % directory,
    ])
    return ' /bin/bash -c "%s ; %s"' % (command, fixups)
def init(args):
    """
    Start the docker container to index files and setup prokka.
    Must be run at least once.

    args: the docopt argument dictionary ('-d', '-c', '-n', '-v', '--current').
    """
    code_path = Path(__file__).parent.abspath()
    db_path = Path(args['-d']).abspath()
    conf_name = str(Path(args['-c']).name)
    conf_dir = Path(args['-c']).parent.abspath()
    fmt = {
        'd': db_path,
        'n': args['-n'],
        'v': args['-v'],
        'c': conf_name,
        'conf': conf_dir,
    }
    if args['--current']:
        # Mount the local checkout over the container's bundled IMP code.
        fmt['p'] = code_path
        template = 'docker run --rm -v {p}:/code -v {d}:/databases -v {conf}:/conf -e CONFIGFILE=/conf/{c} {n}:{v}'
    else:
        template = 'docker run --rm -v {d}:/databases -v {conf}:/conf -e CONFIGFILE=/conf/{c} {n}:{v}'
    # IMP command + user mapping (see https://github.com/docker/docker/pull/12648)
    cmd = template.format(**fmt) + map_user('snakemake -s /code/rules/init', '/databases')
    print("Executing", '"', cmd, '"')
    subprocess.call(cmd, shell=True)
def run(args):
    """
    Launch the IMP pipeline inside the docker container.

    Mounts the input data (under their common parent directory), the output
    directory, the databases and the config directory into the container,
    forwards environment variables and data paths, then executes snakemake
    (or the user-supplied COMMANDS) followed by the user-mapping fixup.

    args: the docopt argument dictionary. Exits with status 1 when an input
    file does not exist.
    """
    CURRENT_PATH = Path(__file__).parent.abspath()
    # find minimum common path of all inputs: it becomes the /data mount
    mg_data = [Path(p).abspath() for p in args['-m']]
    mt_data = [Path(p).abspath() for p in args['-t']]
    # check if paths exist before doing any work
    for pth in mg_data + mt_data:
        if not pth.exists():
            print("'%s' does not exist" % pth)
            exit(1)
    common_path = Path(os.path.commonprefix(mg_data + mt_data)).dirname()
    # update data paths: make them relative to the common mount point
    mg_data = [p.partition(common_path)[-1][1:] for p in mg_data]
    mt_data = [p.partition(common_path)[-1][1:] for p in mt_data]
    # output directory is created if needed
    output = Path(args['-o']).abspath()
    if not output.exists():
        output.makedirs()
    database_path = Path(args['-d']).abspath()
    config_file = str(Path(args['-c']).name)
    config_directory = Path(args['-c']).parent.abspath()
    # configure IMP mount points into the docker container
    mount_points = [
        '-v %s:/data' % common_path,
        '-v %s:/output' % output,
        '-v %s:/databases' % database_path,
        '-v %s:/conf' % config_directory
    ]
    # add code mount point if the user wants to mount the local IMP codebase.
    if args['--current']:
        mount_points.append('-v %s:/code' % CURRENT_PATH)
    # environment variables: split on the FIRST '=' only, so values may
    # themselves contain '=' (the old `e.split('=')` crashed on those).
    envs = ['-e {}="{}"'.format(*e.split('=', 1)) for e in args['-e']]
    # prepare MG and MT data paths as seen from inside the container
    mg = ['/data/' + d for d in mg_data]
    mt = ['/data/' + d for d in mt_data]
    if mg:
        envs += ['-e MG="%s"' % ' '.join(mg)]
    if mt:
        envs += ['-e MT="%s"' % ' '.join(mt)]
    if args['-c']:
        envs += ['-e CONFIGFILE=/conf/%s' % config_file]
    if args['-a']:
        envs += ['-e IMP_ASSEMBLER=%s' % args['-a']]
    # CL
    cmd = ['docker', 'run'] + mount_points + envs
    # rm the container by default
    if not args['--norm']:
        cmd += ['--rm']
    # if --enter flag is specified, attach tty and set mode to interactive
    if args['--enter']:
        cmd += ['-it']
    # add container name and commands to pass to snakemake
    cmd += ['%s:%s' % (args['-n'], args['-v'])]
    # if --enter flag is specified, drop into a shell instead of snakemake
    if args['--enter']:
        cmd += ['/bin/bash']
        cmd = ' '.join(cmd)
    else:
        if not args['COMMANDS']:
            args['COMMANDS'] = ['snakemake', 'ALL']
        cmd = ' '.join(cmd) + map_user(' '.join(args['COMMANDS']), '/output')
    print("Executing", '"', cmd, '"')
    subprocess.call(cmd, shell=True)
def validate(args):
    """
    Validate the command line arguments.

    Each provided omic data set must be exactly 2 paired files, and at
    least one of metagenomic (-m) / metatranscriptomic (-t) must be given.

    Returns True when the arguments are usable, False otherwise (after
    printing the reason to stderr).
    """
    mg = args['-m']
    if mg and len(mg) != 2:
        print('Metagenomic data should be 2 paired files', file=sys.stderr)
        return False
    mt = args['-t']
    if mt and len(mt) != 2:
        print('Metatranscriptomic data should be 2 paired files', file=sys.stderr)
        return False
    if not mg and not mt:
        # message previously read "provide at metagenomic" (garbled wording)
        print('You should provide metagenomic and/or metatranscriptomic data', file=sys.stderr)
        return False
    return True
if __name__ == '__main__':
    # Fail fast when docker / git / python3 are missing.
    check_installation()
    # options_first=True: everything after the first positional argument is
    # collected into COMMANDS and forwarded to the container untouched.
    args = docopt(__doc__, version=IMP_VERSION, options_first=True)
    # Pull/load the IMP image when the requested name:version is absent.
    check_imp_installed(args['-n'], args['-v'], args['-r'])
    if args['--init']:
        init(args)
        exit(0)
    if not validate(args):
        exit(1)
    run(args)
MIT License
Copyright (c) 2016 Luxembourg Centre for Systems Biomedicine
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
include *.rst
# Upload and release changes of impy to [PyPI](https://pypi.python.org/pypi).
* Login to PyPI.
* Makes changes in `setup.py` (version, ...)
* Upload the package.
```
python setup.py sdist upload
```
# IMP
The Integrated Meta-omic Pipeline (IMP) is developed to perform large-scale, reproducible and automated integrative analysis of metagenomic and metatranscriptomic data. IMP also performs single omic (i.e. metagenomic-only and metatranscriptomic-only) analysis as an additional functionality.
IMP uses various tools in order to perform an analysis (see `docs/DEPENDENCIES.md`). IMP does not provide an installation script for the tools it uses. It would be impossible to maintain.
Instead we provide the IMP workflow as is via the `snakemake` wrapper. Additionally, we provide a `docker container` with all the tools already installed inside.
Depending on your setup you might choose to use the workflow directly via `snakemake` or use the docker container via the `wrapper script`
* **wrapper script**: Everything is self-contained in a single docker container. Only 3 dependencies are needed in order to run IMP.
> Best on personal computers or when working in a mutually trusted environment.
* **snakemake**: Only the workflow is provided. You need to install correctly every tool on your machine.
> Best for cluster environment or when you don't have administrative rights to run docker.
Each run mode is respectively explained in the next sections.
---
### Wrapper script
In the same directory of this README file you can find `IMP` script.
This wrapper script will parse the command line arguments you provide
and translate everything to the docker container.
#### Dependencies
* [docker](https://docs.docker.com/installation)
* [git](http://www.git-scm.com)
* [python3]( https://www.python.org/downloads)
#### Usage
First you need to run
```bash
./IMP --init
```
to initialize and download databases needed to run IMP. It is only needed once. Then you could
```bash
./IMP -h
```
to get some help.
---
### Snakemake
#### Dependencies
See `docs/DEPENDENCIES.md`.
You could look at the `docker/Dockerfile` to look how to install some tools.
We do not provide an installation script as it may vary a lot depending on the environment you are in (cluster usage, module load, ...)
#### Usage
First you need to run
```bash
snakemake -s rules/init
```
to initialize and download databases needed to run IMP. It is only needed once. Then you could
```bash
snakemake -l # Get a list of available steps.
snakemake # Launch the analysis
```
#### Configuration
We use a config file to pass variables to snakemake wrapper script. The default parameters are visible in `src/config.imp.json`. You could override parameter via the file `conf/userconfig.imp.json`.
> Please do not override parameters directly on `src/config.imp.json` as it may be overridden with the next IMP update.
Eventually you could pass a different location for the config file:
```bash
CONFIGFILE=/home/imp/myconfigfile.json snakemake
```
Some parameters that can vary a lot can also be overridden via the command line
and take precedence over the config file:
* OUTPUTDIR
* MG
* MT
* DBPATH
* TMPDIR
* MEMTOTAL
* MEMCORE
You can provide them like this:
```bash
OUTPUTDIR=/home/imp/output MG="/home/imp/input/MG.R1.fq MG=/home/imp/input/MG.R2.fq" TMPDIR=/tmp snakemake
```
To see the full list of parameters and a description, see `docs/PARAMETERS.md`.
###
IMP
###
The Integrated Meta-omic Pipeline (IMP) is developed to perform large-scale, reproducible and automated integrative reference free analysis of metagenomic and metatranscriptomic data. IMP also performs single omic (i.e. metagenomic-only and metatranscriptomic-only) analysis as an additional functionality.
*************************
Documentation and website
*************************
All documentation and resources can be found : `here <http://r3lab.uni.lu/web/imp/doc.html>`_.
All components used to develop the IMP workflow are addressed under the `R3lab frozen pages <http://r3lab.uni.lu/frozen/imp>`_.
# include configuration file
include:
"rules/config"
def prepare_environment(stepname):
    """
    Prepare the output directories and logs for one pipeline step.

    stepname: the name of the pipeline step
    return: (step master directory, path of the step log file)
    Raises OSError when the step path exists but is not a directory.
    """
    step_dir = os.path.join(OUTPUTDIR, stepname)
    # Create the step directory, refusing to clobber a non-directory.
    if not os.path.exists(step_dir):
        os.makedirs(step_dir)
    elif not os.path.isdir(step_dir):
        raise OSError("//[IMP] Output is not a directory: %s" % step_dir)
    if not os.path.exists(TMPDIR):
        os.makedirs(TMPDIR)
    # Per-step benchmark directory used by the rules' `benchmark:` directives.
    benchmark_dir = os.path.join(step_dir, 'benchmarks')
    if not os.path.exists(benchmark_dir):
        os.makedirs(benchmark_dir)
    return step_dir, os.path.join(step_dir, '%s.log' % stepname)
# INCLUDES PROCESSING RULES
include:
"rules/Util.rules"
include:
"rules/Preprocessing/master.rules"
include:
"rules/Assembly/master.rules"
"rules/ini/config"
# define the data types used and the assembly
if MG and MT:
TYPES = ['mg', 'mt']
ASS = 'mgmt'
elif MG:
TYPES = ['mg']
ASS = 'mg'
elif MT:
TYPES = ['mt']
ASS = 'mt'
workdir:
OUTPUTDIR
# include rules for the workflow based on the input parameters
include:
"rules/Analysis/master.rules"
"rules/data.input.rules"
# INTEGRATIVE MG-MT workflow
if MG and MT:
if 'preprocessing' in IMP_STEPS:
include:
"workflows/integrative/Preprocessing"
if 'assembly' in IMP_STEPS:
include:
"workflows/integrative/Assembly"
if 'analysis' in IMP_STEPS:
include:
"workflows/integrative/Analysis"
if 'binning' in IMP_STEPS:
include:
"workflows/integrative/Binning"
if 'report' in IMP_STEPS:
include:
"workflows/integrative/Report"
# Single omics MG workflow
elif MG:
if 'preprocessing' in IMP_STEPS:
include:
"workflows/single_omics/mg/Preprocessing"
if 'assembly' in IMP_STEPS:
include:
"workflows/single_omics/mg/Assembly"
if 'analysis' in IMP_STEPS:
include:
"workflows/single_omics/mg/Analysis"
if 'binning' in IMP_STEPS:
include:
"workflows/single_omics/mg/Binning"
if 'report' in IMP_STEPS:
include:
"workflows/single_omics/mg/Report"
elif MT:
if 'preprocessing' in IMP_STEPS:
include:
"workflows/single_omics/mt/Preprocessing"
if 'assembly' in IMP_STEPS:
include:
"workflows/single_omics/mt/Assembly"
if 'analysis' in IMP_STEPS:
include:
"workflows/single_omics/mt/Analysis"
if 'binning' in IMP_STEPS:
include:
"workflows/single_omics/mt/Binning"
if 'report' in IMP_STEPS:
include:
"workflows/single_omics/mt/Report"
else:
raise Exception('No input data.')
# Collect the sentinel files of every enabled step; intended as the input
# of the master rule below.
inputs = []
if 'preprocessing' in IMP_STEPS:
    inputs.append('preprocessing.done')
if 'assembly' in IMP_STEPS:
    inputs.append('assembly.done')
if 'analysis' in IMP_STEPS:
    inputs.append('analysis.done')
if 'binning' in IMP_STEPS:
    inputs.append('binning.done')
if 'report' in IMP_STEPS:
    inputs.append('report.done')
# master command
# NOTE(review): the lines below appear to interleave two revisions of this
# rule — a `shell:` body plus a stray bare `inputs` reference and an
# `output:` directive cannot all belong to one valid Snakemake rule.
# Reconstruct from the original file before use.
rule ALL:
    input:
        preprocessing_output_files(),
        assembly_output_files(),
        analysis_output_files()
    shell:
        "echo 'DONE'"
inputs
    output:
        touch('workflow.done')
1.4.1
......@@ -2,15 +2,19 @@
> This README assumes that you are in the `docker` directory of the IMP project.
## Increment Version number
## Increment Version number in each Docker file
Edit the docker file `docker/Dockerfile`:
Increment the version number in that file.
> The version number can be anything: e.g '1.1.2' or 'my-feature'
> The <version> number can be anything: e.g '1.1.2' or 'my-feature'
Edit the `IMP` script and put the `IMP_VERSION` variable to the same version.
Edit the `impy.py` script and put the `IMP_VERSION` variable to the same version.
Tag the new version on the Gitlab.
Edit the Docker file and change the IMP clone process to clone the same version.
## Build the docker images locally
......@@ -18,9 +22,16 @@ Edit the `IMP` script and put the `IMP_VERSION` variable to the same version.
### Build dependencies
docker build -t docker-r3lab.uni.lu/imp/imp-deps:1.2 -f Dockerfile-dependencies .
docker build -t docker-r3lab.uni.lu/imp/imp-deps:<version> -f Dockerfile-dependencies .
> 'docker-r3lab.uni.lu/imp/imp-deps:<version>' is the image name that we will have to give to the tools Docker file.
### Build tools
docker build -t docker-r3lab.uni.lu/imp/imp-tools:<version> -f Dockerfile-tools .
> 'docker-r3lab.uni.lu/imp/imp-deps:1.1' is the image name that we will have to give to the main Docker file.
> 'docker-r3lab.uni.lu/imp/imp-tools:<version>' is the image name that we will have to give to the main Docker file.
### Build the main Docker file.
......
# Base container with tools needed for the IMP pipeline
#
# VERSION 1.2.0
# VERSION 1.4.1
FROM docker-r3lab.uni.lu/imp/imp-deps:1.2
FROM docker-r3lab.uni.lu/imp/imp-tools:1.4.1
MAINTAINER yohan.jarosz@uni.lu
USER root
######################################
# Tools installations #
######################################
## fastuniq
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/FastUniq-1.1.tar.gz -nv\
&& tar -xzf FastUniq-1.1.tar.gz \
&& cd FastUniq/source \
&& make \
&& mv fastuniq /usr/bin/. \
&& cd ../.. && rm -rf FastUniq* \
## Trimmomatic
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/Trimmomatic-Src-0.32.zip -nv\
&& unzip Trimmomatic-Src-0.32.zip \
&& cd trimmomatic-0.32 \
&& ant \
&& cp dist/jar/trimmomatic-0.32.jar /home/imp/lib/. \
&& cd .. && rm -rf *rimmomatic*
## idba ud
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/idba-1.1.1.tar.gz -nv\
&& tar -xzf idba-1.1.1.tar.gz \
&& cd idba-1.1.1 \
&& sed -i -e 's/static const uint32_t kMaxShortSequence = 128/static const uint32_t kMaxShortSequence = 2048/' src/sequence/short_sequence.h \
&& ./configure \
&& make \
&& make install \
&& mv bin/idba_ud /usr/bin \
&& mv bin/fq2fa /usr/bin \
&& cd .. && rm -rf idba* \
## cap3
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/cap3.linux.x86_64.tar -nv\
&& tar -xf cap3.linux.x86_64.tar \
&& cp CAP3/cap3 /usr/bin \
&& rm -rf cap3* CAP3 \
## bwa
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/bwa-0.7.9a.tar.bz2 -nv\
&& tar -xjf bwa-0.7.9a.tar.bz2 \
&& cd bwa-0.7.9a \
&& make \
&& mv bwa /usr/bin \
&& cd .. && rm -rf bwa*
## htsjdk - dependency of picard tools and FastQC (below)
RUN cd /home/imp/lib \
&& mkdir fastqc \
&& cd fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/htsjdk-1.138.tgz -O htsjdk-1.138.tgz -nv \
&& tar -xzf htsjdk-1.138.tgz \
&& cd htsjdk-1.138 \
&& ant htsjdk-jar \
&& cd .. && rm htsjdk-1.138.tgz
## Picard tools - dependency of fastqc (below)
RUN cd /home/imp/lib \
&& cd /home/imp/lib/fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/picard.1.138.tgz -O picard-1.138.tgz -nv \
&& tar -xzf picard-1.138.tgz \
&& mv /home/imp/lib/fastqc/htsjdk-1.138 /home/imp/lib/fastqc/picard-1.138/htsjdk \
&& cd picard-1.138/ \
&& ant -lib lib/ant package-commands \
&& cd .. && rm picard-1.138.tgz
## FastQC
RUN cd /home/imp/lib \
&& cd /home/imp/lib/fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/fastqc_v0.11.3.zip -nv \
&& unzip fastqc_v0.11.3.zip \
&& cd FastQC \
&& chmod 775 fastqc
## freebayes
# Build freebayes at the v0.9.16 tag.
# FIX: the previous `git checkout -b v0.9.16` created a NEW local branch named
# "v0.9.16" at the clone's HEAD — it never checked out the tagged release, so
# whatever happened to be at HEAD was built. Dropping -b checks out the actual
# tag.
RUN cd /home/imp/tmp \
&& git clone --recursive https://git-r3lab.uni.lu/R3/freebayes.git \
&& cd freebayes \
&& git checkout v0.9.16 \
&& make \
&& make install \
&& cd .. && rm -rf freebayes \
## vcftools
# vcftools binaries go to /usr/bin; its perl helper modules are dropped into
# /etc/perl so downstream perl scripts can find them.
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/vcftools_0.1.12b.tar.gz -nv \
&& tar -xzf vcftools_0.1.12b.tar.gz \
&& cd vcftools_0.1.12b \
&& make \
&& make install \
&& cp -r bin/* /usr/bin \
&& cp -r perl/* /etc/perl/. \
&& cd .. && rm -rf vcftools*
## prokka
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/prokka-1.11.tar.gz -nv \
&& tar -xzf prokka-1.11.tar.gz \
&& cd prokka-1.11 \
&& cp bin/prokka* /usr/bin \
&& cp binaries/linux/* /usr/bin \
&& cd .. && rm -rf prokka* \
## parallel
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/parallel-20140422.tar.bz2 -nv \
&& tar -xjf parallel-20140422.tar.bz2 \
&& cd parallel-20140422 \
&& ./configure \
&& make \
&& make install \
&& cd .. && rm -rf parallel*
## sortmerna
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/sortmerna.2.0.tgz -nv \
&& tar -xzf sortmerna.2.0.tgz \
&& cd sortmerna-2.0 \
&& sh build.sh \
&& mv sortmerna indexdb_rna /usr/bin/. \
&& mv scripts/*.sh /home/imp/lib/. \
&& cd .. && rm -rf sortmerna*
## bedtools2
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/bedtools-2.24.0.tar.gz -O bedtools-2.24.0.tar.gz -nv \
&& tar -xzf bedtools-2.24.0.tar.gz \
&& cd bedtools2 \
&& make
## Install KronaPlot
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/KronaTools-2.5.tar -nv \
&& tar -xvf KronaTools-2.5.tar \
&& cd KronaTools-2.5 \
&& perl install.pl
## htslib
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/htslib-1.2.1.tar.bz2 -nv \
&& tar -jxvf htslib-1.2.1.tar.bz2 \
&& cd htslib-1.2.1 \
&& ./configure && make && make install \
&& cd .. && rm -rf htslib-1.2.1
## Platypus
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/Platypus-latest.tgz -nv \
&& tar -xvzf Platypus-latest.tgz \
&& cd Platypus_0.8.1 \
&& bash buildPlatypus.sh
RUN cd /home/imp/lib \
&& rm -rf megahit \
&& git clone https://github.com/voutcn/megahit.git \
&& cd megahit \
&& make \
&& mv megahit* /usr/bin/.
## Vizbin JAR
# Fetch the prebuilt VizBin jar (no build step required).
# FIX: removed the trailing no-op `RUN cd /home/imp/lib` instruction — a `cd`
# in its own RUN has no effect on subsequent instructions (each RUN starts a
# fresh shell) and only added an empty image layer.
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/VizBin-dist.jar -O VizBin-dist.jar -nv
## Quast/metaQuast
RUN cd /home/imp/lib \
&& git config --global http.postBuffer 2M \
&& git config --global http.maxRequestBuffer 100M \
&& git config --global core.compression 0 \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/quast-3.1.zip -O quast.zip -nv\
&& unzip quast.zip \
&& cd quast-release_3.1 \
&& ln -s $PWD/metaquast.py /usr/bin/metaquast
######################
# set up environment #
######################
# add Snakemake
RUN pip3 install snakemake==3.7.1
## add snakemake completion
RUN echo "`snakemake --bash-completion`" >> ~/.bashrc \
## add LD_LIBRARY_PATH
&& echo "export LD_LIBRARY_PATH=/usr/local/lib" >> ~/.bashrc
#####################
# Ship IMP codebase #
#####################
RUN mkdir /code \
&& cd /code \
&& git clone --branch 1.2.0 --single-branch https://git-r3lab.uni.lu/shaman.narayanasamy/IMP.git
RUN mkdir -p /home/imp/code /home/imp/output /home/imp/data /home/imp/bin /home/imp/databases \
&& cd /home/imp/code \
&& git clone --branch 1.4.1 --single-branch https://git-r3lab.uni.lu/IMP/IMP.git . \
&& cd /usr && ln -s /home/imp/databases db
# R with checkpoint libraries
ADD dependencies.R /home/imp/lib/
RUN mkdir /home/imp/lib/.checkpoint \
&& echo "r <- getOption('repos'); r['CRAN'] <- 'https://cloud.r-project.org/'; options(repos = r);" > /home/imp/.Rprofile \
&& Rscript -e "install.packages('checkpoint', repos='https://cloud.r-project.org/')" \
&& Rscript -e "library(checkpoint);checkpoint('2016-06-20', checkpointLocation='/home/imp/lib', project='/home/imp/lib')" \
&& Rscript -e "source('http://bioconductor.org/biocLite.R');biocLite('genomeIntervals', dependencies=TRUE)" \
&& chmod -R 777 /home/imp/lib/.checkpoint /home/imp/.Rprofile
#### Add gosu
ENV GOSU_VERSION 1.7
# Download gosu and verify its GPG signature before trusting it.
# FIX: the SKS keyserver pool (ha.pool.sks-keyservers.net) has been shut down,
# so the key fetch — and therefore the whole build — fails. Use
# hkps://keys.openpgp.org instead, which serves the gosu release key.
RUN set -x \
&& apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/* \
&& wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)" \
&& wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture).asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
&& gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
&& rm -r "$GNUPGHOME" /usr/local/bin/gosu.asc \
&& chmod +x /usr/local/bin/gosu \
# smoke test: drop privileges to `nobody` and run a trivial command
&& gosu nobody true
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
#### Add IMP user
RUN groupadd imp && useradd -g imp -d /home/imp imp \
&& chown imp:imp -R /home/imp/ \
&& chmod -R 0777 /home/imp \
&& echo 'imp:imp' |chpasswd \
&& echo "imp ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers.d/imp
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
######################
# runtime parameters #
######################
ENV PATH /home/imp/lib/trinityrnaseq_r20140717:/home/imp/lib/Platypus_0.8.1:/home/imp/lib/megahit:/home/imp/lib/fastqc/FastQC:$PATH
ENV PATH /home/imp/lib/FragGeneScan1.30:/home/imp/lib/MaxBin-2.2.1:/home/imp/lib/pullseq/src:/home/imp/lib/Platypus_0.8.1:/home/imp/lib/megahit:/home/imp/lib/fastqc/FastQC:$PATH
ENV LD_LIBRARY_PATH /usr/local/lib/:/home/imp/lib/Platypus_0.8.1
VOLUME ["/data", "/output", "/databases", "/code"]
WORKDIR /code
VOLUME ["/home/imp/data", "/home/imp/output", "/home/imp/databases", "/home/imp/code"]
WORKDIR /home/imp/code
CMD ["snakemake", "ALL"]
# Base container with tools needed for the IMP pipeline
#
# VERSION 1.4.1
FROM docker-r3lab.uni.lu/imp/imp:1.3
MAINTAINER yohan.jarosz@uni.lu
######################################
# Tools installations #
######################################
## Reinstall bioperl
RUN (echo y;echo o conf prerequisites_policy follow;echo o conf commit)|cpan -f -i CJFIELDS/BioPerl-1.6.924.tar.gz \
#&& apt-get install -yq bioperl=1.6.923-1 \
&& rm -rf /var/lib/apt/lists/*
## CheckM
RUN cd /home/imp/lib \
&& pip2.7 install pysam dendropy ScreamingBackpack \
&& wget https://github.com/Ecogenomics/CheckM/archive/v1.0.6.tar.gz \
&& tar xf v1.0.6.tar.gz \
&& cd CheckM-1.0.6/ \
&& python2.7 setup.py install \
&& cd .. \
&& rm -r CheckM-1.0.6/ \
&& wget https://github.com/matsen/pplacer/releases/download/v1.1.alpha17/pplacer-Linux-v1.1.alpha17.zip \
&& unzip pplacer-Linux-v1.1.alpha17.zip \
&& cp pplacer-Linux-v1.1.alpha17/guppy /usr/bin \
&& cp pplacer-Linux-v1.1.alpha17/pplacer /usr/bin \
&& rm -r pplacer-Linux-v1.1.alpha17
##CheckM Data
RUN cd /home/imp/lib \
&& wget https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_v1.0.6.tar.gz
RUN cd /home/imp/lib \
&& mkdir checkm_data_v1.0.6 \
&& tar -xf checkm_data_v1.0.6.tar.gz -C checkm_data_v1.0.6 \
#echo used for interactive prompt
&& echo checkm_data_v1.0.6 | checkm data setRoot checkm_data_v1.0.6/
##Amphora2
RUN cd /home/imp/lib \
&& git clone https://github.com/mherold1/AMPHORA2 \
&& wget https://github.com/stamatak/standard-RAxML/archive/v8.2.8.tar.gz -O stamatak-standard-RAxML.tar.gz \
&& tar xf stamatak-standard-RAxML.tar.gz \
&& cd standard-RAxML-8.2.8; make -f Makefile.gcc; make -f Makefile.PTHREADS.gcc; sudo cp raxmlHPC* /usr/bin/. \
&& cd .. \
&& (echo y;echo o conf prerequisites_policy follow;echo o conf commit)|cpan -f -i Parallel::ForkManager
# && wget ftp://emboss.open-bio.org/pub/EMBOSS/old/6.5.0/EMBOSS-6.5.7.tar.gz \
# && tar -xf EMBOSS-6.5.7.tar.gz \
# && cd EMBOSS-6.5.7 \
# && ./configure \
# && make \
# && make install
# FOR NOW: using apt-get install emboss
# && apt-get install emboss
RUN cd /home/imp/lib \
&& sudo apt-get update \
&& echo y| sudo apt-get install emboss
# && wget ftp://emboss.open-bio.org/pub/EMBOSS/old/6.5.0/EMBOSS-6.5.7.tar.gz \
# && tar -xf EMBOSS-6.5.7.tar.gz \
# && cd EMBOSS-6.5.7 \
# && ./configure --without-mysql --without-postgresql --without-axis2c --without-hpdf --without-x\
# && make \
#&& make install \
#&& ldconfig \
# && make install
##pullseq
RUN cd /home/imp/lib \
&& git clone https://github.com/bcthomas/pullseq \
&& cd pullseq \
&& ./bootstrap \
&& ./configure \
&& make \
&& make install
##Phylophlan, removed for now maybe revisit at a later timepoint, MH 16.6.2016
#RUN cd /home/imp/lib \
## && wget https://bitbucket.org/nsegata/phylophlan/get/default.tar.gz \
## && tar -xf default.tar.gz \
# && pip install mercurial \
# && hg clone https://bitbucket.org/nsegata/phylophlan \
## && cd phylophlan \
##for development version: (stick to old one for now)
## && hg pull && hg update dev \
# && wget http://www.microbesonline.org/fasttree/FastTree \
# && chmod +x FastTree \
# && cp FastTree /usr/bin \
##usearch binary copied from gaia for now and put in imp code -> need to solve this
#
# #move usearch binary to webdav? this uses my downloadlink, from 2.5.2016 , not sure how long this works.
## && wget --user-agent=Mozilla --content-disposition -E -c http://drive5.com/cgi-bin/upload3.py?license=2016050205235811143 \
# # && chmod +x usearch8.1.1861_i86linux32 \
# # && cp usearch8.1.1861_i86linux32 /usr/bin/usearch \
## && cd \
# This cleans the old git repo and downloads the new one. Remember to replace with a proper
# tag when released
RUN cd /home/imp/code \
&& rm -rf ./* \
&& rm -rf ./.git* \
&& git clone --branch binning_refactor2 --single-branch https://git-r3lab.uni.lu/IMP-dev/IMP.git .
# R with checkpoint libraries
ADD dependencies.R /home/imp/lib/
RUN mkdir -p /home/imp/lib/.checkpoint \
&& echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > /home/imp/.Rprofile \
&& Rscript -e "install.packages('checkpoint')" \
&& Rscript -e "library(checkpoint);checkpoint('2016-06-20', project='/home/imp/lib', checkpointLocation='/home/imp/lib')" \
&& Rscript -e "source('http://bioconductor.org/biocLite.R');biocLite('genomeIntervals')"
# tbl2asn (NCBI) — fetched as a prebuilt linux64 binary, needed by prokka.
# FIX: added -f to gzip so decompression is idempotent (does not abort if
# /tmp/tbl2asn already exists from a cached/retried build). This matches the
# later duplicate of this step, which already uses `gzip -f -d`.
RUN wget https://webdav-r3lab.uni.lu/public/R3lab/IMP/linux64.tbl2asn.gz -O /tmp/tbl2asn.gz \
&& gzip -f -d /tmp/tbl2asn.gz \
&& chmod +x /tmp/tbl2asn \
&& sudo mv /tmp/tbl2asn /usr/bin
######################
# runtime parameters #
######################
ENV AMPHORA2_home /home/imp/lib/AMPHORA2 ###CHECK IF THIS IS CORRECT?
ENV PATH /home/imp/lib/pullseq/src:/home/imp/lib/trinityrnaseq_r20140717:/home/imp/lib/Platypus_0.8.1:/home/imp/lib/megahit:/home/imp/lib/fastqc/FastQC:$PATH
ENV LD_LIBRARY_PATH /usr/local/lib/:/home/imp/lib/Platypus_0.8.1
VOLUME ["/home/imp/data", "/home/imp/output", "/home/imp/databases", "/home/imp/code"]
WORKDIR /home/imp/code
CMD ["snakemake", "ALL"]
# Base container with tools dependencies for the IMP pipeline
#
# VERSION 1.2
# VERSION 1.4.1
FROM ubuntu:trusty
MAINTAINER yohan.jarosz@uni.lu
......@@ -27,9 +27,6 @@ RUN apt-get update \
RUN apt-get update \
&& apt-get install -yq openjdk-7-jdk \
&& rm -rf /var/lib/apt/lists/*
RUN apt-get update \
&& apt-get install -yq bioperl=1.6.923-1 \
&& rm -rf /var/lib/apt/lists/*
......@@ -47,28 +44,20 @@ RUN apt-get update \
RUN apt-get update \
&& apt-get install -yq r-base r-base-dev \
&& apt-get install -yq libcurl4-openssl-dev libssl-dev \
&& apt-get install -yq libncurses5 libncurses5-dev sudo libatlas-base-dev python2.7 gfortran python-dev \
&& apt-get install -yq python-matplotlib \
&& update-alternatives --set java /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java \
&& rm -rf /var/lib/apt/lists/*
## Python dependencies and bioservices. Using version 1.3.5 because script doesn't work with latest version
RUN mkdir -p /home/imp/tmp /home/imp/lib \
&& cd /home/imp/tmp \
&& wget https://bootstrap.pypa.io/get-pip.py \
&& python3 get-pip.py \
&& python2.7 get-pip.py \
&& pip3 install snakemake docopt \
# && pip2.7 install numpy \
# && pip2.7 install scipy scikit-learn docopt \
&& pip3 install docopt \
#&& pip2.7 install numpy \
#&& pip2.7 install scipy scikit-learn docopt \
&& pip install -Iv https://pypi.python.org/packages/source/b/bioservices/bioservices-1.3.5.tar.gz \
&& rm get-pip.py
# R with checkpoint libraries
ADD dependencies.R /home/imp/lib/
RUN mkdir /root/.checkpoint \
&& echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile \
&& Rscript -e "install.packages('checkpoint')" \
&& Rscript -e "library(checkpoint);checkpoint('2015-04-27', project='/home/imp/lib')" \
&& Rscript -e "source('http://bioconductor.org/biocLite.R');biocLite('genomeIntervals')"
# Base container with tools needed for the IMP pipeline
#
# VERSION 1.4.1
FROM docker-r3lab.uni.lu/imp/imp-deps:1.4.1
MAINTAINER yohan.jarosz@uni.lu
######################################
# Tools installations #
######################################
## fastuniq
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/FastUniq-1.1.tar.gz -nv\
&& tar -xzf FastUniq-1.1.tar.gz \
&& cd FastUniq/source \
&& make \
&& mv fastuniq /usr/bin/. \
&& cd ../.. && rm -rf FastUniq* \
## Trimmomatic
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/Trimmomatic-Src-0.32.zip -nv\
&& unzip Trimmomatic-Src-0.32.zip \
&& cd trimmomatic-0.32 \
&& ant \
&& cp dist/jar/trimmomatic-0.32.jar /home/imp/lib/. \
&& cd .. && rm -rf *rimmomatic*
## idba ud
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/idba-1.1.1.tar.gz -nv\
&& tar -xzf idba-1.1.1.tar.gz \
&& cd idba-1.1.1 \
&& sed -i -e 's/static const uint32_t kMaxShortSequence = 128/static const uint32_t kMaxShortSequence = 2048/' src/sequence/short_sequence.h \
&& ./configure \
&& make \
&& make install \
&& mv bin/idba_ud /usr/bin \
&& mv bin/fq2fa /usr/bin \
&& cd .. && rm -rf idba* \
## cap3
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/cap3.linux.x86_64.tar -nv\
&& tar -xf cap3.linux.x86_64.tar \
&& cp CAP3/cap3 /usr/bin \
&& rm -rf cap3* CAP3 \
## bwa
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/bwa-0.7.9a.tar.bz2 -nv\
&& tar -xjf bwa-0.7.9a.tar.bz2 \
&& cd bwa-0.7.9a \
&& make \
&& mv bwa /usr/bin \
&& cd .. && rm -rf bwa*
## htsjdk - dependency of picard tools and FastQC (below)
RUN cd /home/imp/lib \
&& mkdir fastqc \
&& cd fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/htsjdk-1.138.tgz -O htsjdk-1.138.tgz -nv \
&& tar -xzf htsjdk-1.138.tgz \
&& cd htsjdk-1.138 \
&& ant htsjdk-jar \
&& cd .. && rm htsjdk-1.138.tgz
## Picard tools - dependency of fastqc (below)
RUN cd /home/imp/lib \
&& cd /home/imp/lib/fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/picard.1.138.tgz -O picard-1.138.tgz -nv \
&& tar -xzf picard-1.138.tgz \
&& mv /home/imp/lib/fastqc/htsjdk-1.138 /home/imp/lib/fastqc/picard-1.138/htsjdk \
&& cd picard-1.138/ \
&& ant -lib lib/ant package-commands \
&& cd .. && rm picard-1.138.tgz
## FastQC
RUN cd /home/imp/lib \
&& cd /home/imp/lib/fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/fastqc_v0.11.3.zip -nv \
&& unzip fastqc_v0.11.3.zip \
&& cd FastQC \
&& chmod 775 fastqc
## freebayes
# Build freebayes at the v0.9.16 tag.
# FIX: `git checkout -b v0.9.16` created a NEW local branch named "v0.9.16" at
# the clone's HEAD instead of checking out the tagged release. Dropping -b
# checks out the actual tag.
RUN cd /home/imp/tmp \
&& git clone --recursive https://git-r3lab.uni.lu/R3/freebayes.git \
&& cd freebayes \
&& git checkout v0.9.16 \
&& make \
&& make install \
&& cd .. && rm -rf freebayes \
## vcftools
# vcftools binaries go to /usr/bin; its perl helper modules are dropped into
# /etc/perl so downstream perl scripts can find them.
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/vcftools_0.1.12b.tar.gz -nv \
&& tar -xzf vcftools_0.1.12b.tar.gz \
&& cd vcftools_0.1.12b \
&& make \
&& make install \
&& cp -r bin/* /usr/bin \
&& cp -r perl/* /etc/perl/. \
&& cd .. && rm -rf vcftools*
## prokka
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/prokka-1.11.tar.gz -nv \
&& tar -xzf prokka-1.11.tar.gz \
&& cd prokka-1.11 \
&& cp bin/prokka* /usr/bin \
&& cp binaries/linux/* /usr/bin \
&& cd .. && rm -rf prokka* \
## parallel
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/parallel-20140422.tar.bz2 -nv \
&& tar -xjf parallel-20140422.tar.bz2 \
&& cd parallel-20140422 \
&& ./configure \
&& make \
&& make install \
&& cd .. && rm -rf parallel*
## sortmerna
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/sortmerna.2.0.tgz -nv \
&& tar -xzf sortmerna.2.0.tgz \
&& cd sortmerna-2.0 \
&& sh build.sh \
&& mv sortmerna indexdb_rna /usr/bin/. \
&& mv scripts/*.sh /home/imp/lib/. \
&& cd .. && rm -rf sortmerna*
## bedtools2
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/bedtools-2.18.0.tar.gz -O bedtools-2.18.0.tar.gz -nv \
&& tar -xzf bedtools-2.18.0.tar.gz \
&& cd bedtools-2.18.0 \
&& make \
&& cp bin/* /usr/bin/. \
&& cd /home/imp/tmp && rm -rf *
## Install KronaPlot
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/KronaTools-2.5.tar -nv \
&& tar -xvf KronaTools-2.5.tar \
&& cd KronaTools-2.5 \
&& perl install.pl \
&& cd /home/imp/lib && rm -rf KronaTools-2.5.tar
## htslib
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/htslib-1.2.1.tar.bz2 -nv \
&& tar -jxvf htslib-1.2.1.tar.bz2 \
&& cd htslib-1.2.1 \
&& ./configure && make && make install \
&& cd /home/imp/tmp && rm -rf *
## Platypus
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/Platypus-latest.tgz -nv \
&& tar -xvzf Platypus-latest.tgz \
&& cd Platypus_0.8.1 \
&& bash buildPlatypus.sh \
&& cd /home/imp/lib && rm Platypus-latest.tgz
RUN cd /home/imp/lib \
&& rm -rf megahit \
&& git clone --branch v1.0.6 https://github.com/voutcn/megahit.git \
&& cd megahit \
&& make \
&& mv megahit* /usr/bin/.
## Vizbin JAR
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/VizBin-dist.jar -O VizBin-dist.jar -nv
## Quast/metaQuast
RUN cd /home/imp/lib \
&& git config --global http.postBuffer 2M \
&& git config --global http.maxRequestBuffer 100M \
&& git config --global core.compression 0 \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/quast-3.1.zip -O quast.zip -nv\
&& unzip quast.zip \
&& cd quast-release_3.1 \
&& ln -s $PWD/metaquast.py /usr/bin/metaquast
## CheckM
RUN cd /home/imp/lib \
&& pip2.7 install pysam dendropy ScreamingBackpack \
&& wget https://github.com/Ecogenomics/CheckM/archive/v1.0.6.tar.gz \
&& tar xf v1.0.6.tar.gz \
&& cd CheckM-1.0.6/ \
&& python2.7 setup.py install \
&& cd .. \
&& rm -rf CheckM-1.0.6/ \
&& wget https://github.com/matsen/pplacer/releases/download/v1.1.alpha17/pplacer-Linux-v1.1.alpha17.zip \
&& unzip pplacer-Linux-v1.1.alpha17.zip \
&& cp pplacer-Linux-v1.1.alpha17/guppy /usr/bin \
&& cp pplacer-Linux-v1.1.alpha17/pplacer /usr/bin \
&& rm -r pplacer-Linux-v1.1.alpha17
## pullseq
RUN cd /home/imp/lib \
&& git clone https://github.com/bcthomas/pullseq \
&& cd pullseq \
&& ./bootstrap \
&& ./configure \
&& make \
&& make install \
&& cd .. \
&& rm -rf pullseq
## MaxBin & deps
RUN cd /home/imp/lib \
&& wget -c --no-check-certificate https://webdav-r3lab-server.uni.lu/public/R3lab/IMP/MaxBin-2.2.1.tar.gz \
&& tar -xvzf MaxBin-2.2.1.tar.gz \
&& cd MaxBin-2.2.1/src \
&& make \
&& rm /home/imp/lib/MaxBin-2.2.1.tar.gz \
&& cd /home/imp/lib \
&& wget -c --no-check-certificate https://webdav-r3lab-server.uni.lu/public/R3lab/IMP/bowtie2-2.2.9-source.zip \
&& unzip bowtie2-2.2.9-source.zip \
&& cd bowtie2-2.2.9 \
&& make \
&& make install \
&& cd .. \
&& rm -rf bowtie2* \
&& cd /home/imp/lib \
&& wget -c --no-check-certificate https://webdav-r3lab-server.uni.lu/public/R3lab/IMP/FragGeneScan1.30.tar.gz \
&& tar -xvzf FragGeneScan1.30.tar.gz \
&& cd FragGeneScan1.30 \
&& make clean \
&& make fgs \
&& chmod 777 -R /home/imp/lib/FragGeneScan1.30 \
&& rm /home/imp/lib/FragGeneScan1.30.tar.gz
RUN wget https://webdav-r3lab.uni.lu/public/R3lab/IMP/linux64.tbl2asn.gz -O /tmp/tbl2asn.gz \
&& gzip -f -d /tmp/tbl2asn.gz \
&& chmod +x /tmp/tbl2asn \
&& mv /tmp/tbl2asn /usr/bin
#!/bin/bash
# Build and export the IMP docker images for a given release.
#
# Usage: ./build.sh <IMP_VERSION>
#
# Builds the three image layers in order (dependencies -> tools -> final),
# then saves the final image as a gzipped tarball imp-<version>.tar.gz.
set -e

IMP_VERSION=$1

# FIX: refuse to run without a version argument. Previously an empty $1
# silently produced images tagged "...:" (i.e. "latest"-like mistags) and a
# tarball literally named "imp-.tar".
if [ -z "$IMP_VERSION" ]; then
    echo "usage: $0 <IMP_VERSION>" >&2
    exit 1
fi

# Expansions are quoted so an accidental space in the argument cannot split
# the docker arguments.
docker build -t "docker-r3lab.uni.lu/imp/imp-deps:$IMP_VERSION" -f Dockerfile-dependencies .
docker build -t "docker-r3lab.uni.lu/imp/imp-tools:$IMP_VERSION" -f Dockerfile-tools .
docker build -t "docker-r3lab.uni.lu/imp/imp:$IMP_VERSION" .
docker save "docker-r3lab.uni.lu/imp/imp:$IMP_VERSION" > "imp-$IMP_VERSION.tar"
gzip "imp-$IMP_VERSION.tar"
exit 0
......@@ -9,3 +9,13 @@ require(stringr)
require(xtable)
require(beanplot)
require(psych)
require(caTools)
require(fpc)
require(FNN)
require(RColorBrewer)
require(scales)
require(diptest)
require(mixtools)
require(gclus)
require(cowplot)
require(gridExtra)
#!/bin/bash
# Entrypoint: remap the in-container "imp" user/group to the caller's host
# UID/GID (passed in via the LOCAL_USER_ID / LOCAL_GROUP_ID environment
# variables) so files written to mounted volumes end up owned by the invoking
# host user, then step down from root with gosu and exec the requested
# command.
#
# FIX: the expansions are now quoted — unset or whitespace-containing values
# previously underwent word splitting, turning e.g. an unset LOCAL_USER_ID
# into `usermod -u imp` (imp parsed as the UID) instead of a clean failure.
usermod -u "${LOCAL_USER_ID}" imp
groupmod -g "${LOCAL_GROUP_ID}" imp
exec gosu imp "$@"
# List of dependencies for IMP
## With the wrapper script
* [docker](https://docs.docker.com/installation)
* [git](http://www.git-scm.com)
* [python3]( https://www.python.org/downloads)
## With Snakemake
* [openjdk](http://openjdk.java.net/) - 7
* [bioperl](http://www.bioperl.org/wiki/Main_Page) - 1.6.923-1
* [tabix](http://www.htslib.org/doc/tabix.html) - 0.2.6-2
* [fastuniq](http://sourceforge.net/projects/fastuniq/) - 1.1
* [samtools](http://samtools.sourceforge.net/)
* [gnuplot](http://www.gnuplot.info/)
* [python](http://python.org/) - 3.4
* [R](https://www.r-project.org/)
* [python](http://python.org/) - 2.7
* [gfortran](http://gcc.gnu.org/fortran/)
* [libatlas](http://math-atlas.sourceforge.net/)
* [docopt](http://docopt.org/)
* [bioservices](https://pypi.python.org/pypi/bioservices) - 1.3.5
* [numpy](http://www.numpy.org/)
* [scipy](https://www.scipy.org/)
* [matplotlib](http://matplotlib.org/)
* [sklearn](http://scikit-learn.org/stable/index.html)
* [Trimmomatic](http://www.usadellab.org/cms/index.php?page=trimmomatic) - 0.32
* [idba ud](http://wiki.hpc.ufl.edu/doc/IDBA-UD) - 1.1.1
* [cap3](http://seq.cs.iastate.edu/cap3.html)
* [bwa](http://bio-bwa.sourceforge.net/) - 0.7.9a
* [htsjdk](https://github.com/samtools/htsjdk) - 1.138
* [Picard tools](https://github.com/broadinstitute/picard)
* [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - v0.11.3
* [Freebayes](https://github.com/ekg/freebayes) - v0.9.16
* [vcftools](http://vcftools.sourceforge.net/) - 0.1.12b
* [prokka](https://github.com/tseemann/prokka) - 1.11
* [parallel](https://www.gnu.org/software/parallel/) - 20140422
* [sortmerna](http://bioinfo.lifl.fr/RNA/sortmerna/) - 2.0
* [bedtools2](https://github.com/arq5x/bedtools2) - 2.24.0
* [KronaTools](https://github.com/marbl/Krona/wiki) - 2.5
* [htslib](http://www.htslib.org/) - 1.2.1
* [Platypus](https://github.com/andyrimmer/Platypus) - 0.8.1
* [megahit](https://github.com/voutcn/megahit)
* [Vizbin](https://github.com/claczny/VizBin)
* [Quast/metaQuast](http://bioinf.spbau.ru/en/metaquast) - 3.1
Some lower-level dependencies are not listed here.
For a complete list of all dependencies, please see what is installed in `docker/Dockerfile-dependencies`
### R dependencies
* genomeIntervals from biocLite
We use the `checkpoint` library, pinned to the `2015-04-27` snapshot, to install the following R packages:
* ggplot2
* gtools
* data.table
* reshape
* grid
* grDevices
* genomeIntervals
* stringr
* xtable
* beanplot
* psych