Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • IMP/IMP
  • IMP-dev/IMP
2 results
Show changes
Commits on Source (983)
.snakemake
TEST_DATA
build
.DS_Store
KronaTools-2.5.tar
KronaTools-2.5/
db
env/
run_IMP_A02-20150318.sh
build_gz/
run_tests.sh
.*
!.gitignore
!.gitlab-ci.yml
*~
build_bz2/
run_A01.sh
conf/userconfig.imp.json
output/*
stats.json
db
__pycache__
# GitLab CI pipeline for IMP. Indentation reconstructed to valid YAML
# (the flattened source had lost all structure).
stages:
  - dag
  - init
  - preprocessing
  - assembly
  - analysis
  - binning
  - manual

databases:
  stage: manual
  when: manual
  script:
    - pip3.4 install --editable . --user
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d $CI_PROJECT_DIR/$CI_BUILD_ID init

# Runs before every job's script.
before_script:
  - pip3.4 install --editable . --user

# INIT
i-cl:
  stage: init
  script:
    - pytest

i-with-another-filter:
  stage: init
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db init --index $CI_PROJECT_DIR/test/small.fa

# DAG
# see if the snakemake graph is resolving to the end
dag-mgmt-default:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"

dag-mgmt-maxbin:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -b maxbin -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"

dag-mgmt-idba:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -a idba -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"

dag-mgmt-nf:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"

dag-mgmt-nf2:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n" --no-filtering

dag-mg-default:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"

dag-mg-nf:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"

dag-mg-nf2:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n" --no-filtering

dag-mt-default:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db run -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"

dag-mt-nf:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"

dag-mt-nf2:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db run -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n" --no-filtering

dag-mgmt-no-binning:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b no run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID -x "snakemake -n"

dag-mt-no-binning:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b no run -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"

dag-mg-no-binning:
  stage: dag
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b no run -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics -x "snakemake -n"

# PREPROCESSING
p-mgmt-default:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db preprocessing -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step

p-mgmt-nf:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db preprocessing -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step

p-mg-default:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db preprocessing -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics --single-step

p-mg-nf:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db preprocessing -m $CI_PROJECT_DIR/test/MG.R1.small.fq -m $CI_PROJECT_DIR/test/MG.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics --single-step

p-mt-default:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db preprocessing -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics --single-step

p-mt-nf:
  stage: preprocessing
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/no_filtering.conf.json -d /mnt/data/db preprocessing -t $CI_PROJECT_DIR/test/MT.R1.small.fq -t $CI_PROJECT_DIR/test/MT.R2.small.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-omics --single-step

# Assembly
a-mgmt-idba:
  stage: manual
  when: manual
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a idba assembly -m /mnt/data/input/assembly-megahit/mg.r1.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.r2.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.se.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r1.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r2.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step

a-mg-idba:
  stage: manual
  when: manual
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a idba assembly -m /mnt/data/input/assembly-megahit/mg.r1.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.r2.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics

a-mt-idba:
  stage: manual
  when: manual
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a idba assembly -t /mnt/data/input/assembly-megahit/mt.r1.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r2.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics

a-mgmt-megahit:
  stage: assembly
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a megahit assembly -m /mnt/data/input/assembly-megahit/mg.r1.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.r2.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.se.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r1.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r2.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step

a-mg-megahit:
  stage: assembly
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a megahit assembly -m /mnt/data/input/assembly-megahit/mg.r1.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.r2.preprocessed.fq -m /mnt/data/input/assembly-megahit/mg.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics

a-mt-megahit:
  stage: assembly
  script:
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -a megahit assembly -t /mnt/data/input/assembly-megahit/mt.r1.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.r2.preprocessed.fq -t /mnt/data/input/assembly-megahit/mt.se.preprocessed.fq -o $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics

an-mgmt:
  stage: analysis
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/analysis-mgmt/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -e IMP_SUDO=sudo -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db analysis --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step

an-mg:
  stage: analysis
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/analysis-mg/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -e IMP_SUDO=sudo -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db analysis --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics

an-mt:
  stage: analysis
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/analysis-mt/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -e IMP_SUDO=sudo -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db analysis --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics

# binny-mgmt:
#   stage: binning
#   script:
#     - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
#     - cp -r /mnt/data/input/binny-mgmt/* $CI_PROJECT_DIR/$CI_BUILD_ID
#     - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db binning --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step
# binny-mg:
#   stage: binning
#   script:
#     - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
#     - cp -r /mnt/data/input/binny-mg/* $CI_PROJECT_DIR/$CI_BUILD_ID
#     - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db binning --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics

maxbin-mgmt:
  stage: manual
  when: manual
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/maxbin-mgmt/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b maxbin binning --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step

maxbin-mg:
  stage: manual
  when: manual
  script:
    - mkdir -p $CI_PROJECT_DIR/$CI_BUILD_ID
    - cp -r /mnt/data/input/maxbin-mg/* $CI_PROJECT_DIR/$CI_BUILD_ID
    - impy -s . -c $CI_PROJECT_DIR/test/default.conf.json -d /mnt/data/db -b maxbin binning --data-dir $CI_PROJECT_DIR/$CI_BUILD_ID --single-step --single-omics
# v 1.4.1
* fix command line bug (check config file exists)
* fix command line bug (loading container from local tarball)
* handling cases where no SE reads after preprocessing
* fix MT "binning" step
* command line handle environment variable
* remove deprecated IMP script (use impy instead)
# v 1.4.0
* Update command line utility: impy
* Integrate more tools into the pipeline (e.g. MaxBin)
* Part of the pipeline can be launched without the others
* Testing pipeline workflow and tools
* Update container to version 1.4
# v 1.2.0
* Upgrade Snakemake to the latest Version
* Current version of the IMP code is shipped inside the docker container
# v 1.1.1
* Change megahit version
#!/usr/bin/env python3
from lib.docopt import docopt
from lib.path import Path
import subprocess
import os
import json
import shlex
from copy import deepcopy
import tempfile
import sys
__doc__ = """Integrated Metaomic Pipeline.
____ __ __ ____
(_ _)( \/ )( _ \\
_)(_ ) ( )___/
(____)(_/\/\_)(__)
Usage:
IMP [-m MG1 -m MG2] [-t MT1 -t MT2] -o OUTPUT [--enter] [--norm] [-n CONTAINER] [-d DBPATH] [-e ENV] ... [COMMANDS ...]
IMP --init [-d DBPATH]
IMP (-h | --help)
IMP --version
Options:
-e ENV Environment variable to pass to the container
--enter Enter the container
--init Initialize IMP databases (Take a while)
--norm Don't delete the container after use. Useful for debugging.
-h --help Show this help and exit
-m MG Path to the metagenomics paired files (must be 2 files).
-t MT Path to the metatranscriptomic paired files (2 files).
-d DBPATH Path to the databases [default: db]
-n CONTAINER Name of the container. Useful when you want to run a previous version of IMP.
-o OUTPUT Path to the output directory
"""
def get_version():
    """Return the hash of the last git commit (bytes) to use as version string."""
    git_log = ['git', '--no-pager', 'log', '-n', '1', '--pretty=format:%H']
    return subprocess.check_output(git_log)
def dict_merge(a, b):
    """
    Deep merge 2 dicts together.

    Returns a new dict: ``a`` is deep-copied, then every key of ``b``
    is merged on top (recursively when both sides hold a dict).
    Neither input is mutated.  When ``b`` is not a dict it wins outright.
    """
    if not isinstance(b, dict):
        return b
    merged = deepcopy(a)
    for key in b:
        both_are_dicts = key in merged and isinstance(merged[key], dict)
        if both_are_dicts:
            merged[key] = dict_merge(merged[key], b[key])
        else:
            merged[key] = deepcopy(b[key])
    return merged
def yes_or_no(question):
    """
    Ask a yes/no question on stdin until a valid answer is given.

    question: prompt text (the ' (y/n): ' suffix is appended here)
    return: True for answers starting with 'y', False for 'n'
    """
    while True:
        reply = str(input(question + ' (y/n): ')).lower().strip()
        # startswith() is safe on an empty reply, unlike reply[0]
        # which raised IndexError when the user just pressed Enter.
        if reply.startswith('y'):
            return True
        if reply.startswith('n'):
            return False
        # re-prompt with the same wording the recursive version used
        question = "Please enter "
def init(args):
    """
    Initialize the IMP databases by running the init snakemake rule
    inside the docker container, mounting this directory as /code and
    the database path as /databases.
    args: parsed docopt arguments dictionary
    """
    code_path = Path(__file__).parent.abspath()
    # fall back to the default image when no container name was given
    container = args['-n'] or 'imp:latest'
    databases = Path(args['-d']).abspath()
    command_line = ' '.join([
        'docker', 'run', '--rm',
        '-v %s:/code' % code_path,
        '-v %s:/databases' % databases,
        container,
        'snakemake -s /code/init.rule',
    ])
    print("Executing", '"', command_line, '"')
    subprocess.call(shlex.split(command_line))
def run(args):
    """
    Run the IMP pipeline inside the docker container.

    Mounts the data, code, output and database directories into the
    container, forwards user environment variables (plus generated MG/MT
    file lists) and passes any trailing COMMANDS to the container.
    args: parsed docopt arguments dictionary
    """
    CURRENT_PATH = Path(__file__).parent.abspath()
    # find common path
    mg_data = [Path(p).abspath() for p in args['-m']]
    mt_data = [Path(p).abspath() for p in args['-t']]
    # check paths
    for pth in mg_data + mt_data:
        if not pth.exists():
            print("'%s' does not exist" % pth)
            exit(1)
    # NOTE(review): commonprefix is character-based, not path-based;
    # the dirname() call trims any partial component it leaves behind.
    common_path = Path(os.path.commonprefix(mg_data + mt_data)).dirname()
    # update data paths (make them relative to the common mount point)
    mg_data = [p.partition(common_path)[-1][1:] for p in mg_data]
    mt_data = [p.partition(common_path)[-1][1:] for p in mt_data]
    output = Path(args['-o']).abspath()
    if not output.exists():
        if yes_or_no("Output '%s' doesn't exist. Create ?" % output):
            output.makedirs()
        else:
            exit(0)
    container_name = args['-n'] is not None and args['-n'] or 'imp:latest'
    db_path = Path(args['-d']).abspath()
    # configure IMP mount point to the docker container
    mount_points = [
        '-v %s:/data' % common_path,
        '-v %s:/code' % CURRENT_PATH,
        '-v %s:/output' % output,
        '-v %s:/databases' % db_path,
    ]
    # environment variables (add MG and MT data)
    # BUGFIX: split on the first '=' only, so values that themselves
    # contain '=' (e.g. FOO=a=b) no longer make format() raise.
    envs = ['-e {}="{}"'.format(*e.split('=', 1)) for e in args['-e']]
    # prepare MG and MT data
    mg = ['/data/' + d for d in mg_data]
    mt = ['/data/' + d for d in mt_data]
    if mg:
        envs += ['-e MG="%s"' % ' '.join(mg)]
    if mt:
        envs += ['-e MT="%s"' % ' '.join(mt)]
    # CL
    cmd = ['docker', 'run'] + mount_points + envs
    # rm the container by default
    if not args['--norm']:
        cmd += ['--rm']
    # if --enter flag is specified, add attach tty and set mode to interactive
    if args['--enter']:
        cmd += ['-it']
    # add container name and commands to pass to snakemake
    cmd += [container_name] + args['COMMANDS']
    # if --enter flag is specified, change the command
    if args['--enter']:
        cmd += ['/bin/bash']
    # parse CL correctly
    cmd = shlex.split(' '.join(cmd))
    print("Executing", '"', ' '.join(cmd), '"')
    subprocess.call(cmd)
def validate(args):
    """
    Validate the command-line arguments.

    Checks that MG and MT inputs, when given, each consist of exactly
    two paired files, and that at least one omics dataset is provided.
    args: parsed docopt arguments dictionary
    return: True when the arguments are usable, False otherwise
            (an explanation is printed to stderr on failure)
    """
    mg = args['-m']
    if mg and len(mg) != 2:
        print('Metagenomic data should be 2 paired files', file=sys.stderr)
        return False
    mt = args['-t']
    if mt and len(mt) != 2:
        print('Metatranscriptomic data should be 2 paired files', file=sys.stderr)
        return False
    if not mg and not mt:
        # BUGFIX: message previously read "provide at metagenomic"
        print('You should provide at least metagenomic and/or metatranscriptomic data', file=sys.stderr)
        return False
    return True
if __name__ == '__main__':
    # options_first=True: stop option parsing at the first positional so
    # trailing COMMANDS are forwarded verbatim to the container.
    args = docopt(__doc__, version=get_version(), options_first=True)
    # bail out early on malformed MG/MT arguments
    if not validate(args):
        exit(1)
    # --init sets up the databases; otherwise run the pipeline
    if args['--init']:
        init(args)
    else:
        run(args)
MIT License
Copyright (c) 2016 Luxembourg Centre for Systems Biomedicine
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
include *.rst
# Upload and release changes of impy to [PyPI](https://pypi.python.org/pypi).
* Login to PyPI.
* Make changes in `setup.py` (version, ...)
* Upload the package.
```
python setup.py sdist upload
```
###
IMP
###
The Integrated Meta-omic Pipeline (IMP) is developed to perform large-scale, reproducible and automated integrative reference-free analysis of metagenomic and metatranscriptomic data. IMP also performs single-omic (i.e. metagenomic-only and metatranscriptomic-only) analysis as an additional functionality.
*************************
Documentation and website
*************************
All documentation and resources can be found : `here <http://r3lab.uni.lu/web/imp/doc.html>`_.
All components used to develop the IMP workflow are addressed under the `R3lab frozen pages <http://r3lab.uni.lu/frozen/imp>`_.
# include configuration file
include:
"config"
def prepare_environment(stepname):
    """
    Prepare the output directories and logs for one pipeline step.

    stepname: the name of the pipeline step
    return: (step master directory, path of the step log file)
    raises OSError when the step path exists but is not a directory
    """
    step_dir = os.path.join(OUTPUTDIR, stepname)
    if os.path.exists(step_dir):
        if not os.path.isdir(step_dir):
            raise OSError("//[IMP] Output is not a directory: %s" % step_dir)
    else:
        os.makedirs(step_dir)
    # shared temporary directory for the whole run
    if not os.path.exists(TMPDIR):
        os.makedirs(TMPDIR)
    # per-step snakemake benchmark files live here
    benchmark_dir = os.path.join(step_dir, 'benchmarks')
    if not os.path.exists(benchmark_dir):
        os.makedirs(benchmark_dir)
    return step_dir, os.path.join(step_dir, '%s.log' % stepname)
# INCLUDES PROCESSING RULES
include:
"rules/Util.rules"
include:
"rules/Preprocessing/master.rules"
include:
"rules/Assembly/master.rules"
"rules/ini/config"
# define the data types used and the assembly
if MG and MT:
TYPES = ['mg', 'mt']
ASS = 'mgmt'
elif MG:
TYPES = ['mg']
ASS = 'mg'
elif MT:
TYPES = ['mt']
ASS = 'mt'
workdir:
OUTPUTDIR
# include rules for the workflow based on the input parameters
include:
"rules/Analysis/master.rules"
"rules/data.input.rules"
# INTEGRATIVE MG-MT workflow
if MG and MT:
if 'preprocessing' in IMP_STEPS:
include:
"workflows/integrative/Preprocessing"
if 'assembly' in IMP_STEPS:
include:
"workflows/integrative/Assembly"
if 'analysis' in IMP_STEPS:
include:
"workflows/integrative/Analysis"
if 'binning' in IMP_STEPS:
include:
"workflows/integrative/Binning"
if 'report' in IMP_STEPS:
include:
"workflows/integrative/Report"
# Single omics MG workflow
elif MG:
if 'preprocessing' in IMP_STEPS:
include:
"workflows/single_omics/mg/Preprocessing"
if 'assembly' in IMP_STEPS:
include:
"workflows/single_omics/mg/Assembly"
if 'analysis' in IMP_STEPS:
include:
"workflows/single_omics/mg/Analysis"
if 'binning' in IMP_STEPS:
include:
"workflows/single_omics/mg/Binning"
if 'report' in IMP_STEPS:
include:
"workflows/single_omics/mg/Report"
elif MT:
if 'preprocessing' in IMP_STEPS:
include:
"workflows/single_omics/mt/Preprocessing"
if 'assembly' in IMP_STEPS:
include:
"workflows/single_omics/mt/Assembly"
if 'analysis' in IMP_STEPS:
include:
"workflows/single_omics/mt/Analysis"
if 'binning' in IMP_STEPS:
include:
"workflows/single_omics/mt/Binning"
if 'report' in IMP_STEPS:
include:
"workflows/single_omics/mt/Report"
else:
raise Exception('No input data.')
inputs = []
if 'preprocessing' in IMP_STEPS:
inputs.append('preprocessing.done')
if 'assembly' in IMP_STEPS:
inputs.append('assembly.done')
if 'analysis' in IMP_STEPS:
inputs.append('analysis.done')
if 'binning' in IMP_STEPS:
inputs.append('binning.done')
if 'report' in IMP_STEPS:
inputs.append('report.done')
# master command
rule ALL:
input:
preprocessing_output_files(),
assembly_output_files(),
analysis_output_files()
shell:
"echo 'DONE'"
inputs
output:
touch('workflow.done')
1.4.1
# How to build a new tarball version from docker files
> This README assumes that you are in the `docker` directory of the IMP project.
## Increment Version number in each Docker files
Edit the docker file `docker/Dockerfile`:
Increment the version number in that file.
> The <version> number can be anything: e.g '1.1.2' or 'my-feature'
Edit the `impy.py` script and put the `IMP_VERSION` variable to the same version.
Tag the new version on the Gitlab.
Edit the Docker file and change the IMP clone process to clone the same version.
## Build the docker images locally
> In order to build the docker images, you must have [Docker](https://docs.docker.com/installation/) installed.
### Build dependencies
docker build -t docker-r3lab.uni.lu/imp/imp-deps:<version> -f Dockerfile-dependencies .
> 'docker-r3lab.uni.lu/imp/imp-deps:<version>' is the image name that we will have to give to the tools Docker file.
### Build tools
docker build -t docker-r3lab.uni.lu/imp/imp-tools:<version> -f Dockerfile-tools .
> 'docker-r3lab.uni.lu/imp/imp-tools:<version>' is the image name that we will have to give to the main Docker file.
### Build the main Docker file.
Edit the docker file and change `FROM` to the image name you gave in the previous step.
Then build the file
docker build -t docker-r3lab.uni.lu/imp/imp:<version> .
## Build the tarball
docker save docker-r3lab.uni.lu/imp/imp:<version> > imp-<version>.tar
gzip -9 imp-<version>.tar
## Put the tarball online
Must be under `https://webdav-r3lab.uni.lu/public/R3lab/IMP/dist/`
# Base container with tools needed for the IMP pipeline
#
# VERSION 0.1
# VERSION 1.4.1
FROM ubuntu:trusty
FROM docker-r3lab.uni.lu/imp/imp-tools:1.4.1
MAINTAINER yohan.jarosz@uni.lu
USER root
################
# dependencies #
################
RUN echo "deb http://ftp.halifax.rwth-aachen.de/ubuntu trusty main restricted universe multiverse" > /etc/apt/sources.list
RUN echo "deb http://ftp.halifax.rwth-aachen.de/ubuntu trusty-security main restricted universe" >> /etc/apt/sources.list
RUN echo "deb http://ftp.halifax.rwth-aachen.de/ubuntu trusty-updates main restricted universe multiverse">> /etc/apt/sources.list
RUN apt-get update \
&& apt-get install -yq python-software-properties software-properties-common \
&& add-apt-repository "deb http://cran.stat.ucla.edu/bin/linux/ubuntu trusty/" \
&& gpg --keyserver keyserver.ubuntu.com --recv-key E084DAB9 \
&& gpg -a --export E084DAB9 | apt-key add - \
&& apt-get update \
&& apt-get -yq upgrade \
&& apt-get install -yq make cmake wget build-essential unzip openjdk-7-jdk ant git tig vim \
&& apt-get install -yq libboost-filesystem1.54-dev libboost-program-options1.54-dev libboost-regex1.54-dev libboost-system1.54-dev libboost-thread1.54-dev libboost-system1.54-dev \
&& apt-get install -yq bioperl=1.6.923-1 tabix=0.2.6-2 zlib1g zlib1g-dev samtools gnuplot python3 \
&& apt-get install -yq r-base r-base-dev \
&& apt-get install -yq libncurses5 libncurses5-dev sudo libatlas-base-dev python2.7 gfortran python-dev \
&& update-alternatives --set java /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java
## Python dependencies and bioservices. Using version 1.3.5 because script doesn't work with latest version
RUN mkdir -p /home/imp/tmp /home/imp/lib \
&& cd /home/imp/tmp \
&& wget https://bootstrap.pypa.io/get-pip.py \
&& python3 get-pip.py \
&& python2.7 get-pip.py \
&& pip3 install snakemake docopt \
&& pip2.7 install numpy \
&& pip2.7 install scipy scikit-learn docopt \
&& pip install -Iv https://pypi.python.org/packages/source/b/bioservices/bioservices-1.3.5.tar.gz \
&& rm get-pip.py
# R with checkpoint libraries
ADD dependencies.R /home/imp/lib/
RUN mkdir /root/.checkpoint \
&& echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile \
&& Rscript -e "install.packages('checkpoint')" \
&& Rscript -e "library(checkpoint);checkpoint('2015-04-27', project='/home/imp/lib')" \
&& Rscript -e "source('http://bioconductor.org/biocLite.R');biocLite('genomeIntervals')"
######################################
# Tools installations #
######################################
## fastuniq
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/FastUniq-1.1.tar.gz \
&& tar -xzf FastUniq-1.1.tar.gz \
&& cd FastUniq/source \
&& make \
&& mv fastuniq /usr/bin/. \
&& cd ../.. && rm -rf FastUniq* \
## Trimmomatic
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/Trimmomatic-Src-0.32.zip \
&& unzip Trimmomatic-Src-0.32.zip \
&& cd trimmomatic-0.32 \
&& ant \
&& cp dist/jar/trimmomatic-0.32.jar /home/imp/lib/. \
#&& mkdir -p /usr/db/trimmomatic \
#&& cp adapters/* /usr/db/trimmomatic/. \
&& cd .. && rm -rf *rimmomatic*
## idba ud
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/idba-1.1.1.tar.gz \
&& tar -xzf idba-1.1.1.tar.gz \
&& cd idba-1.1.1 \
&& sed -i -e 's/static const uint32_t kMaxShortSequence = 128/static const uint32_t kMaxShortSequence = 2048/' src/sequence/short_sequence.h \
&& ./configure \
&& make \
&& make install \
&& mv bin/idba_ud /usr/bin \
&& mv bin/fq2fa /usr/bin \
&& cd .. && rm -rf idba* \
## cap3
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/cap3.linux.x86_64.tar \
&& tar -xf cap3.linux.x86_64.tar \
&& cp CAP3/cap3 /usr/bin \
&& rm -rf cap3* CAP3 \
## bwa
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/bwa-0.7.9a.tar.bz2 \
&& tar -xjf bwa-0.7.9a.tar.bz2 \
&& cd bwa-0.7.9a \
&& make \
&& mv bwa /usr/bin \
&& cd .. && rm -rf bwa*
## htsjdk - dependency of picard tools and FastQC (below)
RUN cd /home/imp/lib \
&& mkdir fastqc \
&& cd fastqc \
&& git clone https://github.com/samtools/htsjdk.git \
&& cd htsjdk/ \
&& ant htsjdk-jar \
## Picard tools - dependency of fastqc (below)
&& cd /home/imp/lib/fastqc \
&& git clone https://github.com/broadinstitute/picard.git \
&& mv /home/imp/lib/fastqc/htsjdk /home/imp/lib/fastqc/picard/ \
&& cd picard/ \
&& ant -lib lib/ant package-commands \
## FastQC
&& cd /home/imp/lib/fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/fastqc_v0.11.3.zip \
&& unzip fastqc_v0.11.3.zip \
&& cd FastQC \
&& chmod 775 fastqc
## freebayes
RUN cd /home/imp/tmp \
&& git clone --recursive https://git-r3lab.uni.lu/R3/freebayes.git \
&& cd freebayes \
&& git checkout -b v0.9.16 \
&& make \
&& make install \
&& cd .. && rm -rf freebayes \
## vcftools
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/vcftools_0.1.12b.tar.gz \
&& tar -xzf vcftools_0.1.12b.tar.gz \
&& cd vcftools_0.1.12b \
&& make \
&& make install \
&& cp -r bin/* /usr/bin \
&& cp -r perl/* /etc/perl/. \
&& cd .. && rm -rf vcftools*
## prokka
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/prokka-1.11.tar.gz \
&& tar -xzf prokka-1.11.tar.gz \
&& cd prokka-1.11 \
&& cp bin/prokka* /usr/bin \
&& cp binaries/linux/* /usr/bin \
&& cd .. && rm -rf prokka* \
## parallel
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/parallel-20140422.tar.bz2 \
&& tar -xjf parallel-20140422.tar.bz2 \
&& cd parallel-20140422 \
&& ./configure \
&& make \
&& make install \
&& cd .. && rm -rf parallel* \
## sortmerna
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/sortmerna.2.0.tgz \
&& tar -xzf sortmerna.2.0.tgz \
&& cd sortmerna-2.0 \
&& sh build.sh \
&& mv sortmerna indexdb_rna /usr/bin/. \
&& mv scripts/*.sh /home/imp/lib/. \
&& cd .. && rm -rf sortmerna* \
## bedtools2
&& git clone https://git-r3lab.uni.lu/R3/bedtools2.git \
&& cd bedtools2 \
&& git checkout v2.22.0 \
&& make
## Install KronaPlot
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/KronaTools-2.5.tar \
&& tar -xvf KronaTools-2.5.tar \
&& cd KronaTools-2.5 \
&& perl install.pl
## htslib
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/htslib-1.2.1.tar.bz2 \
&& tar -jxvf htslib-1.2.1.tar.bz2 \
&& cd htslib-1.2.1 \
&& ./configure && make && make install \
&& cd .. && rm -rf htslib-1.2.1
## Platypus
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/Platypus-latest.tgz \
&& tar -xvzf Platypus-latest.tgz \
&& cd Platypus_0.8.1 \
&& bash buildPlatypus.sh
## Megahit
RUN cd /home/imp/lib \
&& git clone https://git-r3lab.uni.lu/R3/megahit.git \
&& cd megahit \
&& git checkout v0.2.1 \
&& make \
&& mv megahit* /usr/bin/. \
&& mv sdbg_builder_cpu /usr/bin/.
## Vizbin JAR
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/VizBin-dist.jar -O VizBin-dist.jar
RUN cd /home/imp/lib
######################
# ship pipeline code #
######################
# (commented for now)
#RUN cd /home/imp \
# && git clone https://git-r3lab.uni.lu/shaman.narayanasamy/IMP.git
######################
# set up environment #
######################
# add Snakemake
# Pin the workflow engine version that the IMP Snakefile targets.
RUN pip3 install snakemake==3.7.1
## add snakemake completion
# Bake shell conveniences into root's bashrc: snakemake tab-completion plus
# the library path for tools installed to /usr/local/lib (e.g. htslib above).
RUN echo "`snakemake --bash-completion`" >> ~/.bashrc \
## add LD_LIBRARY_PATH
&& echo "export LD_LIBRARY_PATH=/usr/local/lib" >> ~/.bashrc
#####################
# Ship IMP codebase #
#####################
# Create the standard directory layout, fetch the released IMP code (tag
# 1.4.1) into /home/imp/code, and expose the databases dir as /usr/db.
RUN mkdir -p /home/imp/code /home/imp/output /home/imp/data /home/imp/bin /home/imp/databases \
&& cd /home/imp/code \
&& git clone --branch 1.4.1 --single-branch https://git-r3lab.uni.lu/IMP/IMP.git . \
&& cd /usr && ln -s /home/imp/databases db
# R with checkpoint libraries
# dependencies.R lists the required R packages; checkpoint() installs them
# pinned to the 2016-06-20 snapshot for reproducible builds. genomeIntervals
# comes from Bioconductor separately. The chmod makes the checkpoint dirs
# writable for the unprivileged "imp" user created below.
ADD dependencies.R /home/imp/lib/
RUN mkdir /home/imp/lib/.checkpoint \
&& echo "r <- getOption('repos'); r['CRAN'] <- 'https://cloud.r-project.org/'; options(repos = r);" > /home/imp/.Rprofile \
&& Rscript -e "install.packages('checkpoint', repos='https://cloud.r-project.org/')" \
&& Rscript -e "library(checkpoint);checkpoint('2016-06-20', checkpointLocation='/home/imp/lib', project='/home/imp/lib')" \
&& Rscript -e "source('http://bioconductor.org/biocLite.R');biocLite('genomeIntervals', dependencies=TRUE)" \
&& chmod -R 777 /home/imp/lib/.checkpoint /home/imp/.Rprofile
#### Add gosu
# gosu lets the entrypoint drop from root to the "imp" user without the TTY
# and signal-forwarding issues of su/sudo; the downloaded binary is verified
# against the gosu release signing key.
ENV GOSU_VERSION 1.7
# NOTE(review): the sks-keyservers.net pool was shut down in 2021, so the key
# fetch always failed and broke the build; use keyservers that are still
# online, with a fallback.
RUN set -x \
&& apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/* \
&& wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)" \
&& wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture).asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& ( gpg --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
|| gpg --keyserver keyserver.ubuntu.com --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 ) \
&& gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
&& rm -r "$GNUPGHOME" /usr/local/bin/gosu.asc \
&& chmod +x /usr/local/bin/gosu \
&& gosu nobody true
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
#### Add IMP user
# Create the unprivileged "imp" user that the entrypoint re-maps to the host
# UID/GID and switches to via gosu; give it passwordless sudo and a
# world-writable home so the remapped user can still write everywhere.
RUN groupadd imp && useradd -g imp -d /home/imp imp \
&& chown imp:imp -R /home/imp/ \
&& chmod -R 0777 /home/imp \
&& echo 'imp:imp' |chpasswd \
&& echo "imp ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers.d/imp
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
######################
# runtime parameters #
######################
# NOTE(review): the second ENV PATH expands $PATH from the first, so entries
# such as Platypus, megahit and FastQC end up on PATH twice — harmless but
# noisy; trinityrnaseq only appears in the first line.
ENV PATH /home/imp/lib/trinityrnaseq_r20140717:/home/imp/lib/Platypus_0.8.1:/home/imp/lib/megahit:/home/imp/lib/fastqc/FastQC:$PATH
ENV PATH /home/imp/lib/FragGeneScan1.30:/home/imp/lib/MaxBin-2.2.1:/home/imp/lib/pullseq/src:/home/imp/lib/Platypus_0.8.1:/home/imp/lib/megahit:/home/imp/lib/fastqc/FastQC:$PATH
ENV LD_LIBRARY_PATH /usr/local/lib/:/home/imp/lib/Platypus_0.8.1
# NOTE(review): VOLUME declarations accumulate — both the /home/imp/* paths
# and the short /* paths below are exported; the second WORKDIR wins (/code),
# which is one of the declared volumes, not the /home/imp/code checkout.
VOLUME ["/home/imp/data", "/home/imp/output", "/home/imp/databases", "/home/imp/code"]
WORKDIR /home/imp/code
VOLUME ["/data", "/output", "/databases", "/code"]
WORKDIR /code
CMD ["snakemake", "ALL"]
# Base container with tools needed for the IMP pipeline
#
# VERSION 1.4.1
FROM docker-r3lab.uni.lu/imp/imp:1.3
MAINTAINER yohan.jarosz@uni.lu
######################################
# Tools installations #
######################################
## Reinstall bioperl
# Force-install BioPerl 1.6.924 from CPAN; the piped "echo"s answer cpan's
# interactive configuration prompts so the build is non-interactive.
RUN (echo y;echo o conf prerequisites_policy follow;echo o conf commit)|cpan -f -i CJFIELDS/BioPerl-1.6.924.tar.gz \
#&& apt-get install -yq bioperl=1.6.923-1 \
&& rm -rf /var/lib/apt/lists/*
## CheckM
# CheckM 1.0.6 (bin quality assessment) plus the prebuilt pplacer/guppy
# binaries it calls; python2 deps installed via pip2.7 first.
RUN cd /home/imp/lib \
&& pip2.7 install pysam dendropy ScreamingBackpack \
&& wget https://github.com/Ecogenomics/CheckM/archive/v1.0.6.tar.gz \
&& tar xf v1.0.6.tar.gz \
&& cd CheckM-1.0.6/ \
&& python2.7 setup.py install \
&& cd .. \
&& rm -r CheckM-1.0.6/ \
&& wget https://github.com/matsen/pplacer/releases/download/v1.1.alpha17/pplacer-Linux-v1.1.alpha17.zip \
&& unzip pplacer-Linux-v1.1.alpha17.zip \
&& cp pplacer-Linux-v1.1.alpha17/guppy /usr/bin \
&& cp pplacer-Linux-v1.1.alpha17/pplacer /usr/bin \
&& rm -r pplacer-Linux-v1.1.alpha17
##CheckM Data
# Reference data for CheckM, downloaded in its own layer and unpacked in the
# next one. NOTE(review): checkm_data_v1.0.6.tar.gz is never removed.
RUN cd /home/imp/lib \
&& wget https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_v1.0.6.tar.gz
RUN cd /home/imp/lib \
&& mkdir checkm_data_v1.0.6 \
&& tar -xf checkm_data_v1.0.6.tar.gz -C checkm_data_v1.0.6 \
#echo used for interactive prompt
&& echo checkm_data_v1.0.6 | checkm data setRoot checkm_data_v1.0.6/
##Amphora2
# AMPHORA2 (marker-gene phylogenetic analysis) plus RAxML 8.2.8 binaries and
# the Parallel::ForkManager perl module (presumably an AMPHORA2 runtime
# dependency — confirm). Note the ";" separators: a failing RAxML make would
# NOT abort this RUN, unlike the "&&"-chained steps.
RUN cd /home/imp/lib \
&& git clone https://github.com/mherold1/AMPHORA2 \
&& wget https://github.com/stamatak/standard-RAxML/archive/v8.2.8.tar.gz -O stamatak-standard-RAxML.tar.gz \
&& tar xf stamatak-standard-RAxML.tar.gz \
&& cd standard-RAxML-8.2.8; make -f Makefile.gcc; make -f Makefile.PTHREADS.gcc; sudo cp raxmlHPC* /usr/bin/. \
&& cd .. \
&& (echo y;echo o conf prerequisites_policy follow;echo o conf commit)|cpan -f -i Parallel::ForkManager
# && wget ftp://emboss.open-bio.org/pub/EMBOSS/old/6.5.0/EMBOSS-6.5.7.tar.gz \
# && tar -xf EMBOSS-6.5.7.tar.gz \
# && cd EMBOSS-6.5.7 \
# && ./configure \
# && make \
# && make install
RUN cd /home/imp/lib \
&& sudo apt-get update \
&& echo y| sudo apt-get install emboss
# && wget ftp://emboss.open-bio.org/pub/EMBOSS/old/6.5.0/EMBOSS-6.5.7.tar.gz \
# && tar -xf EMBOSS-6.5.7.tar.gz \
# && cd EMBOSS-6.5.7 \
# && ./configure --without-mysql --without-postgresql --without-axis2c --without-hpdf --without-x\
# && make \
#&& make install \
#&& ldconfig \
# && make install
##pullseq
# Build and install pullseq (FASTA/FASTQ sequence extraction utility) from
# source via the standard autotools flow.
RUN cd /home/imp/lib \
&& git clone https://github.com/bcthomas/pullseq \
&& cd pullseq \
&& ./bootstrap \
&& ./configure \
&& make \
&& make install
##Phylophlan, removed for now maybe revisit at a later timepoint, MH 16.6.2016
#RUN cd /home/imp/lib \
## && wget https://bitbucket.org/nsegata/phylophlan/get/default.tar.gz \
## && tar -xf default.tar.gz \
# && pip install mercurial \
# && hg clone https://bitbucket.org/nsegata/phylophlan \
## && cd phylophlan \
##for development version: (stick to old one for now)
## && hg pull && hg update dev \
# && wget http://www.microbesonline.org/fasttree/FastTree \
# && chmod +x FastTree \
# && cp FastTree /usr/bin \
##usearch binary copied from gaia for now and put in imp code -> need to solve this
#
# #move usearch binary to webdav? this uses my downloadlink, from 2.5.2016 , not sure how long this works.
## && wget --user-agent=Mozilla --content-disposition -E -c http://drive5.com/cgi-bin/upload3.py?license=2016050205235811143 \
# # && chmod +x usearch8.1.1861_i86linux32 \
# # && cp usearch8.1.1861_i86linux32 /usr/bin/usearch \
## && cd \
# This cleans the old git repo and downloads the new one. Remember to replace with a proper
# tag when released
# NOTE(review): "binning_refactor2" is a moving development branch — image
# builds are not reproducible until this is pinned to a release tag.
RUN cd /home/imp/code \
&& rm -rf ./* \
&& rm -rf ./.git* \
&& git clone --branch binning_refactor2 --single-branch https://git-r3lab.uni.lu/IMP-dev/IMP.git .
# R with checkpoint libraries
# Re-install the R package set for the updated dependencies.R; checkpoint()
# pins package versions to the 2016-06-20 snapshot.
ADD dependencies.R /home/imp/lib/
# NOTE(review): switched the CRAN/Bioconductor URLs to https for consistency
# with the base image's .Rprofile and to avoid fetching packages over plain
# http.
RUN mkdir -p /home/imp/lib/.checkpoint \
&& echo "r <- getOption('repos'); r['CRAN'] <- 'https://cloud.r-project.org/'; options(repos = r);" > /home/imp/.Rprofile \
&& Rscript -e "install.packages('checkpoint')" \
&& Rscript -e "library(checkpoint);checkpoint('2016-06-20', project='/home/imp/lib', checkpointLocation='/home/imp/lib')" \
&& Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('genomeIntervals')"
# Fetch a prebuilt linux64 tbl2asn binary from the R3lab mirror and put it on
# PATH. NOTE(review): presumably needed at runtime by the annotation step
# (prokka is installed in the tools image) — confirm. The "sudo" is redundant:
# RUN already executes as root.
RUN wget https://webdav-r3lab.uni.lu/public/R3lab/IMP/linux64.tbl2asn.gz -O /tmp/tbl2asn.gz \
&& gzip -d /tmp/tbl2asn.gz \
&& chmod +x /tmp/tbl2asn \
&& sudo mv /tmp/tbl2asn /usr/bin
######################
# runtime parameters #
######################
# NOTE(review): "#" does NOT start a comment inside a Dockerfile instruction,
# so the trailing "###CHECK IF THIS IS CORRECT?" on the original ENV line was
# silently included in the AMPHORA2_home value. Moved the question into a real
# comment line.
# TODO(review): confirm AMPHORA2_home matches the AMPHORA2 checkout above
# (/home/imp/lib/AMPHORA2).
ENV AMPHORA2_home /home/imp/lib/AMPHORA2
ENV PATH /home/imp/lib/pullseq/src:/home/imp/lib/trinityrnaseq_r20140717:/home/imp/lib/Platypus_0.8.1:/home/imp/lib/megahit:/home/imp/lib/fastqc/FastQC:$PATH
ENV LD_LIBRARY_PATH /usr/local/lib/:/home/imp/lib/Platypus_0.8.1
VOLUME ["/home/imp/data", "/home/imp/output", "/home/imp/databases", "/home/imp/code"]
WORKDIR /home/imp/code
CMD ["snakemake", "ALL"]
# Base container with tools dependencies for the IMP pipeline
#
# VERSION 1.4.1
FROM ubuntu:trusty
MAINTAINER yohan.jarosz@uni.lu
################
# dependencies #
################
# Point apt at the Aachen Ubuntu mirror, add the UCLA CRAN repository for R
# (signing key E084DAB9), upgrade the base system, then clear the apt cache
# to keep the layer small.
RUN echo "deb ftp://ftp.halifax.rwth-aachen.de/ubuntu trusty main restricted universe multiverse" > /etc/apt/sources.list \
&& echo "deb ftp://ftp.halifax.rwth-aachen.de/ubuntu trusty-security main restricted universe" >> /etc/apt/sources.list \
&& echo "deb ftp://ftp.halifax.rwth-aachen.de/ubuntu trusty-updates main restricted universe multiverse">> /etc/apt/sources.list \
&& apt-get update \
&& apt-get install -yq python-software-properties software-properties-common \
&& add-apt-repository "deb http://cran.stat.ucla.edu/bin/linux/ubuntu trusty/" \
&& gpg --keyserver keyserver.ubuntu.com --recv-key E084DAB9 \
&& gpg -a --export E084DAB9 | apt-key add - \
&& apt-get update \
&& apt-get -yq upgrade \
&& rm -rf /var/lib/apt/lists/*
# Build toolchain and general utilities.
RUN apt-get update \
&& apt-get install -yq make cmake wget build-essential unzip ant git tig vim \
&& rm -rf /var/lib/apt/lists/*
# Java 7 and a pinned distro BioPerl (the tools image later force-installs a
# newer BioPerl from CPAN on top of this).
RUN apt-get update \
&& apt-get install -yq openjdk-7-jdk \
&& apt-get install -yq bioperl=1.6.923-1 \
&& rm -rf /var/lib/apt/lists/*
# tabix (pinned) and zlib headers needed by the bioinformatics builds.
RUN apt-get update \
&& apt-get install -yq tabix=0.2.6-2 zlib1g zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
RUN apt-get update \
&& apt-get install -yq samtools gnuplot python3 \
&& rm -rf /var/lib/apt/lists/*
# Python 2 scientific stack from distro packages.
RUN apt-get update \
&& apt-get install -yq python-numpy python-scipy python-matplotlib python-sklearn \
&& rm -rf /var/lib/apt/lists/*
# R, build headers, and misc dev deps; pin the default java to the JDK 7
# installed above. NOTE(review): python-matplotlib is requested twice (also
# two RUNs up) — the second install is a no-op.
RUN apt-get update \
&& apt-get install -yq r-base r-base-dev \
&& apt-get install -yq libcurl4-openssl-dev libssl-dev \
&& apt-get install -yq libncurses5 libncurses5-dev sudo libatlas-base-dev python2.7 gfortran python-dev \
&& apt-get install -yq python-matplotlib \
&& update-alternatives --set java /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java \
&& rm -rf /var/lib/apt/lists/*
## Python dependencies and bioservices. Using version 1.3.5 because script doesn't work with latest version
# Bootstrap pip for both python3 and python2.7, then install docopt (py3) and
# a pinned bioservices 1.3.5 (py2); the get-pip bootstrapper is removed after.
RUN mkdir -p /home/imp/tmp /home/imp/lib \
&& cd /home/imp/tmp \
&& wget https://bootstrap.pypa.io/get-pip.py \
&& python3 get-pip.py \
&& python2.7 get-pip.py \
&& pip3 install docopt \
#&& pip2.7 install numpy \
#&& pip2.7 install scipy scikit-learn docopt \
&& pip install -Iv https://pypi.python.org/packages/source/b/bioservices/bioservices-1.3.5.tar.gz \
&& rm get-pip.py
# Base container with tools needed for the IMP pipeline
#
# VERSION 1.4.1
FROM docker-r3lab.uni.lu/imp/imp-deps:1.4.1
MAINTAINER yohan.jarosz@uni.lu
######################################
# Tools installations #
######################################
## fastuniq
# FastUniq (duplicate read removal) built from source and installed on PATH.
# Note: "-nv\" (no space before the continuation backslash) is valid shell —
# the backslash-newline is spliced out and the next line's leading whitespace
# still separates the tokens.
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/FastUniq-1.1.tar.gz -nv\
&& tar -xzf FastUniq-1.1.tar.gz \
&& cd FastUniq/source \
&& make \
&& mv fastuniq /usr/bin/. \
&& cd ../.. && rm -rf FastUniq* \
## Trimmomatic
# Trimmomatic 0.32 (read trimming) built with ant; only the jar is kept.
# The *rimmomatic* glob matches both the "Trimmomatic-Src" zip and the
# "trimmomatic-0.32" source dir regardless of case.
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/Trimmomatic-Src-0.32.zip -nv\
&& unzip Trimmomatic-Src-0.32.zip \
&& cd trimmomatic-0.32 \
&& ant \
&& cp dist/jar/trimmomatic-0.32.jar /home/imp/lib/. \
&& cd .. && rm -rf *rimmomatic*
## idba ud
# IDBA-UD assembler; the sed patch raises the maximum supported read length
# from 128 to 2048 before building.
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/idba-1.1.1.tar.gz -nv\
&& tar -xzf idba-1.1.1.tar.gz \
&& cd idba-1.1.1 \
&& sed -i -e 's/static const uint32_t kMaxShortSequence = 128/static const uint32_t kMaxShortSequence = 2048/' src/sequence/short_sequence.h \
&& ./configure \
&& make \
&& make install \
&& mv bin/idba_ud /usr/bin \
&& mv bin/fq2fa /usr/bin \
&& cd .. && rm -rf idba* \
## cap3
# cap3 ships as a prebuilt binary — just copy it to /usr/bin.
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/cap3.linux.x86_64.tar -nv\
&& tar -xf cap3.linux.x86_64.tar \
&& cp CAP3/cap3 /usr/bin \
&& rm -rf cap3* CAP3 \
## bwa
# bwa 0.7.9a read aligner, built from source.
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/bwa-0.7.9a.tar.bz2 -nv\
&& tar -xjf bwa-0.7.9a.tar.bz2 \
&& cd bwa-0.7.9a \
&& make \
&& mv bwa /usr/bin \
&& cd .. && rm -rf bwa*
## htsjdk - dependency of picard tools and FastQC (below)
# Build the htsjdk jar; the build tree is kept because the picard build below
# expects it at fastqc/picard-1.138/htsjdk.
RUN cd /home/imp/lib \
&& mkdir fastqc \
&& cd fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/htsjdk-1.138.tgz -O htsjdk-1.138.tgz -nv \
&& tar -xzf htsjdk-1.138.tgz \
&& cd htsjdk-1.138 \
&& ant htsjdk-jar \
&& cd .. && rm htsjdk-1.138.tgz
## Picard tools - dependency of fastqc (below)
# Build picard against the htsjdk tree moved into its source dir.
RUN cd /home/imp/lib \
&& cd /home/imp/lib/fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/picard.1.138.tgz -O picard-1.138.tgz -nv \
&& tar -xzf picard-1.138.tgz \
&& mv /home/imp/lib/fastqc/htsjdk-1.138 /home/imp/lib/fastqc/picard-1.138/htsjdk \
&& cd picard-1.138/ \
&& ant -lib lib/ant package-commands \
&& cd .. && rm picard-1.138.tgz
## FastQC
# FastQC is distributed prebuilt; unzip and make the launcher executable.
# Its directory is added to PATH in the runtime parameters section.
RUN cd /home/imp/lib \
&& cd /home/imp/lib/fastqc \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/fastqc_v0.11.3.zip -nv \
&& unzip fastqc_v0.11.3.zip \
&& cd FastQC \
&& chmod 775 fastqc
## freebayes
# freebayes variant caller, pinned to release v0.9.16, plus vcftools 0.1.12b.
# NOTE(review): the original ran "git checkout -b v0.9.16", which only creates
# a NEW branch named v0.9.16 at the clone's current HEAD — it never checked
# out the v0.9.16 tag, so whatever the default branch pointed to was built.
# Check out the tag itself and re-sync submodules to the pinned revision.
RUN cd /home/imp/tmp \
&& git clone --recursive https://git-r3lab.uni.lu/R3/freebayes.git \
&& cd freebayes \
&& git checkout v0.9.16 \
&& git submodule update --init --recursive \
&& make \
&& make install \
&& cd .. && rm -rf freebayes \
## vcftools
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/vcftools_0.1.12b.tar.gz -nv \
&& tar -xzf vcftools_0.1.12b.tar.gz \
&& cd vcftools_0.1.12b \
&& make \
&& make install \
&& cp -r bin/* /usr/bin \
&& cp -r perl/* /etc/perl/. \
&& cd .. && rm -rf vcftools*
## prokka
# prokka 1.11 annotation pipeline (prebuilt scripts + bundled linux binaries)
# and GNU parallel, installed in one layer.
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/prokka-1.11.tar.gz -nv \
&& tar -xzf prokka-1.11.tar.gz \
&& cd prokka-1.11 \
&& cp bin/prokka* /usr/bin \
&& cp binaries/linux/* /usr/bin \
&& cd .. && rm -rf prokka* \
## parallel
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/parallel-20140422.tar.bz2 -nv \
&& tar -xjf parallel-20140422.tar.bz2 \
&& cd parallel-20140422 \
&& ./configure \
&& make \
&& make install \
&& cd .. && rm -rf parallel*
## sortmerna
# sortmerna 2.0 (rRNA filtering); its helper scripts go to /home/imp/lib.
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/sortmerna.2.0.tgz -nv \
&& tar -xzf sortmerna.2.0.tgz \
&& cd sortmerna-2.0 \
&& sh build.sh \
&& mv sortmerna indexdb_rna /usr/bin/. \
&& mv scripts/*.sh /home/imp/lib/. \
&& cd .. && rm -rf sortmerna*
## bedtools2
# bedtools 2.18.0 built from the release tarball (unlike the older image,
# which cloned a git mirror); binaries installed, /home/imp/tmp wiped after.
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/bedtools-2.18.0.tar.gz -O bedtools-2.18.0.tar.gz -nv \
&& tar -xzf bedtools-2.18.0.tar.gz \
&& cd bedtools-2.18.0 \
&& make \
&& cp bin/* /usr/bin/. \
&& cd /home/imp/tmp && rm -rf *
## Install KronaPlot
# Krona installed under /home/imp/lib; only the tarball is removed — the
# extracted tree is kept (install.pl registers it from there).
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/KronaTools-2.5.tar -nv \
&& tar -xvf KronaTools-2.5.tar \
&& cd KronaTools-2.5 \
&& perl install.pl \
&& cd /home/imp/lib && rm -rf KronaTools-2.5.tar
## htslib
# htslib 1.2.1 installed to /usr/local; build dir wiped after.
RUN cd /home/imp/tmp \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/htslib-1.2.1.tar.bz2 -nv \
&& tar -jxvf htslib-1.2.1.tar.bz2 \
&& cd htslib-1.2.1 \
&& ./configure && make && make install \
&& cd /home/imp/tmp && rm -rf *
## Platypus
# Platypus 0.8.1, built in place; tarball removed afterwards.
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/Platypus-latest.tgz -nv \
&& tar -xvzf Platypus-latest.tgz \
&& cd Platypus_0.8.1 \
&& bash buildPlatypus.sh \
&& cd /home/imp/lib && rm Platypus-latest.tgz
## Megahit
# Replace any megahit checkout inherited from the base image with upstream
# megahit pinned at tag v1.0.6; binaries installed to /usr/bin.
RUN cd /home/imp/lib \
&& rm -rf megahit \
&& git clone --branch v1.0.6 https://github.com/voutcn/megahit.git \
&& cd megahit \
&& make \
&& mv megahit* /usr/bin/.
## Vizbin JAR
# VizBin ships as a prebuilt jar; just download it into the lib dir.
RUN cd /home/imp/lib \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/VizBin-dist.jar -O VizBin-dist.jar -nv
## Quast/metaQuast
# QUAST 3.1 assembly-evaluation suite, used via the metaquast entry point.
# NOTE(review): the three "git config" lines look like leftovers from a
# previous git-clone based install — the code is now fetched with wget, so
# they have no effect on this stanza.
RUN cd /home/imp/lib \
&& git config --global http.postBuffer 2M \
&& git config --global http.maxRequestBuffer 100M \
&& git config --global core.compression 0 \
&& wget --no-check-certificate https://webdav-r3lab.uni.lu/public/R3lab/IMP/quast-3.1.zip -O quast.zip -nv\
&& unzip quast.zip \
&& cd quast-release_3.1 \
&& ln -s $PWD/metaquast.py /usr/bin/metaquast
## CheckM
# CheckM 1.0.6 plus the prebuilt pplacer/guppy binaries it calls.
# NOTE(review): v1.0.6.tar.gz and the pplacer zip are not removed in this
# layer.
RUN cd /home/imp/lib \
&& pip2.7 install pysam dendropy ScreamingBackpack \
&& wget https://github.com/Ecogenomics/CheckM/archive/v1.0.6.tar.gz \
&& tar xf v1.0.6.tar.gz \
&& cd CheckM-1.0.6/ \
&& python2.7 setup.py install \
&& cd .. \
&& rm -rf CheckM-1.0.6/ \
&& wget https://github.com/matsen/pplacer/releases/download/v1.1.alpha17/pplacer-Linux-v1.1.alpha17.zip \
&& unzip pplacer-Linux-v1.1.alpha17.zip \
&& cp pplacer-Linux-v1.1.alpha17/guppy /usr/bin \
&& cp pplacer-Linux-v1.1.alpha17/pplacer /usr/bin \
&& rm -r pplacer-Linux-v1.1.alpha17
## pullseq
# pullseq built from source; unlike the older image, the checkout is removed
# after "make install".
RUN cd /home/imp/lib \
&& git clone https://github.com/bcthomas/pullseq \
&& cd pullseq \
&& ./bootstrap \
&& ./configure \
&& make \
&& make install \
&& cd .. \
&& rm -rf pullseq
## MaxBin & deps
# MaxBin 2.2.1 (binning) with its runtime dependencies bowtie2 2.2.9 and
# FragGeneScan 1.30. MaxBin and FragGeneScan stay under /home/imp/lib (their
# dirs are on PATH in the runtime parameters); bowtie2 is "make install"ed
# and its build tree removed. NOTE(review): these downloads use the
# webdav-r3lab-server.uni.lu host, unlike the webdav-r3lab.uni.lu host used
# elsewhere in this file — confirm both are intended.
RUN cd /home/imp/lib \
&& wget -c --no-check-certificate https://webdav-r3lab-server.uni.lu/public/R3lab/IMP/MaxBin-2.2.1.tar.gz \
&& tar -xvzf MaxBin-2.2.1.tar.gz \
&& cd MaxBin-2.2.1/src \
&& make \
&& rm /home/imp/lib/MaxBin-2.2.1.tar.gz \
&& cd /home/imp/lib \
&& wget -c --no-check-certificate https://webdav-r3lab-server.uni.lu/public/R3lab/IMP/bowtie2-2.2.9-source.zip \
&& unzip bowtie2-2.2.9-source.zip \
&& cd bowtie2-2.2.9 \
&& make \
&& make install \
&& cd .. \
&& rm -rf bowtie2* \
&& cd /home/imp/lib \
&& wget -c --no-check-certificate https://webdav-r3lab-server.uni.lu/public/R3lab/IMP/FragGeneScan1.30.tar.gz \
&& tar -xvzf FragGeneScan1.30.tar.gz \
&& cd FragGeneScan1.30 \
&& make clean \
&& make fgs \
&& chmod 777 -R /home/imp/lib/FragGeneScan1.30 \
&& rm /home/imp/lib/FragGeneScan1.30.tar.gz
# Fresh tbl2asn binary on PATH (gzip -f overwrites any earlier copy).
RUN wget https://webdav-r3lab.uni.lu/public/R3lab/IMP/linux64.tbl2asn.gz -O /tmp/tbl2asn.gz \
&& gzip -f -d /tmp/tbl2asn.gz \
&& chmod +x /tmp/tbl2asn \
&& mv /tmp/tbl2asn /usr/bin
# Docker compose how to
# Docker How To
**IMP** is provided as a docker container with all dependencies shipped inside.
You 'only' need to install docker and you are ready to use **IMP**.
For more information about `docker` and how to install and use it, please refer to the [official documentation](https://docs.docker.com/).
## Run IMP latest container
* Run latest version of **IMP**. For the first run, it will download all dependencies. It can take some time.
```bash
docker run imp:latest
... downloading data ...
Error: Snakefile "Snakefile" not present.
```
## Run IMP on your data
* The complete command is
```bash
docker run \
-v $DATA_DIR:/data \
-v $OUTPUT_DIR:/output \
-e MG="/data/$MGR1 /data/$MGR2" \
-e MT="/data/$MTR1 /data/$MTR2" \
imp:latest
```
* `docker run`: Run a docker container
* `-v $DATA_DIR:/data`: Where `$DATA_DIR` is the directory under which your data is located. (**All paths must be absolute.**)
> You need to provide the container with your data. By default, **IMP** expects that you have metagenomic and metatranscriptomic data and will look into the container's `/data` directory. You do this with the `-v` (for volume) flag.
* `-v $OUTPUT_DIR:/output`: Where `$OUTPUT_DIR` is the directory where output data will be located.
> As the `$DATA_DIR`, you need to mount an output directory to hold all result files from **IMP**.
* `-e MG="/data/$MGR1 /data/$MGR2"`: Where `$MGR1` and `$MGR2` are the metagenomic paired-end reads.
* `-e MT="/data/$MTR1 /data/$MTR2"`: Where `$MTR1` and `$MTR2` are the metatranscriptomic paired-end reads.
* `imp:latest`: The name of the container to use (<name>:<version>)
> You may use `sudo`.
### Example
```bash
$ pwd
/Users/imp/data
$ ls
MG.R1.fq MG.R2.fq MT.R1.fq MT.R2.fq
$ ls /Users/imp/build
$ docker run \
-v /Users/imp/data:/data \
-v /Users/imp/build:/output \
-e MG="/data/MG.R1.fq /data/MG.R2.fq" \
-e MT="/data/MT.R1.fq /data/MT.R2.fq" \
imp:latest
$ ls /Users/imp/build
IMP.html
Preprocessing
Assembly
Mapping
Analysis
...
```
## Use a configuration file.
Many parameters can be overridden in `IMP` via a config file. You may provide one using the `-e` flag and the `CONFIGFILE` environment variable.
```bash
docker run \
-e CONFIGFILE="path/to/config/file.json"
... other parameters ...
```
## Build new container image
```bash
cd <IMP_DIR>/docker
docker build -t <name> .
```
The name can be anything you want. Then you can
```bash
docker run <name>
```
## Use development version of IMP
You need to *erase* imp source code in the container and mount the latest version of the source code.
```bash
$ cd $DIR
$ git clone https://git-r3lab.uni.lu/shaman.narayanasamy/IMP.git
$ cd IMP
$ git checkout dev
$ docker run \
-v $DIR/IMP:/home/imp/integrated-metaomic-pipeline \
... other parameters ...
```
Any change to the source code will be applied immediately inside the container.
## Log in inside the container
If you want to enter the container and use `IMP` from there, you'll have to erase the docker entrypoint:
```bash
docker run \
--entrypoint /bin/bash -it \
... other parameters here ...
```
## Uploading container onto r3lab webdav
# Mount drive
sudo mount.davfs https://webdav-r3lab.uni.lu/public/R3lab/IMP /mnt/
# Copy the files as you would on a regular drive
#!/bin/bash
# Build the three IMP docker images (dependencies, tools, final) for a given
# release version, then export the final image as a gzipped tarball.
set -e

# NOTE(review): validate the version argument — with an empty $1 the original
# script silently built and saved images tagged ":" (invalid/latest-ish tags).
if [ -z "$1" ]; then
    echo "usage: $0 IMP_VERSION" >&2
    exit 1
fi
IMP_VERSION=$1

docker build -t docker-r3lab.uni.lu/imp/imp-deps:$IMP_VERSION -f Dockerfile-dependencies .
docker build -t docker-r3lab.uni.lu/imp/imp-tools:$IMP_VERSION -f Dockerfile-tools .
docker build -t docker-r3lab.uni.lu/imp/imp:$IMP_VERSION .
docker save docker-r3lab.uni.lu/imp/imp:$IMP_VERSION > imp-$IMP_VERSION.tar
gzip imp-$IMP_VERSION.tar
exit 0
......@@ -9,3 +9,13 @@ require(stringr)
# Additional packages loaded for the IMP analysis/reporting scripts.
# require() only warns (returns FALSE) on a missing package rather than
# stopping, so failures surface later at first use.
require(xtable)
require(beanplot)
require(psych)
require(caTools)
require(fpc)
require(FNN)
require(RColorBrewer)
require(scales)
require(diptest)
require(mixtools)
require(gclus)
require(cowplot)
require(gridExtra)
#!/bin/bash
# Entrypoint: remap the container's "imp" user/group to the caller-supplied
# LOCAL_USER_ID/LOCAL_GROUP_ID so files written to mounted volumes are owned
# by the host user, then drop root privileges and exec the real command.
set -e

# NOTE(review): quote the expansions and default to 1000:1000 — previously an
# unset LOCAL_USER_ID made usermod fail (and the script carried on regardless).
usermod -u "${LOCAL_USER_ID:-1000}" imp
groupmod -g "${LOCAL_GROUP_ID:-1000}" imp
exec gosu imp "$@"