Skip to content
Snippets Groups Projects
Commit e237026c authored by Shaman Narayanasamy's avatar Shaman Narayanasamy
Browse files

Small change to fix conflict

parents 38f54af4 1df5ff1d
No related branches found
No related tags found
No related merge requests found
......@@ -21,8 +21,8 @@ __doc__ = """Integrated Metaomic Pipeline.
(____)(_/\/\_)(__)
Usage:
IMP [-m MG1 -m MG2] [-t MT1 -t MT2] -o OUTPUT [--enter] [--norm] [-r REPO] [-n CONTAINER] [-v VERSION] [-c CONFIGFILE] [-d DBPATH] [-a ASSEMBLER] [-e ENV] ... [COMMANDS ...]
IMP --init [-d DBPATH] [-n CONTAINER] [-v VERSION] [-r REPO]
IMP [-m MG1 -m MG2] [-t MT1 -t MT2] -o OUTPUT [--enter] [--norm] [--current] [-r REPO] [-n CONTAINER] [-v VERSION] [-c CONFIGFILE] [-d DBPATH] [-a ASSEMBLER] [-e ENV] ... [COMMANDS ...]
IMP --init [--current] [-d DBPATH] [-n CONTAINER] [-v VERSION] [-r REPO] [-c CONFIGFILE]
IMP (-h | --help)
IMP --version
......@@ -32,6 +32,7 @@ Options:
--init Initialize IMP databases (Take a while).
--norm Don't delete the container after use. Useful for debugging.
--ask Ask to create directory if it doesn't exist.
--current Use the current version of the IMP codebase (what you have pulled).
-c CONFIG Pass a user defined config file. Default: conf/userconfig.imp.json
-h --help Show this help and exit
-m MG Path to the metagenomics paired files (must be 2 files).
......@@ -44,8 +45,18 @@ Options:
-a ASSEMBLER Name of the assembler for MGMT. Only idba and megahit are supported.
Typical use:
# first run
./IMP --init
# simple run with default options
./IMP -m input/mg.r1 -m input/mg.r2 -t input/mt.r1 -t input/mt.r2 -o output
./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory
# use a different database path
./IMP --init -d /path/to/databases_directory
./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory -d /path/to/databases_directory
# use the IMP code you have pulled instead of the one shipped inside the container.
./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory --current
""".format(
name=IMP_IMAGE_NAME,
......@@ -114,15 +125,6 @@ def install_imp(repo):
os.remove(fname)
def get_git_version():
    """
    Return the hash of the most recent git commit.

    Runs ``git log`` limited to a single entry and formatted as the full
    commit hash (``%H``). No working directory is passed, so git runs in
    the directory the process was started from. The value is returned
    exactly as ``subprocess.check_output`` produced it.
    """
    # --no-pager keeps git from handing the output to an interactive pager.
    git_command = ['git', '--no-pager', 'log', '-n', '1', '--pretty=format:%H']
    return subprocess.check_output(git_command)
def map_user(command, directory):
"""
User inside the docker container and outside the container are not the same.
......@@ -141,16 +143,23 @@ def init(args):
Must be run at least once.
"""
CURRENT_PATH = Path(__file__).parent.abspath()
version = args['-v']
container_name = args['-n']
database_path = Path(args['-d']).abspath()
docker_cmd = 'docker run --rm -v {p}:/code -v {d}:/databases {n}:{v}'.format(
p=CURRENT_PATH,
d=database_path,
n=args['-n'],
v=args['-v']
)
# prepare docker command
docker_cmd = 'docker run --rm -v {d}:/databases -e CONFIGFILE={c} {n}:{v}'
formatting_args = {
'd': database_path,
'n': args['-n'],
'v': args['-v'],
'c': args['-c']
}
# override docker command if the user wants to mount a specific version of the IMP codebase.
if args['--current']:
formatting_args['p'] = CURRENT_PATH
docker_cmd = 'docker run --rm -v {p}:/code -v {d}:/databases -e CONFIGFILE={c} {n}:{v}'
# format docker command
docker_cmd = docker_cmd.format(**formatting_args)
# IMP command + user mapping (see https://github.com/docker/docker/pull/12648)
cmd = docker_cmd + map_user('snakemake -s /code/rules/init', '/databases')
print("Executing", '"', cmd, '"')
subprocess.call(cmd, shell=True)
......@@ -184,12 +193,14 @@ def run(args):
# configure IMP mount point to the docker container
mount_points = [
'-v %s:/data' % common_path,
'-v %s:/code' % CURRENT_PATH,
'-v %s:/output' % output,
'-v %s:/databases' % database_path,
]
# add code mount point if the user wants to mount a specific version of the IMP codebase.
if args['--current']:
mount_points.append('-v %s:/code' % CURRENT_PATH)
# environement variables: add MG and MT data and config if specified
# environment variables: add MG and MT data and config if specified
envs = ['-e {}="{}"'.format(*e.split('=')) for e in args['-e']]
# prepare MG and MT data
......@@ -249,7 +260,7 @@ def validate(args):
if __name__ == '__main__':
check_installation()
args = docopt(__doc__, version=get_git_version(), options_first=True)
args = docopt(__doc__, version=IMP_VERSION, options_first=True)
check_imp_installed(args['-n'], args['-v'], args['-r'])
if args['--init']:
......
......@@ -188,6 +188,15 @@ RUN echo "`snakemake --bash-completion`" >> ~/.bashrc \
## add LD_LIBRARY_PATH
&& echo "export LD_LIBRARY_PATH=/usr/local/lib" >> ~/.bashrc
#####################
# Ship IMP codebase #
#####################
RUN mkdir /code \
&& cd /code \
&& git clone --branch 1.2.0 --single-branch https://git-r3lab.uni.lu/shaman.narayanasamy/IMP.git
######################
# runtime parameters #
######################
......
......@@ -3,7 +3,8 @@ We use a config file to pass variables to IMP engine.
The default parameters are visible in `src/config.imp.json`.
You can override some parameters via the file `conf/userconfig.imp.json`.
> Please do not override parameters directly on `src/config.imp.json` as it may be overridden with the next IMP update.
Please do not override parameters directly on `src/config.imp.json` as it may
be overridden with the next IMP update.
Alternatively, you can pass a different location for the config file via an environment variable
if you are using Snakemake, or via the IMP wrapper script option.
......@@ -90,3 +91,88 @@ if you are using snakemake, or via the IMP wrapper script option.
},
}
=======
if you are using Snakemake, or via the IMP wrapper script `-c` option.
## General parameters
* threads: Number of max threads to use.
* memory_total_gb: Some tools need to be told the maximum memory they can use.
* memory_per_core_gb: Some tools need to be told the maximum memory they can use per core.
* tmp_dir: Path to a temporary directory.
* raws - Metagenomics: Path to the metagenomics paired files.
* raws - Metatranscriptomics: Path to the metatranscriptomics paired files.
* outputdir: Path to the output directory.
* db_path: Path to the databases.
* preprocessing_filtering: If you want to filter reads from a database. Can be true or false.
* assembler: The assembler to use. Could be idba or megahit.
## Example config file
{
"threads": 8,
"output": "/home/user/temp",
"conf/userconfig.imp.json": false
}
IMP will take all default parameters and override those provided via this config file.
## Per tool/step parameters
### Trimmomatic
* pkg_url: Where to download the trimmomatic package to fetch the adapters databases.
* adapter: What adapter to use.
Following parameters are taken from the [Trimmomatic documentation](http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf):
* leading: Cut bases off the start of a read, if below a threshold quality.
* minlen: Specifies the minimum length of reads to be kept.
* palindrome_clip_threshold: Specifies how accurate the match between the two 'adapter ligated' reads must be for PE palindrome read alignment.
* simple_clip_threshold: Specifies how accurate the match between any adapter etc. sequence must be against a read.
* trailing: Specifies the minimum quality required to keep a base.
* seed_mismatch: Specifies the maximum mismatch count which will still allow a full match to be performed.
* window_size: Specifies the number of bases to average across.
* window_quality: Specifies the average quality required.
* strictness: This value, which should be set between 0 and 1, specifies the
balance between preserving as much read length as possible vs. removal of incorrect
bases. A low value of this parameter favours longer reads, while a high value favours read correctness.
* target_length: This specifies the read length which is likely to allow the location of the read within the target sequence to be determined.
* jarfile: Path to the trimmomatic JAR file on your system. (You don't need to set it if you are using the docker container.)
### idba_ud
* mink: Minimum k value.
* maxk: Maximum k value.
* step: Increment of k-mer of each iteration.
* perid: Similarity for alignment.
### vizbin
* dimension: 50,
* kmer: 5,
* size: 4,
* theta: 0.5,
* perp: 30,
* cutoff: 1000
* jarfile: Path to the Vizbin JAR file on your system. (You don't need to set it if you are using the docker container.)
### human_filtering
* filter: Name of the filter.
* url: URL to download database.
### sortmerna
* pkg_url: URL to download the sortmerna databases from.
* files: Databases to use and index.
### prokka
* pkg_url: URL to download the prokka databases from.
* databases: List of databases to use.
### kegg
* db_ec2pthy and db_hierarchy: URLs to download KEGG information from.
>>>>>>> 1df5ff1d0568f3aaa91dd3dea53188cb854dc1e1
......@@ -121,7 +121,6 @@ rule ANALYSIS_MG_CALL_VARIANT:
"%s/MGMT.assembly.merged.fa" % A_OUT,
"%s/MG.reads.sorted.bam" % A_OUT,
output:
#"%s/MG.variants.isec.vcf.gz" % AN_OUT,
"%s/MG.variants.samtools.vcf.gz" % AN_OUT,
"%s/MG.variants.freebayes.vcf.gz" % AN_OUT,
"%s/MG.variants.platypus.vcf.gz" % AN_OUT
......@@ -147,32 +146,32 @@ rule ANALYSIS_MG_CALL_VARIANT:
VCF_FRB=$(mktemp --tmpdir={TMPDIR} -t "XXXXXX.frb.vcf")
VCF_PLT=$(mktemp --tmpdir={TMPDIR} -t "XXXXXX.plt.vcf")
### run_mpileup {input[0]} {input[1]} {output[1]}
### run_mpileup {input[0]} {input[1]} {output[0]}
echo "Running samtools mpileup"
samtools mpileup -uf {input[0]} {input[1]} |\
bcftools view -vcg - |\
vcf-convert -r {input[0]} -v 4.2 > $VCF_MPU
bgzip -c $VCF_MPU > {output[1]}
tabix -f -p vcf {output[1]}
bgzip -c $VCF_MPU > {output[0]}
tabix -f -p vcf {output[0]}
### run_freebayes {input[0]} {input[1]} {output[2]}
### run_freebayes {input[0]} {input[1]} {output[1]}
echo "Running freebayes"
freebayes -f {input[0]} {input[1]} |\
vcf-convert -r {input[0]} -v 4.2 > $VCF_FRB
bgzip -c $VCF_FRB > {output[2]}
tabix -f -p vcf {output[2]}
bgzip -c $VCF_FRB > {output[1]}
tabix -f -p vcf {output[1]}
### run_platypus {input[0]} {input[1]} {output[3]}
### run_platypus {input[0]} {input[1]} {output[2]}
echo "Running platypus"
Platypus.py callVariants --refFile={input[0]} \
--bamFiles={input[1]} --nCPU={THREADS} -o $VCF_PLT
bgzip -c $VCF_PLT > {output[3]}
tabix -f -p vcf {output[3]}
bgzip -c $VCF_PLT > {output[2]}
tabix -f -p vcf {output[2]}
#### "Merging outputs from all the callers"
### Must remove colons from the contig names in upstream steps. Unable to merge the variants
### due to this problem
#vcf-isec -f -a -n +2 {output[1]} {output[2]} > {AN_OUT}/MG.variants.isec.vcf
#vcf-isec -f -a -n +2 {output[0]} {output[1]} > {AN_OUT}/MG.variants.isec.vcf
## Compress and index the output.
#bgzip -c {AN_OUT}/MG.variants.isec.vcf > {output[0]}
......@@ -193,7 +192,6 @@ rule ANALYSIS_MT_CALL_VARIANT:
"%s/MGMT.assembly.merged.fa" % A_OUT,
"%s/MT.reads.sorted.bam" % A_OUT,
output:
#"%s/MT.variants.isec.vcf.gz" % AN_OUT,
"%s/MT.variants.samtools.vcf.gz" % AN_OUT,
"%s/MT.variants.freebayes.vcf.gz" % AN_OUT,
"%s/MT.variants.platypus.vcf.gz" % AN_OUT
......@@ -219,32 +217,32 @@ rule ANALYSIS_MT_CALL_VARIANT:
VCF_FRB=$(mktemp --tmpdir={TMPDIR} -t "XXXXXX.frb.vcf")
VCF_PLT=$(mktemp --tmpdir={TMPDIR} -t "XXXXXX.plt.vcf")
### run_mpileup {input[0]} {input[1]} {output[1]}
### run_mpileup {input[0]} {input[1]} {output[0]}
echo "Running samtools mpileup"
samtools mpileup -uf {input[0]} {input[1]} |\
bcftools view -vcg - |\
vcf-convert -r {input[0]} -v 4.2 > $VCF_MPU
bgzip -c $VCF_MPU > {output[1]}
tabix -f -p vcf {output[1]}
bgzip -c $VCF_MPU > {output[0]}
tabix -f -p vcf {output[0]}
### run_freebayes {input[0]} {input[1]} {output[2]}
### run_freebayes {input[0]} {input[1]} {output[1]}
echo "Running freebayes"
freebayes -f {input[0]} {input[1]} |\
vcf-convert -r {input[0]} -v 4.2 > $VCF_FRB
bgzip -c $VCF_FRB > {output[2]}
tabix -f -p vcf {output[2]}
bgzip -c $VCF_FRB > {output[1]}
tabix -f -p vcf {output[1]}
### run_platypus {input[0]} {input[1]} {output[3]}
### run_platypus {input[0]} {input[1]} {output[2]}
echo "Running platypus"
Platypus.py callVariants --refFile={input[0]} \
--bamFiles={input[1]} --nCPU={THREADS} -o $VCF_PLT
bgzip -c $VCF_PLT > {output[3]}
tabix -f -p vcf {output[3]}
bgzip -c $VCF_PLT > {output[2]}
tabix -f -p vcf {output[2]}
### "Merging outputs from all the callers"
## Must remove colons from the contig names in upstream steps. Unable to merge the variants
## due to this problem
#vcf-isec -f -a -n +2 {output[1]} {output[2]} > {AN_OUT}/MT.variants.isec.vcf
#vcf-isec -f -a -n +2 {output[0]} {output[1]} > {AN_OUT}/MT.variants.isec.vcf
## Compress and index the output.
#bgzip -c {AN_OUT}/MT.variants.isec.vcf > {output[0]}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment