Small change to fix conflict

e237026c · Shaman Narayanasamy · 38f54af4 · 1df5ff1d · e237026c · e237026c
Commit e237026c authored 9 years ago by Shaman Narayanasamy
--- a/IMP
+++ b/IMP
@@ -21,8 +21,8 @@ __doc__ = """Integrated Metaomic Pipeline.
 (____)(_/\/\_)(__)

 Usage:
-  IMP [-m MG1 -m MG2] [-t MT1 -t MT2] -o OUTPUT [--enter] [--norm] [-r REPO] [-n CONTAINER] [-v VERSION] [-c CONFIGFILE] [-d DBPATH] [-a ASSEMBLER] [-e ENV] ... [COMMANDS ...]
-  IMP --init [-d DBPATH] [-n CONTAINER] [-v VERSION] [-r REPO]
+  IMP [-m MG1 -m MG2] [-t MT1 -t MT2] -o OUTPUT [--enter] [--norm] [--current] [-r REPO] [-n CONTAINER] [-v VERSION] [-c CONFIGFILE] [-d DBPATH] [-a ASSEMBLER] [-e ENV] ... [COMMANDS ...]
+  IMP --init [--current] [-d DBPATH] [-n CONTAINER] [-v VERSION] [-r REPO] [-c CONFIGFILE]
  IMP (-h | --help)
  IMP --version

@@ -32,6 +32,7 @@ Options:
  --init            Initialize IMP databases (Take a while).
  --norm            Don't delete the container after use. Useful for debugging.
  --ask             Ask to create directory if it doesn't exist.
+  --current         Use the current version of the IMP codebase (what you have pulled).
  -c CONFIG         Pass a user defined config file. Default: conf/userconfig.imp.json
  -h --help         Show this help and exit
  -m MG             Path to the metagenomics paired files (must be 2 files).
@@ -44,8 +45,18 @@ Options:
  -a ASSEMBLER      Name of the assembler for MGMT. Only idba and megahit are supported.

 Typical use:
+    # first run
+    ./IMP --init
+
    # simple run with default options
-    ./IMP -m input/mg.r1 -m input/mg.r2 -t input/mt.r1 -t input/mt.r2 -o output
+    ./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory
+
+    # use a different database path
+    ./IMP --init -d /path/to/databases_directory
+    ./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory -d /path/to/databases_directory
+
+    # use the IMP code you have pulled instead of the one shipped inside the container.
+    ./IMP -m input/mg.r1.fq -m input/mg.r2.fq -t input/mt.r1.fq -t input/mt.r2.fq -o output_directory --current

 """.format(
    name=IMP_IMAGE_NAME,
@@ -114,15 +125,6 @@ def install_imp(repo):
    os.remove(fname)


-def get_git_version():
-    """
-    Get the current git hash.
-    """
-    return subprocess.check_output(
-        ['git', '--no-pager', 'log', '-n', '1', '--pretty=format:%H']
-    )
-
-
 def map_user(command, directory):
    """
    User inside the docker container and outside the container are not the same.
@@ -141,16 +143,23 @@ def init(args):
    Must be run at least once.
    """
    CURRENT_PATH = Path(__file__).parent.abspath()
-    version = args['-v']
-    container_name = args['-n']
    database_path = Path(args['-d']).abspath()
-
-    docker_cmd = 'docker run --rm -v {p}:/code -v {d}:/databases {n}:{v}'.format(
-        p=CURRENT_PATH,
-        d=database_path,
-        n=args['-n'],
-        v=args['-v']
-    )
+    # prepare docker command
+    docker_cmd = 'docker run --rm -v {d}:/databases -e CONFIGFILE={c} {n}:{v}'
+    formatting_args = {
+        'd': database_path,
+        'n': args['-n'],
+        'v': args['-v'],
+        'c': args['-c']
+    }
+    # override the docker command if the user wants to mount a specific version of the IMP codebase.
+    if args['--current']:
+        formatting_args['p'] = CURRENT_PATH
+        docker_cmd = 'docker run --rm -v {p}:/code -v {d}:/databases -e CONFIGFILE={c} {n}:{v}'
+
+    # format docker command
+    docker_cmd = docker_cmd.format(**formatting_args)
+    # IMP command + user mapping (see https://github.com/docker/docker/pull/12648)
    cmd = docker_cmd + map_user('snakemake -s /code/rules/init', '/databases')
    print("Executing", '"', cmd, '"')
    subprocess.call(cmd, shell=True)
@@ -184,12 +193,14 @@ def run(args):
    # configure IMP mount point to the docker container
    mount_points = [
        '-v %s:/data' % common_path,
-        '-v %s:/code' % CURRENT_PATH,
        '-v %s:/output' % output,
        '-v %s:/databases' % database_path,
    ]
+    # add the code mount point if the user wants to mount a specific version of the IMP codebase.
+    if args['--current']:
+        mount_points.append('-v %s:/code' % CURRENT_PATH)

-    # environement variables: add MG and MT data and config if specified
+    # environment variables: add MG and MT data and config if specified
    envs = ['-e {}="{}"'.format(*e.split('=')) for e in args['-e']]

    # prepare MG and MT data
@@ -249,7 +260,7 @@ def validate(args):

 if __name__ == '__main__':
    check_installation()
-    args = docopt(__doc__, version=get_git_version(), options_first=True)
+    args = docopt(__doc__, version=IMP_VERSION, options_first=True)
    check_imp_installed(args['-n'], args['-v'], args['-r'])

    if args['--init']:

--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -188,6 +188,15 @@ RUN echo "`snakemake --bash-completion`" >> ~/.bashrc \
 ## add LD_LIBRARY_PATH
    && echo "export LD_LIBRARY_PATH=/usr/local/lib" >> ~/.bashrc

+#####################
+# Ship IMP codebase #
+#####################
+# Clone straight into /code (note the trailing "."): the IMP wrapper expects /code/rules/..., not /code/IMP/rules/...
+RUN mkdir /code \
+    && cd /code \
+    && git clone --branch 1.2.0 --single-branch https://git-r3lab.uni.lu/shaman.narayanasamy/IMP.git .
+
+
 ######################
 # runtime parameters #
 ######################

--- a/docs/PARAMETERS.md
+++ b/docs/PARAMETERS.md
@@ -3,7 +3,8 @@ We use a config file to pass variables to IMP engine.
 The default parameters are visible in `src/config.imp.json`.
 You could override some parameters via the file `conf/userconfig.imp.json`.

-> Please do not override parameters directly on `src/config.imp.json` as it may be overridden with the next IMP update.
+Please do not override parameters directly on `src/config.imp.json` as it may 
+be overridden with the next IMP update.

 Eventually you could pass a different location for the config file via an environment variable
 if you are using snakemake, or via the IMP wrapper script option.
@@ -90,3 +91,88 @@ if you are using snakemake, or via the IMP wrapper script option.
    },

 }
+
+You can also pass a different location for the config file via an environment variable if you are using Snakemake, or via the IMP wrapper script `-c` option.
+
+
+## General parameters
+
+* threads: Maximum number of threads to use.
+* memory_total_gb: Some tools need to be told the maximum amount of memory they can use.
+* memory_per_core_gb: Some tools need to be told the maximum amount of memory they can use per core.
+* tmp_dir: Path to a temporary directory.
+* raws - Metagenomics: Path to the metagenomics paired files.
+* raws - Metatranscriptomics: Path to the metatranscriptomics paired files.
+* outputdir: Path to the output directory.
+* db_path: Path to the databases.
+* preprocessing_filtering: If you want to filter reads from a database. Can be true or false.
+* assembler: The assembler to use. Could be idba or megahit.
+
+
+## Example config file
+
+    {
+      "threads": 8,
+      "output": "/home/user/temp",
+      "preprocessing_filtering": false
+    }
+
+IMP will take all default parameters and override those provided via this config file.
+
+
+## Per tool/step parameters
+
+
+### Trimmomatic
+
+* pkg_url: Where to download the trimmomatic package to fetch the adapters databases.
+* adapter: What adapter to use.
+
+Following parameters are taken from the [Trimmomatic documentation](http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf):
+* leading: Cut bases off the start of a read, if below a threshold quality.
+* minlen: Specifies the minimum length of reads to be kept.
+* palindrome_clip_threshold: Specifies how accurate the match between the two 'adapter ligated' reads must be for PE palindrome read alignment.
+* simple_clip_threshold: Specifies how accurate the match between any adapter etc. sequence must be against a read.
+* trailing: Specifies the minimum quality required to keep a base.
+* seed_mismatch: Specifies the maximum mismatch count which will still allow a full match to be performed.
+* window_size: Specifies the number of bases to average across.
+* window_quality: Specifies the average quality required.
+* strictness: This value, which should be set between 0 and 1, specifies the
+balance between preserving as much read length as possible vs. removal of incorrect
+bases. A low value of this parameter favours longer reads, while a high value favours read correctness.
+* target_length: This specifies the read length which is likely to allow the location of the read within the target sequence to be determined.
+* jarfile: Path to the trimmomatic JAR file on your system. (You don't need to set it if you are using the docker container.)
+
+
+### idba_ud
+* mink: Minimum k value.
+* maxk: Maximum k value.
+* step: Increment of k-mer of each iteration.
+* perid: Similarity for alignment.
+
+### vizbin
+* dimension: 50,
+* kmer: 5,
+* size: 4,
+* theta: 0.5,
+* perp: 30,
+* cutoff: 1000
+* jarfile: Path to the Vizbin JAR file on your system. (You don't need to set it if you are using the docker container.)
+
+
+### human_filtering
+* filter: Name of the filter.
+* url: URL to download database.
+
+### sortmerna
+* pkg_url: URL to download the SortMeRNA databases from.
+* files: Databases to use and index.
+
+### prokka
+* pkg_url: URL to download the Prokka databases from.
+* databases: List of databases to use.
+
+
+### kegg
+* db_ec2pthy and db_hierarchy: URLs to download KEGG information from.
+
--- a/rules/Analysis/MGMT.rules
+++ b/rules/Analysis/MGMT.rules
@@ -121,7 +121,6 @@ rule ANALYSIS_MG_CALL_VARIANT:
        "%s/MGMT.assembly.merged.fa" % A_OUT,
        "%s/MG.reads.sorted.bam" % A_OUT,
    output:
-        #"%s/MG.variants.isec.vcf.gz" % AN_OUT,
        "%s/MG.variants.samtools.vcf.gz" % AN_OUT,
        "%s/MG.variants.freebayes.vcf.gz" % AN_OUT,
        "%s/MG.variants.platypus.vcf.gz" % AN_OUT
@@ -147,32 +146,32 @@ rule ANALYSIS_MG_CALL_VARIANT:
        VCF_FRB=$(mktemp --tmpdir={TMPDIR} -t "XXXXXX.frb.vcf")
        VCF_PLT=$(mktemp --tmpdir={TMPDIR} -t "XXXXXX.plt.vcf")

-        ### run_mpileup {input[0]} {input[1]} {output[1]}
+        ### run_mpileup {input[0]} {input[1]} {output[0]}
        echo "Running samtools mpileup"
        samtools mpileup -uf {input[0]} {input[1]} |\
        bcftools view -vcg - |\
        vcf-convert -r {input[0]} -v 4.2 > $VCF_MPU
-        bgzip -c $VCF_MPU > {output[1]}
-        tabix -f -p vcf {output[1]}
+        bgzip -c $VCF_MPU > {output[0]}
+        tabix -f -p vcf {output[0]}

-        ### run_freebayes {input[0]} {input[1]} {output[2]}
+        ### run_freebayes {input[0]} {input[1]} {output[1]}
        echo "Running freebayes"
        freebayes -f {input[0]} {input[1]} |\
        vcf-convert -r {input[0]} -v 4.2 > $VCF_FRB
-        bgzip -c $VCF_FRB > {output[2]}
-        tabix -f -p vcf {output[2]}
+        bgzip -c $VCF_FRB > {output[1]}
+        tabix -f -p vcf {output[1]}

-        ### run_platypus {input[0]} {input[1]} {output[3]}
+        ### run_platypus {input[0]} {input[1]} {output[2]}
        echo "Running platypus"
        Platypus.py callVariants --refFile={input[0]} \
        --bamFiles={input[1]} --nCPU={THREADS} -o $VCF_PLT
-        bgzip -c $VCF_PLT > {output[3]}
-        tabix -f -p vcf {output[3]}
+        bgzip -c $VCF_PLT > {output[2]}
+        tabix -f -p vcf {output[2]}

        #### "Merging outputs from all the callers"
        ### Must remove colons from the contig names in upstream steps. Unable to merge the variants
        ### due to this problem
-        #vcf-isec -f -a -n +2 {output[1]} {output[2]} > {AN_OUT}/MG.variants.isec.vcf
+        #vcf-isec -f -a -n +2 {output[0]} {output[1]} > {AN_OUT}/MG.variants.isec.vcf

        ## Compress and index the output.
        #bgzip -c {AN_OUT}/MG.variants.isec.vcf > {output[0]}
@@ -193,7 +192,6 @@ rule ANALYSIS_MT_CALL_VARIANT:
        "%s/MGMT.assembly.merged.fa" % A_OUT,
        "%s/MT.reads.sorted.bam" % A_OUT,
    output:
-        #"%s/MT.variants.isec.vcf.gz" % AN_OUT,
        "%s/MT.variants.samtools.vcf.gz" % AN_OUT,
        "%s/MT.variants.freebayes.vcf.gz" % AN_OUT,
        "%s/MT.variants.platypus.vcf.gz" % AN_OUT
@@ -219,32 +217,32 @@ rule ANALYSIS_MT_CALL_VARIANT:
        VCF_FRB=$(mktemp --tmpdir={TMPDIR} -t "XXXXXX.frb.vcf")
        VCF_PLT=$(mktemp --tmpdir={TMPDIR} -t "XXXXXX.plt.vcf")

-        ### run_mpileup {input[0]} {input[1]} {output[1]}
+        ### run_mpileup {input[0]} {input[1]} {output[0]}
        echo "Running samtools mpileup"
        samtools mpileup -uf {input[0]} {input[1]} |\
        bcftools view -vcg - |\
        vcf-convert -r {input[0]} -v 4.2 > $VCF_MPU
-        bgzip -c $VCF_MPU > {output[1]}
-        tabix -f -p vcf {output[1]}
+        bgzip -c $VCF_MPU > {output[0]}
+        tabix -f -p vcf {output[0]}

-        ### run_freebayes {input[0]} {input[1]} {output[2]}
+        ### run_freebayes {input[0]} {input[1]} {output[1]}
        echo "Running freebayes"
        freebayes -f {input[0]} {input[1]} |\
        vcf-convert -r {input[0]} -v 4.2 > $VCF_FRB
-        bgzip -c $VCF_FRB > {output[2]}
-        tabix -f -p vcf {output[2]}
+        bgzip -c $VCF_FRB > {output[1]}
+        tabix -f -p vcf {output[1]}

-        ### run_platypus {input[0]} {input[1]} {output[3]}
+        ### run_platypus {input[0]} {input[1]} {output[2]}
        echo "Running platypus"
        Platypus.py callVariants --refFile={input[0]} \
        --bamFiles={input[1]} --nCPU={THREADS} -o $VCF_PLT
-        bgzip -c $VCF_PLT > {output[3]}
-        tabix -f -p vcf {output[3]}
+        bgzip -c $VCF_PLT > {output[2]}
+        tabix -f -p vcf {output[2]}

        ### "Merging outputs from all the callers"
        ## Must remove colons from the contig names in upstream steps. Unable to merge the variants
        ## due to this problem
-        #vcf-isec -f -a -n +2 {output[1]} {output[2]} > {AN_OUT}/MT.variants.isec.vcf
+        #vcf-isec -f -a -n +2 {output[0]} {output[1]} > {AN_OUT}/MT.variants.isec.vcf

        ## Compress and index the output.
        #bgzip -c {AN_OUT}/MT.variants.isec.vcf > {output[0]}