From cdf0f8e223f57e5066f091697934bd7c8cb37430 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Sun, 5 Feb 2012 14:41:35 -0500
Subject: [PATCH] greater control over splitting on D or N CIGAR ops.

---
 src/bamToBed/bamToBed.cpp                       |  4 ++--
 src/coverageBed/coverageBed.cpp                 |  2 +-
 src/genomeCoverageBed/genomeCoverageBed.cpp     | 15 +++++++++------
 src/intersectBed/intersectBed.cpp               |  2 +-
 src/utils/BlockedIntervals/BlockedIntervals.cpp | 15 ++++++++++-----
 src/utils/BlockedIntervals/BlockedIntervals.h   |  7 ++++---
 6 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/src/bamToBed/bamToBed.cpp b/src/bamToBed/bamToBed.cpp
index 344be862..bcf7c2b4 100644
--- a/src/bamToBed/bamToBed.cpp
+++ b/src/bamToBed/bamToBed.cpp
@@ -413,7 +413,7 @@ void PrintBed(const BamAlignment &bam,  const RefVector &refs, bool useEditDista
         vector<BED> bedBlocks;
         string chrom = refs.at(bam.RefID).RefName;
         // extract the block starts and lengths from the CIGAR string
-        GetBamBlocks(bam, chrom, bedBlocks);
+        GetBamBlocks(bam, chrom, bedBlocks, false, true);
 
         unsigned int i;
         for (i = 0; i < bedBlocks.size(); ++i) {
@@ -442,7 +442,7 @@ void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDist
     string chrom = refs.at(bam.RefID).RefName;
     CHRPOS alignmentEnd = bam.GetEndPosition();
     // extract the block starts and lengths from the CIGAR string
-    GetBamBlocks(bam, chrom, bedBlocks);
+    GetBamBlocks(bam, chrom, bedBlocks, false, true);
 
     // write BED6 portion
     if (useEditDistance == false && bamTag == "") {
diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp
index dfd3a199..2d3bba81 100644
--- a/src/coverageBed/coverageBed.cpp
+++ b/src/coverageBed/coverageBed.cpp
@@ -117,7 +117,7 @@ void BedCoverage::CollectCoverageBam(string bamFile) {
                 bedVector bedBlocks;
                 // since we are counting coverage, we do want to split blocks when a
                 // deletion (D) CIGAR op is encountered (hence the true for the last parm)
-                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true);
+                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, false, true);
                 // use countSplitHits to avoid over-counting each split chunk
                 // as distinct read coverage.
                 _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
diff --git a/src/genomeCoverageBed/genomeCoverageBed.cpp b/src/genomeCoverageBed/genomeCoverageBed.cpp
index a06ac204..cdb04446 100644
--- a/src/genomeCoverageBed/genomeCoverageBed.cpp
+++ b/src/genomeCoverageBed/genomeCoverageBed.cpp
@@ -243,11 +243,16 @@ void BedGenomeCoverage::CoverageBam(string bamFile) {
             StartNewChrom(chrom);
 
         // add coverage accordingly.
-        if (_obeySplits) {
+        if (!_only_5p_end && !_only_3p_end) {
             bedVector bedBlocks;
-            // since we are counting coverage, we do want to split blocks when a
-            // deletion (D) CIGAR op is encountered (hence the true for the last parm)
-            GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true);
+            // we always want to split blocks when a D CIGAR op is found.
+            // if the user invokes -split, we want to also split on N ops.
+            if (_obeySplits) { // "D" true, "N" true
+                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true);
+            }
+            else { // "D" true, "N" false
+                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);
+            }
             AddBlockedCoverage(bedBlocks);
         }
         else if (_only_5p_end) {
@@ -258,8 +263,6 @@ void BedGenomeCoverage::CoverageBam(string bamFile) {
             int pos = ( bam.IsReverseStrand() ) ? start : end;
             AddCoverage(pos,pos);
         }
-        else
-            AddCoverage(start, end);
     }
     // close the BAM
     reader.Close();
diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp
index 559eec23..1d410ae1 100644
--- a/src/intersectBed/intersectBed.cpp
+++ b/src/intersectBed/intersectBed.cpp
@@ -323,7 +323,7 @@ void BedIntersect::IntersectBam(string bamFile) {
         // break alignment into discrete blocks,
         bedVector bed_blocks;
         string chrom = refs.at(bam.RefID).RefName;
-        GetBamBlocks(bam, chrom, bed_blocks);
+        GetBamBlocks(bam, chrom, bed_blocks, false, true);
         // create a basic BED entry from the BAM alignment
         BED bed;
         MakeBedFromBam(bam, chrom, bed_blocks, bed);
diff --git a/src/utils/BlockedIntervals/BlockedIntervals.cpp b/src/utils/BlockedIntervals/BlockedIntervals.cpp
index eead4ca7..659f299b 100644
--- a/src/utils/BlockedIntervals/BlockedIntervals.cpp
+++ b/src/utils/BlockedIntervals/BlockedIntervals.cpp
@@ -15,7 +15,8 @@
 void GetBamBlocks(const BamAlignment &bam,
                   const string &chrom,
                   bedVector &bedBlocks,
-                  bool breakOnDeletionOps)
+                  bool breakOnDeletionOps,
+                  bool breakOnSkipOps)
 {
     vector<int> starts;
     vector<int> lengths;
@@ -47,10 +48,14 @@ void GetBamBlocks(const BamAlignment &bam,
                 }
             case ('P') : break;
             case ('N') :
-                bedBlocks.push_back( BED(chrom, currPosition, currPosition + blockLength,
-                                      bam.Name, ToString(bam.MapQuality), strand) );
-                currPosition += cigItr->Length + blockLength;
-                blockLength = 0;
+                if (!breakOnSkipOps)
+                    blockLength += cigItr->Length;
+                else {
+                    bedBlocks.push_back( BED(chrom, currPosition, currPosition + blockLength,
+                                          bam.Name, ToString(bam.MapQuality), strand) );
+                    currPosition += cigItr->Length + blockLength;
+                    blockLength = 0;
+                }
             case ('H') : break;                             // for 'H' - do nothing, move to next op
             default    :
                 printf("ERROR: Invalid Cigar op type\n");   // shouldn't get here
diff --git a/src/utils/BlockedIntervals/BlockedIntervals.h b/src/utils/BlockedIntervals/BlockedIntervals.h
index 5a3a5c2c..1f6c182e 100644
--- a/src/utils/BlockedIntervals/BlockedIntervals.h
+++ b/src/utils/BlockedIntervals/BlockedIntervals.h
@@ -21,9 +21,10 @@ using namespace BamTools;
     into discrete alignment blocks.
 */
 void GetBamBlocks(const BamAlignment &bam,
-                  const string &chrom,
-                  bedVector &bedBlocks,
-                  bool breakOnDeletionOps = false);
+                      const string &chrom,
+                      bedVector &bedBlocks,
+                      bool breakOnDeletionOps,
+                      bool breakOnSkipOps);
 
 /* break a BED12 record into discrete BED6 blocks. */
 void GetBedBlocks(const BED &bed, bedVector &bedBlocks);
-- 
GitLab