From cdf0f8e223f57e5066f091697934bd7c8cb37430 Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Sun, 5 Feb 2012 14:41:35 -0500 Subject: [PATCH] greater control over splitting on D or N CIGAR ops. --- src/bamToBed/bamToBed.cpp | 4 ++-- src/coverageBed/coverageBed.cpp | 2 +- src/genomeCoverageBed/genomeCoverageBed.cpp | 15 +++++++++------ src/intersectBed/intersectBed.cpp | 2 +- src/utils/BlockedIntervals/BlockedIntervals.cpp | 15 ++++++++++----- src/utils/BlockedIntervals/BlockedIntervals.h | 7 ++++--- 6 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/bamToBed/bamToBed.cpp b/src/bamToBed/bamToBed.cpp index 344be862..bcf7c2b4 100644 --- a/src/bamToBed/bamToBed.cpp +++ b/src/bamToBed/bamToBed.cpp @@ -413,7 +413,7 @@ void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDista vector<BED> bedBlocks; string chrom = refs.at(bam.RefID).RefName; // extract the block starts and lengths from the CIGAR string - GetBamBlocks(bam, chrom, bedBlocks); + GetBamBlocks(bam, chrom, bedBlocks, false, true); unsigned int i; for (i = 0; i < bedBlocks.size(); ++i) { @@ -442,7 +442,7 @@ void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDist string chrom = refs.at(bam.RefID).RefName; CHRPOS alignmentEnd = bam.GetEndPosition(); // extract the block starts and lengths from the CIGAR string - GetBamBlocks(bam, chrom, bedBlocks); + GetBamBlocks(bam, chrom, bedBlocks, false, true); // write BED6 portion if (useEditDistance == false && bamTag == "") { diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp index dfd3a199..2d3bba81 100644 --- a/src/coverageBed/coverageBed.cpp +++ b/src/coverageBed/coverageBed.cpp @@ -117,7 +117,7 @@ void BedCoverage::CollectCoverageBam(string bamFile) { bedVector bedBlocks; // since we are counting coverage, we do want to split blocks when a // deletion (D) CIGAR op is encountered (hence the true for the last parm) - GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true); + GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, false, true); // use countSplitHits to avoid over-counting each split chunk // as distinct read coverage. _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly); diff --git a/src/genomeCoverageBed/genomeCoverageBed.cpp b/src/genomeCoverageBed/genomeCoverageBed.cpp index a06ac204..cdb04446 100644 --- a/src/genomeCoverageBed/genomeCoverageBed.cpp +++ b/src/genomeCoverageBed/genomeCoverageBed.cpp @@ -243,11 +243,16 @@ void BedGenomeCoverage::CoverageBam(string bamFile) { StartNewChrom(chrom); // add coverage accordingly. - if (_obeySplits) { + if (!_only_5p_end && !_only_3p_end) { bedVector bedBlocks; - // since we are counting coverage, we do want to split blocks when a - // deletion (D) CIGAR op is encountered (hence the true for the last parm) - GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true); + // we always want to split blocks when a D CIGAR op is found. + // if the user invokes -split, we want to also split on N ops. + if (_obeySplits) { // "D" true, "N" true + GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true); + } + else { // "D" true, "N" false + GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false); + } AddBlockedCoverage(bedBlocks); } else if (_only_5p_end) { @@ -258,8 +263,6 @@ void BedGenomeCoverage::CoverageBam(string bamFile) { int pos = ( bam.IsReverseStrand() ) ? start : end; AddCoverage(pos,pos); } - else - AddCoverage(start, end); } // close the BAM reader.Close(); diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp index 559eec23..1d410ae1 100644 --- a/src/intersectBed/intersectBed.cpp +++ b/src/intersectBed/intersectBed.cpp @@ -323,7 +323,7 @@ void BedIntersect::IntersectBam(string bamFile) { // break alignment into discrete blocks, bedVector bed_blocks; string chrom = refs.at(bam.RefID).RefName; - GetBamBlocks(bam, chrom, bed_blocks); + GetBamBlocks(bam, chrom, bed_blocks, false, true); // create a basic BED entry from the BAM alignment BED bed; MakeBedFromBam(bam, chrom, bed_blocks, bed); diff --git a/src/utils/BlockedIntervals/BlockedIntervals.cpp b/src/utils/BlockedIntervals/BlockedIntervals.cpp index eead4ca7..659f299b 100644 --- a/src/utils/BlockedIntervals/BlockedIntervals.cpp +++ b/src/utils/BlockedIntervals/BlockedIntervals.cpp @@ -15,7 +15,8 @@ void GetBamBlocks(const BamAlignment &bam, const string &chrom, bedVector &bedBlocks, - bool breakOnDeletionOps) + bool breakOnDeletionOps, + bool breakOnSkipOps) { vector<int> starts; vector<int> lengths; @@ -47,10 +48,14 @@ void GetBamBlocks(const BamAlignment &bam, } case ('P') : break; case ('N') : - bedBlocks.push_back( BED(chrom, currPosition, currPosition + blockLength, - bam.Name, ToString(bam.MapQuality), strand) ); - currPosition += cigItr->Length + blockLength; - blockLength = 0; + if (!breakOnSkipOps) + blockLength += cigItr->Length; + else { + bedBlocks.push_back( BED(chrom, currPosition, currPosition + blockLength, + bam.Name, ToString(bam.MapQuality), strand) ); + currPosition += cigItr->Length + blockLength; + blockLength = 0; + } case ('H') : break; // for 'H' - do nothing, move to next op default : printf("ERROR: Invalid Cigar op type\n"); // shouldn't get here diff --git a/src/utils/BlockedIntervals/BlockedIntervals.h b/src/utils/BlockedIntervals/BlockedIntervals.h index 5a3a5c2c..1f6c182e 100644 --- a/src/utils/BlockedIntervals/BlockedIntervals.h +++ b/src/utils/BlockedIntervals/BlockedIntervals.h @@ -21,9 +21,10 @@ using namespace BamTools; into discrete alignment blocks. */ void GetBamBlocks(const BamAlignment &bam, - const string &chrom, - bedVector &bedBlocks, - bool breakOnDeletionOps = false); + const string &chrom, + bedVector &bedBlocks, + bool breakOnDeletionOps, + bool breakOnSkipOps); /* break a BED12 record into discrete BED6 blocks. */ void GetBedBlocks(const BED &bed, bedVector &bedBlocks); -- GitLab