diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp index fcc94c9d46297849faf0ec6cbbfb3cb793b19985..6a18ead64370a282b54b1a27686ac1593d770dfb 100644 --- a/src/coverageBed/coverageBed.cpp +++ b/src/coverageBed/coverageBed.cpp @@ -13,8 +13,8 @@ #include "coverageBed.h" // build -BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, - bool &writeHistogram, bool &bamInput, bool &obeySplits) { +BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand, + bool writeHistogram, bool bamInput, bool obeySplits, bool eachBase) { _bedAFile = bedAFile; _bedBFile = bedBFile; @@ -24,24 +24,15 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, _forceStrand = forceStrand; _obeySplits = obeySplits; + _eachBase = eachBase; _writeHistogram = writeHistogram; _bamInput = bamInput; - if (_bedA->bedFile != "stdin") { // process a file - if (_bamInput == false) { //bed/gff - CollectCoverageBed(); - } - else { - CollectCoverageBam(_bedA->bedFile); - } - } - else { // process stdin - if (_bamInput == false) - CollectCoverageBed(); - else { - CollectCoverageBam("stdin"); - } - } + + if (_bamInput == false) + CollectCoverageBed(); + else + CollectCoverageBam(_bedA->bedFile); } // destroy @@ -144,20 +135,29 @@ void BedCoverage::ReportCoverage() { map<unsigned int, unsigned int> allDepthHist; unsigned int totalLength = 0; + // process each chromosome masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin(); masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end(); - for (; chromItr != chromEnd; ++chromItr) { - + for (; chromItr != chromEnd; ++chromItr) + { + // for each chrom, process each bin binsToBedCovs::const_iterator binItr = chromItr->second.begin(); binsToBedCovs::const_iterator binEnd = chromItr->second.end(); - for (; binItr != binEnd; ++binItr) { - + for (; binItr != binEnd; ++binItr) + { + // for each chrom & bin, compute and report + // the observed coverage for each feature vector<BEDCOV>::const_iterator bedItr = binItr->second.begin(); vector<BEDCOV>::const_iterator bedEnd = binItr->second.end(); - for (; bedItr != bedEnd; ++bedItr) { - - int zeroDepthCount = 0; - int depth = 0; + for (; bedItr != bedEnd; ++bedItr) + { + int zeroDepthCount = 0; // number of bases with zero depth + int depth = 0; // tracks the depth at the current base + + // the start is either the first base in the feature OR + // the leftmost position of an overlapping feature. e.g. (s = start): + // A ---------- + // B s ------------ int start = min(bedItr->minOverlapStart, bedItr->start); // track the numnber of bases in the feature covered by @@ -165,51 +165,62 @@ void BedCoverage::ReportCoverage() { map<unsigned int, unsigned int> depthHist; map<unsigned int, DEPTH>::const_iterator depthItr; - for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) { - + // compute the coverage observed at each base in the feature marching from start to end. + for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) + { + // map pointer grabbing the starts and ends observed at this position depthItr = bedItr->depthMap.find(pos); - - if (depthItr != bedItr->depthMap.end()) { + // increment coverage if starts observed at this position. + if (depthItr != bedItr->depthMap.end()) depth += depthItr->second.starts; - if ((pos > bedItr->start) && (pos <= bedItr->end)) { - if (depth == 0) zeroDepthCount++; + // update coverage assuming the current position is within the current B feature + if ((pos > bedItr->start) && (pos <= bedItr->end)) { + if (depth == 0) zeroDepthCount++; + // update our histograms, assuming we are not reporting "per-base" coverage. + if (_eachBase == false) { depthHist[depth]++; allDepthHist[depth]++; } - depth = depth - depthItr->second.ends; - } - else { - if ((pos > bedItr->start) && (pos <= bedItr->end)) { - if (depth == 0) zeroDepthCount++; - depthHist[depth]++; - allDepthHist[depth]++; + else { + _bedB->reportBedTab(*bedItr); + printf("%d\t%d\n", pos-bedItr->start, depth); } } + // decrement coverage if ends observed at this position. + if (depthItr != bedItr->depthMap.end()) + depth = depth - depthItr->second.ends; } - // Report the coverage for the current interval. - CHRPOS length = bedItr->end - bedItr->start; - totalLength += length; - - int nonZeroBases = (length - zeroDepthCount); - float fractCovered = (float) nonZeroBases / length; - - if (_writeHistogram == false) { - _bedB->reportBedTab(*bedItr); - printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered); - } - else { - map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin(); - map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end(); - for (; histItr != histEnd; ++histItr) { - float fractAtThisDepth = (float) histItr->second / length; - _bedB->reportBedTab(*bedItr); - printf("%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, length, fractAtThisDepth); - } + // Summarize the coverage for the current interval, + // assuming the user has not requested "per-base" coverage. + if (_eachBase == false) { + CHRPOS length = bedItr->end - bedItr->start; + totalLength += length; + int nonZeroBases = (length - zeroDepthCount); + float fractCovered = (float) nonZeroBases / length; + + // print a summary of the coverage + if (_writeHistogram == false) { + _bedB->reportBedTab(*bedItr); + printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered); + } + // report the number of bases with coverage == x + else { + map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin(); + map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end(); + for (; histItr != histEnd; ++histItr) + { + float fractAtThisDepth = (float) histItr->second / length; + _bedB->reportBedTab(*bedItr); + printf("%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, length, fractAtThisDepth); + } + } } } } } + // report a histogram of coverage among _all_ + // features in B. if (_writeHistogram == true) { map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin(); map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end(); diff --git a/src/coverageBed/coverageBed.h b/src/coverageBed/coverageBed.h index 310974f41f6b7a01b8c3fe8c606a02e22572c13a..d09aa54e51547346eaf6dd73075fae00b17bea0e 100644 --- a/src/coverageBed/coverageBed.h +++ b/src/coverageBed/coverageBed.h @@ -36,8 +36,8 @@ class BedCoverage { public: // constructor - BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram, - bool &bamInput, bool &obeySplits); + BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand, bool writeHistogram, + bool bamInput, bool obeySplits, bool eachBase); // destructor ~BedCoverage(void); @@ -62,6 +62,9 @@ private: // should we split BED/BAM into discrete blocks? bool _obeySplits; + + // should discrete coverage be reported for each base in each feature? + bool _eachBase; // private function for reporting coverage information void ReportCoverage(); diff --git a/src/coverageBed/coverageMain.cpp b/src/coverageBed/coverageMain.cpp index efc76bb2a9264cca1adff7fea00bd2fa35dbb858..7a36263290b4be46e4788982dd486855a4ff4682 100644 --- a/src/coverageBed/coverageMain.cpp +++ b/src/coverageBed/coverageMain.cpp @@ -35,6 +35,7 @@ int main(int argc, char* argv[]) { // parm flags bool forceStrand = false; bool writeHistogram = false; + bool eachBase = false; bool obeySplits = false; bool bamInput = false; bool haveBedA = false; @@ -87,6 +88,9 @@ int main(int argc, char* argv[]) { else if (PARAMETER_CHECK("-hist", 5, parameterLength)) { writeHistogram = true; } + else if(PARAMETER_CHECK("-d", 2, parameterLength)) { + eachBase = true; + } else if (PARAMETER_CHECK("-split", 6, parameterLength)) { obeySplits = true; } @@ -103,7 +107,7 @@ int main(int argc, char* argv[]) { } if (!showHelp) { - BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput, obeySplits); + BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput, obeySplits, eachBase); delete bg; return 0; } @@ -135,7 +139,11 @@ void ShowHelp(void) { cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl; cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl; cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl; - + + cerr << "\t-d\t" << "Report the depth at each position in each B feature." << endl; + cerr << "\t\tPositions reported are one based. Each position" << endl; + cerr << "\t\tand depth follow the complete B feature." << endl << endl; + cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl; cerr << "\t\twhen computing coverage." << endl; cerr << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl;