Skip to content
Snippets Groups Projects
Commit 7b2ca585 authored by Aaron's avatar Aaron
Browse files

Added per base coverage (-d) to coverageBed and cleaned up the main loop.

parent 9b2eb4e6
No related branches found
No related tags found
No related merge requests found
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
#include "coverageBed.h" #include "coverageBed.h"
// build // build
BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand,
bool &writeHistogram, bool &bamInput, bool &obeySplits) { bool writeHistogram, bool bamInput, bool obeySplits, bool eachBase) {
_bedAFile = bedAFile; _bedAFile = bedAFile;
_bedBFile = bedBFile; _bedBFile = bedBFile;
...@@ -24,24 +24,15 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, ...@@ -24,24 +24,15 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand,
_forceStrand = forceStrand; _forceStrand = forceStrand;
_obeySplits = obeySplits; _obeySplits = obeySplits;
_eachBase = eachBase;
_writeHistogram = writeHistogram; _writeHistogram = writeHistogram;
_bamInput = bamInput; _bamInput = bamInput;
if (_bedA->bedFile != "stdin") { // process a file
if (_bamInput == false) { //bed/gff if (_bamInput == false)
CollectCoverageBed(); CollectCoverageBed();
} else
else { CollectCoverageBam(_bedA->bedFile);
CollectCoverageBam(_bedA->bedFile);
}
}
else { // process stdin
if (_bamInput == false)
CollectCoverageBed();
else {
CollectCoverageBam("stdin");
}
}
} }
// destroy // destroy
...@@ -144,20 +135,29 @@ void BedCoverage::ReportCoverage() { ...@@ -144,20 +135,29 @@ void BedCoverage::ReportCoverage() {
map<unsigned int, unsigned int> allDepthHist; map<unsigned int, unsigned int> allDepthHist;
unsigned int totalLength = 0; unsigned int totalLength = 0;
// process each chromosome
masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin(); masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end(); masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end();
for (; chromItr != chromEnd; ++chromItr) { for (; chromItr != chromEnd; ++chromItr)
{
// for each chrom, process each bin
binsToBedCovs::const_iterator binItr = chromItr->second.begin(); binsToBedCovs::const_iterator binItr = chromItr->second.begin();
binsToBedCovs::const_iterator binEnd = chromItr->second.end(); binsToBedCovs::const_iterator binEnd = chromItr->second.end();
for (; binItr != binEnd; ++binItr) { for (; binItr != binEnd; ++binItr)
{
// for each chrom & bin, compute and report
// the observed coverage for each feature
vector<BEDCOV>::const_iterator bedItr = binItr->second.begin(); vector<BEDCOV>::const_iterator bedItr = binItr->second.begin();
vector<BEDCOV>::const_iterator bedEnd = binItr->second.end(); vector<BEDCOV>::const_iterator bedEnd = binItr->second.end();
for (; bedItr != bedEnd; ++bedItr) { for (; bedItr != bedEnd; ++bedItr)
{
int zeroDepthCount = 0; int zeroDepthCount = 0; // number of bases with zero depth
int depth = 0; int depth = 0; // tracks the depth at the current base
// the start is either the first base in the feature OR
// the leftmost position of an overlapping feature. e.g. (s = start):
// A ----------
// B s ------------
int start = min(bedItr->minOverlapStart, bedItr->start); int start = min(bedItr->minOverlapStart, bedItr->start);
// track the numnber of bases in the feature covered by // track the numnber of bases in the feature covered by
...@@ -165,51 +165,62 @@ void BedCoverage::ReportCoverage() { ...@@ -165,51 +165,62 @@ void BedCoverage::ReportCoverage() {
map<unsigned int, unsigned int> depthHist; map<unsigned int, unsigned int> depthHist;
map<unsigned int, DEPTH>::const_iterator depthItr; map<unsigned int, DEPTH>::const_iterator depthItr;
for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) { // compute the coverage observed at each base in the feature marching from start to end.
for (CHRPOS pos = start+1; pos <= bedItr->end; pos++)
{
// map pointer grabbing the starts and ends observed at this position
depthItr = bedItr->depthMap.find(pos); depthItr = bedItr->depthMap.find(pos);
// increment coverage if starts observed at this position.
if (depthItr != bedItr->depthMap.end()) { if (depthItr != bedItr->depthMap.end())
depth += depthItr->second.starts; depth += depthItr->second.starts;
if ((pos > bedItr->start) && (pos <= bedItr->end)) { // update coverage assuming the current position is within the current B feature
if (depth == 0) zeroDepthCount++; if ((pos > bedItr->start) && (pos <= bedItr->end)) {
if (depth == 0) zeroDepthCount++;
// update our histograms, assuming we are not reporting "per-base" coverage.
if (_eachBase == false) {
depthHist[depth]++; depthHist[depth]++;
allDepthHist[depth]++; allDepthHist[depth]++;
} }
depth = depth - depthItr->second.ends; else {
} _bedB->reportBedTab(*bedItr);
else { printf("%d\t%d\n", pos-bedItr->start, depth);
if ((pos > bedItr->start) && (pos <= bedItr->end)) {
if (depth == 0) zeroDepthCount++;
depthHist[depth]++;
allDepthHist[depth]++;
} }
} }
// decrement coverage if ends observed at this position.
if (depthItr != bedItr->depthMap.end())
depth = depth - depthItr->second.ends;
} }
// Report the coverage for the current interval. // Summarize the coverage for the current interval,
CHRPOS length = bedItr->end - bedItr->start; // assuming the user has not requested "per-base" coverage.
totalLength += length; if (_eachBase == false) {
CHRPOS length = bedItr->end - bedItr->start;
int nonZeroBases = (length - zeroDepthCount); totalLength += length;
float fractCovered = (float) nonZeroBases / length; int nonZeroBases = (length - zeroDepthCount);
float fractCovered = (float) nonZeroBases / length;
if (_writeHistogram == false) {
_bedB->reportBedTab(*bedItr); // print a summary of the coverage
printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered); if (_writeHistogram == false) {
} _bedB->reportBedTab(*bedItr);
else { printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered);
map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin(); }
map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end(); // report the number of bases with coverage == x
for (; histItr != histEnd; ++histItr) { else {
float fractAtThisDepth = (float) histItr->second / length; map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin();
_bedB->reportBedTab(*bedItr); map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end();
printf("%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, length, fractAtThisDepth); for (; histItr != histEnd; ++histItr)
} {
float fractAtThisDepth = (float) histItr->second / length;
_bedB->reportBedTab(*bedItr);
printf("%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, length, fractAtThisDepth);
}
}
} }
} }
} }
} }
// report a histogram of coverage among _all_
// features in B.
if (_writeHistogram == true) { if (_writeHistogram == true) {
map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin(); map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin();
map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end(); map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end();
......
...@@ -36,8 +36,8 @@ class BedCoverage { ...@@ -36,8 +36,8 @@ class BedCoverage {
public: public:
// constructor // constructor
BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram, BedCoverage(string &bedAFile, string &bedBFile, bool forceStrand, bool writeHistogram,
bool &bamInput, bool &obeySplits); bool bamInput, bool obeySplits, bool eachBase);
// destructor // destructor
~BedCoverage(void); ~BedCoverage(void);
...@@ -62,6 +62,9 @@ private: ...@@ -62,6 +62,9 @@ private:
// should we split BED/BAM into discrete blocks? // should we split BED/BAM into discrete blocks?
bool _obeySplits; bool _obeySplits;
// should discrete coverage be reported for each base in each feature?
bool _eachBase;
// private function for reporting coverage information // private function for reporting coverage information
void ReportCoverage(); void ReportCoverage();
......
...@@ -35,6 +35,7 @@ int main(int argc, char* argv[]) { ...@@ -35,6 +35,7 @@ int main(int argc, char* argv[]) {
// parm flags // parm flags
bool forceStrand = false; bool forceStrand = false;
bool writeHistogram = false; bool writeHistogram = false;
bool eachBase = false;
bool obeySplits = false; bool obeySplits = false;
bool bamInput = false; bool bamInput = false;
bool haveBedA = false; bool haveBedA = false;
...@@ -87,6 +88,9 @@ int main(int argc, char* argv[]) { ...@@ -87,6 +88,9 @@ int main(int argc, char* argv[]) {
else if (PARAMETER_CHECK("-hist", 5, parameterLength)) { else if (PARAMETER_CHECK("-hist", 5, parameterLength)) {
writeHistogram = true; writeHistogram = true;
} }
else if(PARAMETER_CHECK("-d", 2, parameterLength)) {
eachBase = true;
}
else if (PARAMETER_CHECK("-split", 6, parameterLength)) { else if (PARAMETER_CHECK("-split", 6, parameterLength)) {
obeySplits = true; obeySplits = true;
} }
...@@ -103,7 +107,7 @@ int main(int argc, char* argv[]) { ...@@ -103,7 +107,7 @@ int main(int argc, char* argv[]) {
} }
if (!showHelp) { if (!showHelp) {
BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput, obeySplits); BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput, obeySplits, eachBase);
delete bg; delete bg;
return 0; return 0;
} }
...@@ -135,7 +139,11 @@ void ShowHelp(void) { ...@@ -135,7 +139,11 @@ void ShowHelp(void) {
cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl; cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl;
cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl; cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl;
cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl; cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl;
cerr << "\t-d\t" << "Report the depth at each position in each B feature." << endl;
cerr << "\t\tPositions reported are one based. Each position" << endl;
cerr << "\t\tand depth follow the complete B feature." << endl << endl;
cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl; cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl;
cerr << "\t\twhen computing coverage." << endl; cerr << "\t\twhen computing coverage." << endl;
cerr << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl; cerr << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment