From 0da6473fee87a82fc10170b5f942bf37ac574c25 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Mon, 22 Mar 2010 21:25:16 -0400
Subject: [PATCH] Added native BAM support to coverageBed.

---
 src/coverageBed/coverageBed.cpp  | 83 ++++++++++++++++++++++++--------
 src/coverageBed/coverageBed.h    | 19 +++++++-
 src/coverageBed/coverageMain.cpp | 39 ++++++++++-----
 3 files changed, 107 insertions(+), 34 deletions(-)

diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp
index 7d93de37..d07e7261 100755
--- a/src/coverageBed/coverageBed.cpp
+++ b/src/coverageBed/coverageBed.cpp
@@ -12,8 +12,8 @@
 #include "lineFileUtilities.h"
 #include "coverageBed.h"
 
-
-BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram) {
+// build
+BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram, bool &bamInput) {
 	
 	this->bedAFile       = bedAFile;
 	this->bedBFile       = bedBFile;
@@ -23,16 +23,17 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand,
 	
 	this->forceStrand    = forceStrand;
 	this->writeHistogram = writeHistogram;
+	this->bamInput       = bamInput;
 }
 
-
-
+// destroy
 BedCoverage::~BedCoverage(void) {
+	delete this->bedA;
+	delete this->bedB;
 }
 
 
- 
-void BedCoverage::CollectCoverage(istream &bedInput) {
+void BedCoverage::CollectCoverageBed(istream &bedInput) {
 	
 	// load the "B" bed file into a map so
 	// that we can easily compare "A" to it for overlaps
@@ -63,6 +64,44 @@ void BedCoverage::CollectCoverage(istream &bedInput) {
 }
 
 
+void BedCoverage::CollectCoverageBam(string bamFile) {
+
+	// load the "B" bed file into a map so
+	// that we can easily compare "A" to it for overlaps
+	bedB->loadBedFileIntoMap();
+	
+	// open the BAM file
+	BamReader reader;
+	reader.Open(bamFile);
+
+	// get header & reference information
+	string header = reader.GetHeaderText();
+	RefVector refs = reader.GetReferenceData();
+
+	// convert each aligned BAM entry to BED 
+	// and compute coverage on B
+	BamAlignment bam;	
+	while (reader.GetNextAlignment(bam)) {
+		
+		if (bam.IsMapped()) {
+			
+			// construct a new BED entry from the current BAM alignment.	
+			BED a;
+			a.chrom  = refs.at(bam.RefID).RefName;
+			a.start  = bam.Position;
+			a.end    = bam.Position + bam.AlignedBases.size();
+			a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; 	
+
+			bedB->countHits(a, this->forceStrand);
+		}
+	}
+	// report the coverage (summary or histogram) for BED B.
+	ReportCoverage();
+	// close the BAM file	
+	reader.Close();
+}
+
+
 void BedCoverage::ReportCoverage() {
 
 	map<unsigned int, unsigned int> allDepthHist;
@@ -81,8 +120,8 @@ void BedCoverage::ReportCoverage() {
 			for (; bedItr != bedEnd; ++bedItr) {
 									
 				int zeroDepthCount = 0;
-				int depth = 0;
-				int start = min(bedItr->minOverlapStart, bedItr->start);
+				int depth          = 0;
+				int start          = min(bedItr->minOverlapStart, bedItr->start);
 				
 				// track the numnber of bases in the feature covered by
 				// 0, 1, 2, ... n features in A
@@ -112,11 +151,11 @@ void BedCoverage::ReportCoverage() {
 				}
 
 				// Report the coverage for the current interval.
-				int length = bedItr->end - bedItr->start;
+				int length   = bedItr->end - bedItr->start;
 				totalLength += length;
 				
-				int nonZeroBases =  (length-zeroDepthCount);
-				float fractCovered = (float) nonZeroBases /length;
+				int nonZeroBases   = (length - zeroDepthCount);
+				float fractCovered = (float) nonZeroBases / length;
 				
 				if (this->writeHistogram == false) {
 					bedB->reportBedTab(*bedItr);
@@ -147,16 +186,22 @@ void BedCoverage::ReportCoverage() {
 
 void BedCoverage::DetermineBedInput() {
 	if (bedA->bedFile != "stdin") {   // process a file
-		ifstream beds(bedA->bedFile.c_str(), ios::in);
-		if ( !beds ) {
-			cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl;
-			exit (1);
+		if (this->bamInput == false) { //bed/gff
+			ifstream beds(bedA->bedFile.c_str(), ios::in);
+			if ( !beds ) {
+				cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl;
+				exit (1);
+			}
+			CollectCoverageBed(beds);
 		}
-		CollectCoverage(beds);
+		else 
+			CollectCoverageBam(bedA->bedFile);
 	}
-	else {   						// process stdin
-		CollectCoverage(cin);		
+	else {   // process stdin
+		if (this->bamInput == false) 
+			CollectCoverageBed(cin);
+		else 
+			CollectCoverageBam("stdin");	
 	}
 }
 
-
diff --git a/src/coverageBed/coverageBed.h b/src/coverageBed/coverageBed.h
index 3a3cd37b..a4aa2fc6 100755
--- a/src/coverageBed/coverageBed.h
+++ b/src/coverageBed/coverageBed.h
@@ -13,6 +13,11 @@
 #define COVERAGEBED_H
 
 #include "bedFile.h"
+
+#include "BamReader.h"
+#include "BamAux.h"
+using namespace BamTools;
+
 #include <vector>
 #include <algorithm>
 #include <iostream>
@@ -30,26 +35,36 @@ class BedCoverage {
 public:
 
 	// constructor 
-	BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram);
+	BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram, bool &bamInput);
 
 	// destructor
 	~BedCoverage(void);
 	
-	void CollectCoverage(istream &bedInput);
+	void CollectCoverageBed(istream &bedInput);
+
+	void CollectCoverageBam(string bamFile);
 	
 	void DetermineBedInput();
 	
 private:
 
+	// input files.
 	string bedAFile;
 	string bedBFile;
 
 	// instance of a bed file class.
 	BedFile *bedA, *bedB;
 	
+	// do we care about strandedness when counting coverage?
 	bool forceStrand;
+	
+	// should we write a histogram for each feature in B?
 	bool writeHistogram;
 	
+	// are we dealing with BAM input for "A"?
+	bool bamInput;
+	
+	// private function for reporting coverage information
 	void ReportCoverage();
 };
 #endif /* COVERAGEBED_H */
diff --git a/src/coverageBed/coverageMain.cpp b/src/coverageBed/coverageMain.cpp
index dad0232b..d05cc0c4 100755
--- a/src/coverageBed/coverageMain.cpp
+++ b/src/coverageBed/coverageMain.cpp
@@ -31,12 +31,13 @@ int main(int argc, char* argv[]) {
 	// input files
 	string bedAFile;
 	string bedBFile;
-	bool forceStrand = false;
-	bool writeHistogram = false;
 	
-	bool haveBedA = false;
-	bool haveBedB = false;
-
+	// parm flags
+	bool forceStrand    = false;
+	bool writeHistogram = false;
+	bool bamInput     = false;	
+	bool haveBedA       = false;
+	bool haveBedB       = false;
 
 	// check to see if we should print out some help
 	if(argc <= 1) showHelp = true;
@@ -64,6 +65,14 @@ int main(int argc, char* argv[]) {
 				i++;
 			}
 		}
+		if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
+			if ((i+1) < argc) {
+				haveBedA = true;
+				bamInput = true;
+				bedAFile = argv[i + 1];
+				i++;		
+			}
+		}
 		else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
 			if ((i+1) < argc) {
 				haveBedB = true;
@@ -91,7 +100,7 @@ int main(int argc, char* argv[]) {
 
 	if (!showHelp) {
 
-		BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram);
+		BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput);
 		bg->DetermineBedInput();
 		return 0;
 	}
@@ -112,21 +121,25 @@ void ShowHelp(void) {
 	cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl;
 
 	cerr << "Options: " << endl;
+
+	cerr << "\t-abam\t"			<< "The A input file is in BAM format." << endl << endl;
+
 	cerr << "\t-s\t"	 	    << "Force strandedness.  That is, only include hits in A that" << endl;
 	cerr						<< "\t\toverlap B on the same strand." << endl;
 	cerr						<< "\t\t- By default, hits are included without respect to strand." << endl << endl;
 
-	cerr << "\t-hist\t"	 	    << "Report a histogram of coverage for each feature in B" << endl << endl;
-	cerr						<< "\t\tOutput (tab delimited):" << endl;
+	cerr << "\t-hist\t"	 	    << "Report a histogram of coverage for each feature in B" << endl;
+	cerr						<< "\t\tas well as a summary histogram for _all_ features in B." << endl << endl;
+	cerr						<< "\t\tOutput (tab delimited) after each feature in B:" << endl;
+	cerr						<< "\t\t  1) depth\n\t\t  2) # bases at depth\n\t\t  3) size of B\n\t\t  4) % of B at depth" << endl << endl;
 	
 
 	cerr << "Default Output:  " << endl;
 	cerr << "\t" << " After each entry in B, reports: " << endl; 
-	cerr << "\t  1) The number of features in A that overlapped the B interval." << endl;
-	cerr << "\t  2) The number of bases in B that had non-zero coverage." << endl;
-	cerr << "\t  3) The length of the entry in B." << endl;
-	cerr << "\t  4) The fraction of bases in B that had non-zero coverage." << endl;
+	cerr << "\t   1) The number of features in A that overlapped the B interval." << endl;
+	cerr << "\t   2) The number of bases in B that had non-zero coverage." << endl;
+	cerr << "\t   3) The length of the entry in B." << endl;
+	cerr << "\t   4) The fraction of bases in B that had non-zero coverage." << endl << endl;
 	
 	exit(1);
-
 }
-- 
GitLab