Skip to content
Snippets Groups Projects
Commit 0da6473f authored by Aaron's avatar Aaron
Browse files

Added native BAM support to coverageBed.

parent 98532d54
No related branches found
No related tags found
No related merge requests found
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
#include "lineFileUtilities.h" #include "lineFileUtilities.h"
#include "coverageBed.h" #include "coverageBed.h"
// build
BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram) { BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram, bool &bamInput) {
this->bedAFile = bedAFile; this->bedAFile = bedAFile;
this->bedBFile = bedBFile; this->bedBFile = bedBFile;
...@@ -23,16 +23,17 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, ...@@ -23,16 +23,17 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand,
this->forceStrand = forceStrand; this->forceStrand = forceStrand;
this->writeHistogram = writeHistogram; this->writeHistogram = writeHistogram;
this->bamInput = bamInput;
} }
// destroy
BedCoverage::~BedCoverage(void) { BedCoverage::~BedCoverage(void) {
delete this->bedA;
delete this->bedB;
} }
void BedCoverage::CollectCoverageBed(istream &bedInput) {
void BedCoverage::CollectCoverage(istream &bedInput) {
// load the "B" bed file into a map so // load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps // that we can easily compare "A" to it for overlaps
...@@ -63,6 +64,44 @@ void BedCoverage::CollectCoverage(istream &bedInput) { ...@@ -63,6 +64,44 @@ void BedCoverage::CollectCoverage(istream &bedInput) {
} }
void BedCoverage::CollectCoverageBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
// construct a new BED entry from the current BAM alignment.
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.Position + bam.AlignedBases.size();
a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
bedB->countHits(a, this->forceStrand);
}
}
// report the coverage (summary or histogram) for BED B.
ReportCoverage();
// close the BAM file
reader.Close();
}
void BedCoverage::ReportCoverage() { void BedCoverage::ReportCoverage() {
map<unsigned int, unsigned int> allDepthHist; map<unsigned int, unsigned int> allDepthHist;
...@@ -81,8 +120,8 @@ void BedCoverage::ReportCoverage() { ...@@ -81,8 +120,8 @@ void BedCoverage::ReportCoverage() {
for (; bedItr != bedEnd; ++bedItr) { for (; bedItr != bedEnd; ++bedItr) {
int zeroDepthCount = 0; int zeroDepthCount = 0;
int depth = 0; int depth = 0;
int start = min(bedItr->minOverlapStart, bedItr->start); int start = min(bedItr->minOverlapStart, bedItr->start);
// track the numnber of bases in the feature covered by // track the numnber of bases in the feature covered by
// 0, 1, 2, ... n features in A // 0, 1, 2, ... n features in A
...@@ -112,11 +151,11 @@ void BedCoverage::ReportCoverage() { ...@@ -112,11 +151,11 @@ void BedCoverage::ReportCoverage() {
} }
// Report the coverage for the current interval. // Report the coverage for the current interval.
int length = bedItr->end - bedItr->start; int length = bedItr->end - bedItr->start;
totalLength += length; totalLength += length;
int nonZeroBases = (length-zeroDepthCount); int nonZeroBases = (length - zeroDepthCount);
float fractCovered = (float) nonZeroBases /length; float fractCovered = (float) nonZeroBases / length;
if (this->writeHistogram == false) { if (this->writeHistogram == false) {
bedB->reportBedTab(*bedItr); bedB->reportBedTab(*bedItr);
...@@ -147,16 +186,22 @@ void BedCoverage::ReportCoverage() { ...@@ -147,16 +186,22 @@ void BedCoverage::ReportCoverage() {
void BedCoverage::DetermineBedInput() { void BedCoverage::DetermineBedInput() {
if (bedA->bedFile != "stdin") { // process a file if (bedA->bedFile != "stdin") { // process a file
ifstream beds(bedA->bedFile.c_str(), ios::in); if (this->bamInput == false) { //bed/gff
if ( !beds ) { ifstream beds(bedA->bedFile.c_str(), ios::in);
cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl; if ( !beds ) {
exit (1); cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl;
exit (1);
}
CollectCoverageBed(beds);
} }
CollectCoverage(beds); else
CollectCoverageBam(bedA->bedFile);
} }
else { // process stdin else { // process stdin
CollectCoverage(cin); if (this->bamInput == false)
CollectCoverageBed(cin);
else
CollectCoverageBam("stdin");
} }
} }
...@@ -13,6 +13,11 @@ ...@@ -13,6 +13,11 @@
#define COVERAGEBED_H #define COVERAGEBED_H
#include "bedFile.h" #include "bedFile.h"
#include "BamReader.h"
#include "BamAux.h"
using namespace BamTools;
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <iostream> #include <iostream>
...@@ -30,26 +35,36 @@ class BedCoverage { ...@@ -30,26 +35,36 @@ class BedCoverage {
public: public:
// constructor // constructor
BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram); BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram, bool &bamInput);
// destructor // destructor
~BedCoverage(void); ~BedCoverage(void);
void CollectCoverage(istream &bedInput); void CollectCoverageBed(istream &bedInput);
void CollectCoverageBam(string bamFile);
void DetermineBedInput(); void DetermineBedInput();
private: private:
// input files.
string bedAFile; string bedAFile;
string bedBFile; string bedBFile;
// instance of a bed file class. // instance of a bed file class.
BedFile *bedA, *bedB; BedFile *bedA, *bedB;
// do we care about strandedness when counting coverage?
bool forceStrand; bool forceStrand;
// should we write a histogram for each feature in B?
bool writeHistogram; bool writeHistogram;
// are we dealing with BAM input for "A"?
bool bamInput;
// private function for reporting coverage information
void ReportCoverage(); void ReportCoverage();
}; };
#endif /* COVERAGEBED_H */ #endif /* COVERAGEBED_H */
...@@ -31,12 +31,13 @@ int main(int argc, char* argv[]) { ...@@ -31,12 +31,13 @@ int main(int argc, char* argv[]) {
// input files // input files
string bedAFile; string bedAFile;
string bedBFile; string bedBFile;
bool forceStrand = false;
bool writeHistogram = false;
bool haveBedA = false; // parm flags
bool haveBedB = false; bool forceStrand = false;
bool writeHistogram = false;
bool bamInput = false;
bool haveBedA = false;
bool haveBedB = false;
// check to see if we should print out some help // check to see if we should print out some help
if(argc <= 1) showHelp = true; if(argc <= 1) showHelp = true;
...@@ -64,6 +65,14 @@ int main(int argc, char* argv[]) { ...@@ -64,6 +65,14 @@ int main(int argc, char* argv[]) {
i++; i++;
} }
} }
if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
bamInput = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) { else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) { if ((i+1) < argc) {
haveBedB = true; haveBedB = true;
...@@ -91,7 +100,7 @@ int main(int argc, char* argv[]) { ...@@ -91,7 +100,7 @@ int main(int argc, char* argv[]) {
if (!showHelp) { if (!showHelp) {
BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram); BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput);
bg->DetermineBedInput(); bg->DetermineBedInput();
return 0; return 0;
} }
...@@ -112,21 +121,25 @@ void ShowHelp(void) { ...@@ -112,21 +121,25 @@ void ShowHelp(void) {
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl; cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl;
cerr << "Options: " << endl; cerr << "Options: " << endl;
cerr << "\t-abam\t" << "The A input file is in BAM format." << endl << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only include hits in A that" << endl; cerr << "\t-s\t" << "Force strandedness. That is, only include hits in A that" << endl;
cerr << "\t\toverlap B on the same strand." << endl; cerr << "\t\toverlap B on the same strand." << endl;
cerr << "\t\t- By default, hits are included without respect to strand." << endl << endl; cerr << "\t\t- By default, hits are included without respect to strand." << endl << endl;
cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl << endl; cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl;
cerr << "\t\tOutput (tab delimited):" << endl; cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl;
cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl;
cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl;
cerr << "Default Output: " << endl; cerr << "Default Output: " << endl;
cerr << "\t" << " After each entry in B, reports: " << endl; cerr << "\t" << " After each entry in B, reports: " << endl;
cerr << "\t 1) The number of features in A that overlapped the B interval." << endl; cerr << "\t 1) The number of features in A that overlapped the B interval." << endl;
cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl; cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl;
cerr << "\t 3) The length of the entry in B." << endl; cerr << "\t 3) The length of the entry in B." << endl;
cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl; cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl << endl;
exit(1); exit(1);
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment