Skip to content
Snippets Groups Projects
Commit 0da6473f authored by Aaron's avatar Aaron
Browse files

Added native BAM support to coverageBed.

parent 98532d54
No related branches found
No related tags found
No related merge requests found
......@@ -12,8 +12,8 @@
#include "lineFileUtilities.h"
#include "coverageBed.h"
BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram) {
// build
BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram, bool &bamInput) {
this->bedAFile = bedAFile;
this->bedBFile = bedBFile;
......@@ -23,16 +23,17 @@ BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand,
this->forceStrand = forceStrand;
this->writeHistogram = writeHistogram;
this->bamInput = bamInput;
}
// destroy
BedCoverage::~BedCoverage(void) {
delete this->bedA;
delete this->bedB;
}
void BedCoverage::CollectCoverage(istream &bedInput) {
void BedCoverage::CollectCoverageBed(istream &bedInput) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
......@@ -63,6 +64,44 @@ void BedCoverage::CollectCoverage(istream &bedInput) {
}
void BedCoverage::CollectCoverageBam(string bamFile) {
// load the "B" bed file into a map so
// that we can easily compare "A" to it for overlaps
bedB->loadBedFileIntoMap();
// open the BAM file
BamReader reader;
reader.Open(bamFile);
// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
if (bam.IsMapped()) {
// construct a new BED entry from the current BAM alignment.
BED a;
a.chrom = refs.at(bam.RefID).RefName;
a.start = bam.Position;
a.end = bam.Position + bam.AlignedBases.size();
a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
bedB->countHits(a, this->forceStrand);
}
}
// report the coverage (summary or histogram) for BED B.
ReportCoverage();
// close the BAM file
reader.Close();
}
void BedCoverage::ReportCoverage() {
map<unsigned int, unsigned int> allDepthHist;
......@@ -81,8 +120,8 @@ void BedCoverage::ReportCoverage() {
for (; bedItr != bedEnd; ++bedItr) {
int zeroDepthCount = 0;
int depth = 0;
int start = min(bedItr->minOverlapStart, bedItr->start);
int depth = 0;
int start = min(bedItr->minOverlapStart, bedItr->start);
// track the numnber of bases in the feature covered by
// 0, 1, 2, ... n features in A
......@@ -112,11 +151,11 @@ void BedCoverage::ReportCoverage() {
}
// Report the coverage for the current interval.
int length = bedItr->end - bedItr->start;
int length = bedItr->end - bedItr->start;
totalLength += length;
int nonZeroBases = (length-zeroDepthCount);
float fractCovered = (float) nonZeroBases /length;
int nonZeroBases = (length - zeroDepthCount);
float fractCovered = (float) nonZeroBases / length;
if (this->writeHistogram == false) {
bedB->reportBedTab(*bedItr);
......@@ -147,16 +186,22 @@ void BedCoverage::ReportCoverage() {
void BedCoverage::DetermineBedInput() {
if (bedA->bedFile != "stdin") { // process a file
ifstream beds(bedA->bedFile.c_str(), ios::in);
if ( !beds ) {
cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl;
exit (1);
if (this->bamInput == false) { //bed/gff
ifstream beds(bedA->bedFile.c_str(), ios::in);
if ( !beds ) {
cerr << "Error: The requested bed file (" << bedA->bedFile << ") could not be opened. Exiting!" << endl;
exit (1);
}
CollectCoverageBed(beds);
}
CollectCoverage(beds);
else
CollectCoverageBam(bedA->bedFile);
}
else { // process stdin
CollectCoverage(cin);
else { // process stdin
if (this->bamInput == false)
CollectCoverageBed(cin);
else
CollectCoverageBam("stdin");
}
}
......@@ -13,6 +13,11 @@
#define COVERAGEBED_H
#include "bedFile.h"
#include "BamReader.h"
#include "BamAux.h"
using namespace BamTools;
#include <vector>
#include <algorithm>
#include <iostream>
......@@ -30,26 +35,36 @@ class BedCoverage {
public:
// constructor
BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram);
BedCoverage(string &bedAFile, string &bedBFile, bool &forceStrand, bool &writeHistogram, bool &bamInput);
// destructor
~BedCoverage(void);
void CollectCoverage(istream &bedInput);
void CollectCoverageBed(istream &bedInput);
void CollectCoverageBam(string bamFile);
void DetermineBedInput();
private:
// input files.
string bedAFile;
string bedBFile;
// instance of a bed file class.
BedFile *bedA, *bedB;
// do we care about strandedness when counting coverage?
bool forceStrand;
// should we write a histogram for each feature in B?
bool writeHistogram;
// are we dealing with BAM input for "A"?
bool bamInput;
// private function for reporting coverage information
void ReportCoverage();
};
#endif /* COVERAGEBED_H */
......@@ -31,12 +31,13 @@ int main(int argc, char* argv[]) {
// input files
string bedAFile;
string bedBFile;
bool forceStrand = false;
bool writeHistogram = false;
bool haveBedA = false;
bool haveBedB = false;
// parm flags
bool forceStrand = false;
bool writeHistogram = false;
bool bamInput = false;
bool haveBedA = false;
bool haveBedB = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
......@@ -64,6 +65,14 @@ int main(int argc, char* argv[]) {
i++;
}
}
if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
if ((i+1) < argc) {
haveBedA = true;
bamInput = true;
bedAFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
if ((i+1) < argc) {
haveBedB = true;
......@@ -91,7 +100,7 @@ int main(int argc, char* argv[]) {
if (!showHelp) {
BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram);
BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, forceStrand, writeHistogram, bamInput);
bg->DetermineBedInput();
return 0;
}
......@@ -112,21 +121,25 @@ void ShowHelp(void) {
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <a.bed> -b <b.bed>" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-abam\t" << "The A input file is in BAM format." << endl << endl;
cerr << "\t-s\t" << "Force strandedness. That is, only include hits in A that" << endl;
cerr << "\t\toverlap B on the same strand." << endl;
cerr << "\t\t- By default, hits are included without respect to strand." << endl << endl;
cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl << endl;
cerr << "\t\tOutput (tab delimited):" << endl;
cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl;
cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl;
cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl;
cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl;
cerr << "Default Output: " << endl;
cerr << "\t" << " After each entry in B, reports: " << endl;
cerr << "\t 1) The number of features in A that overlapped the B interval." << endl;
cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl;
cerr << "\t 3) The length of the entry in B." << endl;
cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl;
cerr << "\t 1) The number of features in A that overlapped the B interval." << endl;
cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl;
cerr << "\t 3) The length of the entry in B." << endl;
cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl << endl;
exit(1);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment