diff --git a/src/genomeCoverageBed/genomeCoverageBed.cpp b/src/genomeCoverageBed/genomeCoverageBed.cpp index 5b2276eec1bc03bd7ca0d55968e33809721bbe97..80191b1baa36013a595a38e8f90b2dca54a042bf 100755 --- a/src/genomeCoverageBed/genomeCoverageBed.cpp +++ b/src/genomeCoverageBed/genomeCoverageBed.cpp @@ -15,15 +15,16 @@ /* Constructor */ -BedCoverage::BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, int &max) { +BedCoverage::BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, bool &bedGraph, int &max) { - this->bedFile = bedFile; + this->bedFile = bedFile; this->genomeFile = genomeFile; - this->eachBase = eachBase; + this->eachBase = eachBase; this->startSites = startSites; - this->max = max; + this->bedGraph = bedGraph; + this->max = max; - this->bed = new BedFile(bedFile); + this->bed = new BedFile(bedFile); } @@ -139,14 +140,14 @@ void BedCoverage::CoverageBeds(istream &bedInput) { // process the results of the last chromosome. ReportChromCoverage(chromCov, chromSizes[currChrom], currChrom, chromDepthHist); - if (! this->eachBase) { + if (this->eachBase == false && this->bedGraph == false) { ReportGenomeCoverage(chromSizes, chromDepthHist); } } -void BedCoverage::ReportChromCoverage(vector<DEPTH> &chromCov, int &chromSize, string &chrom, chromHistMap &chromDepthHist) { +void BedCoverage::ReportChromCoverage(const vector<DEPTH> &chromCov, int &chromSize, string &chrom, chromHistMap &chromDepthHist) { if (this->eachBase) { int depth = 0; // initialize the depth @@ -159,6 +160,9 @@ void BedCoverage::ReportChromCoverage(vector<DEPTH> &chromCov, int &chromSize, s depth = depth - chromCov[pos].ends; } } + else if (this->bedGraph) { + ReportChromCoverageBedGraph(chromCov, chromSize, chrom); + } else { int depth = 0; // initialize the depth @@ -232,6 +236,46 @@ void BedCoverage::ReportGenomeCoverage(map<string, int> &chromSizes, chromHistMa } +void BedCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, int &chromSize, string &chrom) { + + int depth = 0; // initialize the depth + int lastStart = -1 ; + int lastDepth = -1 ; + + for (int pos = 0; pos < chromSize; pos++) { + depth += chromCov[pos].starts; + + if (depth == 0 && lastDepth != -1) { + // We've found a new block of zero coverage, so report + // the previous block of non-zero coverage. + cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth << endl; + lastDepth = -1; + lastStart = -1; + } + else if (depth > 0 && depth != lastDepth) { + // Coverage depth has changed, print the last interval coverage (if any) + if (lastDepth != -1) { + cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth << endl; + } + //Set current position as the new interval start + depth + lastDepth = depth; + lastStart = pos; + } + // Default: the depth has not changed, so we will not print anything. + // Proceed until the depth changes. + + // Update depth + depth = depth - chromCov[pos].ends; + } + + + //Print information about the last position + if (lastDepth != -1) { + cout << chrom << "\t" << lastStart << "\t" << chromSize << "\t" << lastDepth << endl; + } +} + + void BedCoverage::DetermineBedInput() { if (bed->bedFile != "stdin") { // process a file ifstream beds(bed->bedFile.c_str(), ios::in); diff --git a/src/genomeCoverageBed/genomeCoverageBed.h b/src/genomeCoverageBed/genomeCoverageBed.h index 5c88f4931d4da01eda7006412545784321fcf3b4..14fcf8685fcc9fce165085cc4daf4a54e847dea2 100755 --- a/src/genomeCoverageBed/genomeCoverageBed.h +++ b/src/genomeCoverageBed/genomeCoverageBed.h @@ -34,14 +34,18 @@ class BedCoverage { public: // constructor - BedCoverage(string &, string &, bool &, bool &, int &); + BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, bool &bedGraph, int &max); // destructor ~BedCoverage(void); void CoverageBeds(istream &bedInput); - void ReportChromCoverage(vector<DEPTH> &, int &, string &, chromHistMap&); - void ReportGenomeCoverage(map<string, int> &, chromHistMap&); + + void ReportChromCoverage(const vector<DEPTH> &, int &chromSize, string &chrom, chromHistMap&); + + void ReportGenomeCoverage(map<string, int> &chromSizes, chromHistMap &chromDepthHist); + + void ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, int &chromSize, string &chrom); void DetermineBedInput(); @@ -51,10 +55,10 @@ private: string genomeFile; bool eachBase; bool startSites; + bool bedGraph; int max; // The BED file from which to compute coverage. BedFile *bed; chromDepthMap chromCov; - }; diff --git a/src/genomeCoverageBed/genomeCoverageMain.cpp b/src/genomeCoverageBed/genomeCoverageMain.cpp index 88cbdea0815841f06bbcef8d2983939c2e23d3e6..4efb107fe20ae595fb6e75b3205296dcd63bfa66 100755 --- a/src/genomeCoverageBed/genomeCoverageMain.cpp +++ b/src/genomeCoverageBed/genomeCoverageMain.cpp @@ -34,10 +34,11 @@ int main(int argc, char* argv[]) { string genomeFile; int max = 999999999; - bool haveBed = false; + bool haveBed = false; bool haveGenome = false; bool startSites = false; - bool eachBase = false; + bool bedGraph = false; + bool eachBase = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; @@ -75,6 +76,9 @@ int main(int argc, char* argv[]) { else if(PARAMETER_CHECK("-d", 2, parameterLength)) { eachBase = true; } + else if(PARAMETER_CHECK("-bg", 3, parameterLength)) { + bedGraph = true; + } else if(PARAMETER_CHECK("-max", 4, parameterLength)) { if ((i+1) < argc) { max = atoi(argv[i + 1]); @@ -92,10 +96,14 @@ int main(int argc, char* argv[]) { cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; showHelp = true; } + if (bedGraph && eachBase) { + cerr << endl << "*****" << endl << "*****ERROR: Use -d or -bedgraph, not both" << endl << "*****" << endl; + showHelp = true; + } if (!showHelp) { - BedCoverage *bc = new BedCoverage(bedFile, genomeFile, eachBase, startSites, max); + BedCoverage *bc = new BedCoverage(bedFile, genomeFile, eachBase, startSites, bedGraph, max); bc->DetermineBedInput(); return 0; @@ -119,8 +127,12 @@ void ShowHelp(void) { cerr << "\t-d\t" << "Report the depth at each genome position." << endl; cerr << "\t\tDefault behavior is to report a histogram." << endl << endl; + cerr << "\t-bg\t" << "Report depth in BedGraph format. For details, see:" << endl; + cerr << "\t\tgenome.ucsc.edu/goldenPath/help/bedgraph.html" << endl << endl; + cerr << "\t-max\t" << "Combine all positions with a depth >= max into" << endl; - cerr << "\t\ta single bin in the histogram." << endl; + cerr << "\t\ta single bin in the histogram. Irrelevant" << endl; + cerr << "\t\tfor -d and -bedGraph" << endl; cerr << "\t\t- (INTEGER)" << endl << endl; cerr << "Notes: " << endl; @@ -140,7 +152,7 @@ void ShowHelp(void) { cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e /" << endl; - cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; + cerr << "\t\"select chrom, size from hg19.chromInfo\" | awk 'NR>1' > hg19.genome" << endl << endl; // end the program here