Skip to content
Snippets Groups Projects
Commit 98532d54 authored by Aaron's avatar Aaron
Browse files

Added BEDGRAPH functionality to genomeCoverageBed. From Gordon Assaf, CSHL.

parent c619fea6
No related branches found
No related tags found
No related merge requests found
......@@ -15,15 +15,16 @@
/*
Constructor
*/
BedCoverage::BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, int &max) {
BedCoverage::BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, bool &bedGraph, int &max) {
this->bedFile = bedFile;
this->bedFile = bedFile;
this->genomeFile = genomeFile;
this->eachBase = eachBase;
this->eachBase = eachBase;
this->startSites = startSites;
this->max = max;
this->bedGraph = bedGraph;
this->max = max;
this->bed = new BedFile(bedFile);
this->bed = new BedFile(bedFile);
}
......@@ -139,14 +140,14 @@ void BedCoverage::CoverageBeds(istream &bedInput) {
// process the results of the last chromosome.
ReportChromCoverage(chromCov, chromSizes[currChrom], currChrom, chromDepthHist);
if (! this->eachBase) {
if (this->eachBase == false && this->bedGraph == false) {
ReportGenomeCoverage(chromSizes, chromDepthHist);
}
}
void BedCoverage::ReportChromCoverage(vector<DEPTH> &chromCov, int &chromSize, string &chrom, chromHistMap &chromDepthHist) {
void BedCoverage::ReportChromCoverage(const vector<DEPTH> &chromCov, int &chromSize, string &chrom, chromHistMap &chromDepthHist) {
if (this->eachBase) {
int depth = 0; // initialize the depth
......@@ -159,6 +160,9 @@ void BedCoverage::ReportChromCoverage(vector<DEPTH> &chromCov, int &chromSize, s
depth = depth - chromCov[pos].ends;
}
}
else if (this->bedGraph) {
ReportChromCoverageBedGraph(chromCov, chromSize, chrom);
}
else {
int depth = 0; // initialize the depth
......@@ -232,6 +236,46 @@ void BedCoverage::ReportGenomeCoverage(map<string, int> &chromSizes, chromHistMa
}
void BedCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, int &chromSize, string &chrom) {
int depth = 0; // initialize the depth
int lastStart = -1 ;
int lastDepth = -1 ;
for (int pos = 0; pos < chromSize; pos++) {
depth += chromCov[pos].starts;
if (depth == 0 && lastDepth != -1) {
// We've found a new block of zero coverage, so report
// the previous block of non-zero coverage.
cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth << endl;
lastDepth = -1;
lastStart = -1;
}
else if (depth > 0 && depth != lastDepth) {
// Coverage depth has changed, print the last interval coverage (if any)
if (lastDepth != -1) {
cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth << endl;
}
//Set current position as the new interval start + depth
lastDepth = depth;
lastStart = pos;
}
// Default: the depth has not changed, so we will not print anything.
// Proceed until the depth changes.
// Update depth
depth = depth - chromCov[pos].ends;
}
//Print information about the last position
if (lastDepth != -1) {
cout << chrom << "\t" << lastStart << "\t" << chromSize << "\t" << lastDepth << endl;
}
}
void BedCoverage::DetermineBedInput() {
if (bed->bedFile != "stdin") { // process a file
ifstream beds(bed->bedFile.c_str(), ios::in);
......
......@@ -34,14 +34,18 @@ class BedCoverage {
public:
// constructor
BedCoverage(string &, string &, bool &, bool &, int &);
BedCoverage(string &bedFile, string &genomeFile, bool &eachBase, bool &startSites, bool &bedGraph, int &max);
// destructor
~BedCoverage(void);
void CoverageBeds(istream &bedInput);
void ReportChromCoverage(vector<DEPTH> &, int &, string &, chromHistMap&);
void ReportGenomeCoverage(map<string, int> &, chromHistMap&);
void ReportChromCoverage(const vector<DEPTH> &, int &chromSize, string &chrom, chromHistMap&);
void ReportGenomeCoverage(map<string, int> &chromSizes, chromHistMap &chromDepthHist);
void ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, int &chromSize, string &chrom);
void DetermineBedInput();
......@@ -51,10 +55,10 @@ private:
string genomeFile;
bool eachBase;
bool startSites;
bool bedGraph;
int max;
// The BED file from which to compute coverage.
BedFile *bed;
chromDepthMap chromCov;
};
......@@ -34,10 +34,11 @@ int main(int argc, char* argv[]) {
string genomeFile;
int max = 999999999;
bool haveBed = false;
bool haveBed = false;
bool haveGenome = false;
bool startSites = false;
bool eachBase = false;
bool bedGraph = false;
bool eachBase = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
......@@ -75,6 +76,9 @@ int main(int argc, char* argv[]) {
else if(PARAMETER_CHECK("-d", 2, parameterLength)) {
eachBase = true;
}
else if(PARAMETER_CHECK("-bg", 3, parameterLength)) {
bedGraph = true;
}
else if(PARAMETER_CHECK("-max", 4, parameterLength)) {
if ((i+1) < argc) {
max = atoi(argv[i + 1]);
......@@ -92,10 +96,14 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl;
showHelp = true;
}
if (bedGraph && eachBase) {
cerr << endl << "*****" << endl << "*****ERROR: Use -d or -bedgraph, not both" << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
BedCoverage *bc = new BedCoverage(bedFile, genomeFile, eachBase, startSites, max);
BedCoverage *bc = new BedCoverage(bedFile, genomeFile, eachBase, startSites, bedGraph, max);
bc->DetermineBedInput();
return 0;
......@@ -119,8 +127,12 @@ void ShowHelp(void) {
cerr << "\t-d\t" << "Report the depth at each genome position." << endl;
cerr << "\t\tDefault behavior is to report a histogram." << endl << endl;
cerr << "\t-bg\t" << "Report depth in BedGraph format. For details, see:" << endl;
cerr << "\t\tgenome.ucsc.edu/goldenPath/help/bedgraph.html" << endl << endl;
cerr << "\t-max\t" << "Combine all positions with a depth >= max into" << endl;
cerr << "\t\ta single bin in the histogram." << endl;
cerr << "\t\ta single bin in the histogram. Irrelevant" << endl;
cerr << "\t\tfor -d and -bedGraph" << endl;
cerr << "\t\t- (INTEGER)" << endl << endl;
cerr << "Notes: " << endl;
......@@ -140,7 +152,7 @@ void ShowHelp(void) {
cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl;
cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl;
cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e /" << endl;
cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl;
cerr << "\t\"select chrom, size from hg19.chromInfo\" | awk 'NR>1' > hg19.genome" << endl << endl;
// end the program here
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment