Skip to content
Snippets Groups Projects
Commit 90464038 authored by Aaron's avatar Aaron
Browse files

Added feature to get distance to closest feature (-d). Thanks to Erik Arner.

parent 7b2ca585
No related branches found
No related tags found
No related merge requests found
......@@ -21,15 +21,16 @@ const int SLOPGROWTH = 2048000;
/*
Constructor
*/
BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool &forceStrand, string &tieMode) {
BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance) {
_bedAFile = bedAFile;
_bedBFile = bedBFile;
_forceStrand = forceStrand;
_tieMode = tieMode;
_bedAFile = bedAFile;
_bedBFile = bedBFile;
_forceStrand = forceStrand;
_tieMode = tieMode;
_reportDistance = reportDistance;
_bedA = new BedFile(bedAFile);
_bedB = new BedFile(bedBFile);
_bedA = new BedFile(bedAFile);
_bedB = new BedFile(bedBFile);
FindClosestBed();
}
......@@ -54,6 +55,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
vector<BED> closestB;
float maxOverlap = 0;
CHRPOS minDistance = INT_MAX;
vector<CHRPOS> distances;
if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) {
......@@ -67,7 +69,7 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
else
aFudgeStart = 0;
if ((a.start + slop) < (2 * MAXSLOP))
if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP))
aFudgeEnd = a.end + slop;
else
aFudgeEnd = 2 * MAXSLOP;
......@@ -87,34 +89,55 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
int overlapBases = (e - s); // the number of overlapping bases b/w a and b
int aLength = (a.end - a.start); // the length of a in b.p.
// there is overlap
if (s < e) {
// is there enough overlap (default ~ 1bp)
float overlap = (float) overlapBases / (float) aLength;
if ( overlap > 0 ) {
// is this hit the closest?
if (overlap > maxOverlap) {
maxOverlap = overlap;
closestB.clear();
closestB.push_back(*h);
maxOverlap = overlap;
distances.clear();
distances.push_back(0);
}
else if (overlap == maxOverlap) closestB.push_back(*h);
else if (overlap == maxOverlap) {
closestB.push_back(*h);
distances.push_back(0);
}
}
}
// the hit is to the "left" of A
else if (h->end < a.start){
if ((a.start - h->end) < minDistance) {
minDistance = a.start - h->end;
closestB.clear();
closestB.push_back(*h);
minDistance = a.start - h->end;
distances.clear();
distances.push_back(minDistance);
}
else if ((a.start - h->end) == minDistance) closestB.push_back(*h);
else if ((a.start - h->end) == minDistance) {
closestB.push_back(*h);
distances.push_back(minDistance);
}
}
// the hit is to the "right" of A
else {
if ((h->start - a.end) < minDistance) {
minDistance = h->start - a.end;
closestB.clear();
closestB.push_back(*h);
minDistance = h->start - a.end;
distances.clear();
distances.push_back(minDistance);
}
else if ((h->start - a.end) == minDistance) closestB.push_back(*h);
else if ((h->start - a.end) == minDistance) {
closestB.push_back(*h);
distances.push_back(minDistance);
}
}
}
// if no overlaps were found, we'll widen the range
......@@ -122,31 +145,51 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
slop += SLOPGROWTH;
}
}
// there is no feature in B on the same chromosome as A
else {
_bedA->reportBedTab(a);
_bedB->reportNullBedNewLine();
if (_reportDistance == true) {
_bedB->reportNullBedTab();
cout << -1 << endl;
}
else
_bedB->reportNullBedNewLine();
}
// report the closest feature(s) in B to the current A feature.
// obey the user's reporting request (_tieMode)
if (numOverlaps > 0) {
if (closestB.size() == 1) {
_bedA->reportBedTab(a);
_bedB->reportBedNewLine(closestB[0]);
if (closestB.size() == 1 || _tieMode == "first") {
_bedA->reportBedTab(a);
if (_reportDistance == true) {
_bedB->reportBedTab(closestB[0]);
cout << distances[0] << endl;
}
else
_bedB->reportBedNewLine(closestB[0]);
}
else {
if (_tieMode == "all") {
size_t i = 0;
for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) {
_bedA->reportBedTab(a);
_bedB->reportBedNewLine(*b);
_bedA->reportBedTab(a);
if (_reportDistance == true) {
_bedB->reportBedTab(*b);
cout << distances[i++] <<endl;
}
else
_bedB->reportBedNewLine(*b);
}
}
else if (_tieMode == "first") {
_bedA->reportBedTab(a);
_bedB->reportBedNewLine(closestB[0]);
}
else if (_tieMode == "last") {
_bedA->reportBedTab(a);
_bedB->reportBedNewLine(closestB[closestB.size()-1]);
_bedA->reportBedTab(a);
if (_reportDistance == true) {
_bedB->reportBedTab(closestB[closestB.size()-1]);
cout << distances[distances.size() - 1]<<endl;
}
else
_bedB->reportBedNewLine(closestB[closestB.size()-1]);
}
}
}
......
......@@ -27,7 +27,7 @@ class BedClosest {
public:
// constructor
BedClosest(string &bedAFile, string &bedBFile, bool &forceStrand, string &tieMode);
BedClosest(string &bedAFile, string &bedBFile, bool forceStrand, string &tieMode, bool reportDistance);
// destructor
~BedClosest(void);
......@@ -41,7 +41,8 @@ private:
string _bedAFile;
string _bedBFile;
string _tieMode;
bool _forceStrand;
bool _forceStrand;
bool _reportDistance;
BedFile *_bedA, *_bedB;
......
......@@ -33,10 +33,11 @@ int main(int argc, char* argv[]) {
string bedBFile;
string tieMode = "all";
bool haveBedA = false;
bool haveBedB = false;
bool haveTieMode = false;
bool forceStrand = false;
bool haveBedA = false;
bool haveBedB = false;
bool haveTieMode = false;
bool forceStrand = false;
bool reportDistance = false;
// check to see if we should print out some help
......@@ -75,6 +76,9 @@ int main(int argc, char* argv[]) {
else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true;
}
else if (PARAMETER_CHECK("-d", 2, parameterLength)) {
reportDistance = true;
}
else if (PARAMETER_CHECK("-t", 2, parameterLength)) {
if ((i+1) < argc) {
haveTieMode = true;
......@@ -97,7 +101,7 @@ int main(int argc, char* argv[]) {
}
if (!showHelp) {
BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode);
BedClosest *bc = new BedClosest(bedAFile, bedBFile, forceStrand, tieMode, reportDistance);
delete bc;
return 0;
}
......@@ -110,7 +114,8 @@ void ShowHelp(void) {
cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
cerr << "Authors: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
cerr << "\t Erik Arner, Riken" << endl << endl;
cerr << "Summary: For each feature in A, finds the closest " << endl;
cerr << "\t feature (upstream or downstream) in B." << endl << endl;
......@@ -121,6 +126,11 @@ void ShowHelp(void) {
cerr << "\t-s\t" << "Force strandedness. That is, find the closest feature in B" << endl;
cerr << "\t\tthat overlaps A on the same strand." << endl;
cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
cerr << "\t-d\t" << "In addition to the closest feature in B, " << endl;
cerr << "\t\treport its distance to A as an extra column." << endl;
cerr << "\t\t- The reported distance for overlapping features will be 0." << endl << endl;
cerr << "\t-t\t" << "How ties for closest feature are handled. This occurs when two" << endl;
cerr << "\t\tfeatures in B have exactly the same overlap with A." << endl;
......@@ -137,5 +147,4 @@ void ShowHelp(void) {
// end the program here
exit(1);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment