Skip to content
Snippets Groups Projects
Commit fdf2f98d authored by Aaron Quinlan's avatar Aaron Quinlan
Browse files

Merge pull request #9 from jakebiesinger/master

Signed distance for closestBed
parents a5b2d831 9713b9fb
No related branches found
No related tags found
No related merge requests found
......@@ -22,13 +22,16 @@ const int SLOPGROWTH = 2048000;
Constructor
*/
BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,
string &tieMode, bool reportDistance, bool ignoreOverlaps)
string &tieMode, bool reportDistance, bool signDistance, string &_strandedDistMode,
bool ignoreOverlaps)
: _bedAFile(bedAFile)
, _bedBFile(bedBFile)
, _tieMode(tieMode)
, _sameStrand(sameStrand)
, _diffStrand(diffStrand)
, _reportDistance(reportDistance)
, _signDistance(signDistance)
, _strandedDistMode(_strandedDistMode)
, _ignoreOverlaps(ignoreOverlaps)
{
_bedA = new BedFile(_bedAFile);
......@@ -56,7 +59,8 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
int numOverlaps = 0;
vector<BED> closestB;
CHRPOS minDistance = INT_MAX;
vector<CHRPOS> distances;
int32_t curDistance = INT_MAX;
vector<int32_t> distances;
// is there at least one feature in B on the same chrom
// as the current A feature?
......@@ -102,32 +106,48 @@ void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
}
// the hit is to the "left" of A
else if (h->end <= a.start) {
if ((a.start - h->end) < minDistance) {
minDistance = a.start - h->end;
curDistance = a.start - h->end;
if (_signDistance) {
if ((_strandedDistMode == "ref")
|| (_strandedDistMode == "a" && a.strand != "-")
|| (_strandedDistMode == "b" && h->strand == "-")) {
curDistance = -curDistance;
}
}
if (abs(curDistance) < minDistance) {
minDistance = abs(curDistance);
closestB.clear();
closestB.push_back(*h);
distances.clear();
distances.push_back(minDistance);
distances.push_back(curDistance);
}
else if ((a.start - h->end) == minDistance) {
else if (abs(curDistance) == minDistance) {
minDistance = abs(curDistance);
closestB.push_back(*h);
distances.push_back(minDistance);
distances.push_back(curDistance);
}
}
// the hit is to the "right" of A
else if (h->start >= a.end) {
if ((h->start - a.end) < minDistance) {
minDistance = h->start - a.end;
curDistance = h->start - a.end;
if (_signDistance) {
if ((_strandedDistMode == "a" && a.strand == "-")
|| (_strandedDistMode == "b" && h->strand != "-")) {
curDistance = -curDistance;
}
}
if (abs(curDistance) < minDistance) {
closestB.clear();
closestB.push_back(*h);
distances.clear();
distances.push_back(minDistance);
distances.push_back(curDistance);
}
else if ((h->start - a.end) == minDistance) {
else if (abs(curDistance) == minDistance) {
closestB.push_back(*h);
distances.push_back(minDistance);
distances.push_back(curDistance);
}
}
}
......
......@@ -29,7 +29,8 @@ public:
// constructor
BedClosest(string &bedAFile, string &bedBFile,
bool sameStrand, bool diffStrand, string &tieMode,
bool reportDistance, bool ignoreOverlaps);
bool reportDistance, bool signDistance, string &strandedDistMode,
bool ignoreOverlaps);
// destructor
~BedClosest(void);
......@@ -46,6 +47,8 @@ private:
bool _sameStrand;
bool _diffStrand;
bool _reportDistance;
bool _signDistance;
string _strandedDistMode;
bool _ignoreOverlaps;
BedFile *_bedA, *_bedB;
......
......@@ -32,6 +32,7 @@ int main(int argc, char* argv[]) {
string bedAFile;
string bedBFile;
string tieMode = "all";
string strandedDistMode = "ref";
bool haveBedA = false;
bool haveBedB = false;
......@@ -40,6 +41,8 @@ int main(int argc, char* argv[]) {
bool diffStrand = false;
bool ignoreOverlaps = false;
bool reportDistance = false;
bool signDistance = false;
bool haveStrandedDistMode = false;
// check to see if we should print out some help
......@@ -84,6 +87,15 @@ int main(int argc, char* argv[]) {
else if (PARAMETER_CHECK("-d", 2, parameterLength)) {
reportDistance = true;
}
else if (PARAMETER_CHECK("-D", 2, parameterLength)) {
if ((i+1) < argc) {
reportDistance = true;
signDistance = true;
haveStrandedDistMode = true;
strandedDistMode = argv[i + 1];
i++;
}
}
else if (PARAMETER_CHECK("-io", 3, parameterLength)) {
ignoreOverlaps = true;
}
......@@ -111,6 +123,12 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Request \"all\" or \"first\" or \"last\" for Tie Mode (-t)" << endl << "*****" << endl;
showHelp = true;
}
if (haveStrandedDistMode && (strandedDistMode != "a") && (strandedDistMode != "b")
&& (strandedDistMode != "ref")) {
cerr << endl << "*****" << endl << "*****ERROR: Request \"a\" or \"b\" or \"ref\" for Stranded Distance Mode (-D)" << endl << "*****" << endl;
showHelp = true;
}
if (sameStrand && diffStrand) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
......@@ -118,7 +136,7 @@ int main(int argc, char* argv[]) {
}
if (!showHelp) {
BedClosest *bc = new BedClosest(bedAFile, bedBFile, sameStrand, diffStrand, tieMode, reportDistance, ignoreOverlaps);
BedClosest *bc = new BedClosest(bedAFile, bedBFile, sameStrand, diffStrand, tieMode, reportDistance, signDistance, strandedDistMode, ignoreOverlaps);
delete bc;
return 0;
}
......@@ -144,13 +162,24 @@ void ShowHelp(void) {
cerr << "\t\tthat overlaps A on the _same_ strand." << endl;
cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
cerr << "\t-s\t" << "Require opposite strandedness. That is, find the closest feature in B" << endl;
cerr << "\t-S\t" << "Require opposite strandedness. That is, find the closest feature in B" << endl;
cerr << "\t\tthat overlaps A on the _opposite_ strand." << endl;
cerr << "\t\t- By default, overlaps are reported without respect to strand." << endl << endl;
cerr << "\t-d\t" << "In addition to the closest feature in B, " << endl;
cerr << "\t\treport its distance to A as an extra column." << endl;
cerr << "\t\t- The reported distance for overlapping features will be 0." << endl << endl;
cerr << "\t-D\t" << "Like -d, report the closest feature in B, and its distance to A" << endl;
cerr << "\t\tas an extra column. Unlike -d, use negative distances to report" << endl;
cerr << "\t\tupstream features. You must specify which orientation defines \"upstream\"." << endl;
cerr << "\t\tThe options are:" << endl;
cerr << "\t\t- \"ref\" Report distance with respect to the reference genome. " << endl;
cerr << "\t\t B features with a lower (start, stop) are upstream" << endl;
cerr << "\t\t- \"a\" Report distance with respect to A." << endl;
cerr << "\t\t When A is on the - strand, \"upstream\" means B has a higher (start,stop)." << endl;
cerr << "\t\t- \"b\" Report distance with respect to B." << endl;
cerr << "\t\t When B is on the - strand, \"upstream\" means A has a higher (start,stop)." << endl << endl;
cerr << "\t-no\t" << "Ignore features in B that overlap A. That is, we want close, but " << endl;
cerr << "\t\tnot touching features only." << endl << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment