Skip to content
Snippets Groups Projects
Commit 841c5596 authored by Aaron Quinlan's avatar Aaron Quinlan
Browse files

Added writeA (-wa) parameter to intersectBed

	-wa allows the original A entry to be written for each overlap with B.
parent f510c0b9
No related branches found
No related tags found
No related merge requests found
......@@ -18,12 +18,14 @@
Constructor
*/
BedIntersect::BedIntersect(string &bedAFile, string &bedBFile, bool &anyHit,
bool &writeB, float &overlapFraction, bool &noHit, bool &writeCount) {
bool &writeA, bool &writeB, float &overlapFraction,
bool &noHit, bool &writeCount) {
this->bedAFile = bedAFile;
this->bedBFile = bedBFile;
this->anyHit = anyHit;
this->noHit = noHit;
this->writeA = writeA;
this->writeB = writeB;
this->writeCount = writeCount;
this->overlapFraction = overlapFraction;
......@@ -152,22 +154,37 @@ void BedIntersect::IntersectBed() {
if (bedA->parseBedLine(a, bedFields, lineNum)) {
vector<BED> hits;
bedB->binKeeperFind(bedB->bedMap[a.chrom], a.start, a.end, hits);
int numOverlaps = 0;
for (vector<BED>::iterator h = hits.begin(); h != hits.end(); ++h) {
int s = max(a.start, h->start);
int e = min(a.end, h->end);
if (s < e) {
// is there enough overlap (default ~ 1bp)
if ( ((float)(e-s) / (float)(a.end - a.start)) > this->overlapFraction ) {
numOverlaps++;
if (!anyHit && !noHit && !writeCount) {
if (!writeB) {
reportAIntersect(a,s,e); cout << endl;
if (writeA) {
reportA(a); cout << endl;
}
else {
reportAIntersect(a,s,e); cout << endl;
}
}
else {
reportAIntersect(a,s,e); cout << "\t";
reportB(*h); cout << endl;
if (writeA) {
reportA(a); cout << "\t";
reportB(*h); cout << endl;
}
else {
reportAIntersect(a,s,e); cout << "\t";
reportB(*h); cout << endl;
}
}
}
}
......@@ -198,39 +215,53 @@ void BedIntersect::IntersectBed() {
if (bedA->parseBedLine(a, bedFields, lineNum)) {
vector<BED> hits;
bedB->binKeeperFind(bedB->bedMap[a.chrom], a.start, a.end, hits);
int numOverlaps = 0;
for (vector<BED>::iterator h = hits.begin(); h != hits.end(); ++h) {
int s = max(a.start, h->start);
int e = min(a.end, h->end);
if (s < e) {
// is there enough overlap (default ~ 1bp)
if ( ((float)(e-s) / (float)(a.end - a.start)) > this->overlapFraction ) {
numOverlaps++;
if (!anyHit && !noHit && !writeCount) {
if (!writeB) {
reportAIntersect(a,s,e); cout << endl;
if (writeA) {
reportA(a); cout << endl;
}
else {
reportAIntersect(a,s,e); cout << endl;
}
}
else {
reportAIntersect(a,s,e); cout << "\t";
reportB(*h); cout << endl;
if (writeA) {
reportA(a); cout << "\t";
reportB(*h); cout << endl;
}
else {
reportAIntersect(a,s,e); cout << "\t";
reportB(*h); cout << endl;
}
}
}
}
}
if (anyHit && (numOverlaps >= 1)) {
reportA(a); cout << endl;
}
else if (writeCount) {
reportA(a); cout << "\t" << numOverlaps << endl;
}
else if (noHit && (numOverlaps == 0)) {
reportA(a); cout << endl;
}
}
}
if (anyHit && (numOverlaps >= 1)) {
reportA(a); cout << endl;
}
else if (writeCount) {
reportA(a); cout << "\t" << numOverlaps << endl;
}
else if (noHit && (numOverlaps == 0)) {
reportA(a); cout << endl;
}
}
}
}
}
}
// END Intersect BED3
......
......@@ -25,7 +25,7 @@ class BedIntersect {
public:
// constructor
BedIntersect(string &, string &, bool &, bool &, float &, bool &, bool &);
BedIntersect(string &, string &, bool &, bool &, bool &, float &, bool &, bool &);
// destructor
~BedIntersect(void);
......@@ -44,6 +44,7 @@ private:
string bedBFile;
string notInBFile;
bool anyHit;
bool writeA;
bool writeB;
bool writeCount;
float overlapFraction;
......
......@@ -25,13 +25,17 @@ int main(int argc, char* argv[]) {
// input arguments
float overlapFraction = 1E-9;
int slop = 0;
bool haveBedA = false;
bool haveBedB = false;
bool noHit = false;
bool anyHit = false;
bool writeA = false;
bool writeB = false;
bool writeCount = false;
bool haveFraction = false;
bool haveSlop = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
......@@ -53,34 +57,38 @@ int main(int argc, char* argv[]) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-a", 2, parameterLength)) {
haveBedA = true;
bedAFile = argv[i + 1];
if ((i+1) < argc) {
haveBedA = true;
bedAFile = argv[i + 1];
}
i++;
}
else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
haveBedB = true;
bedBFile = argv[i + 1];
if ((i+1) < argc) {
haveBedB = true;
bedBFile = argv[i + 1];
}
i++;
}
else if(PARAMETER_CHECK("-u", 2, parameterLength)) {
anyHit = true;
i++;
}
else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
haveFraction = true;
overlapFraction = atof(argv[i + 1]);
i++;
}
else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {
writeA = true;
}
else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {
writeB = true;
i++;
}
else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
writeCount = true;
i++;
}
else if (PARAMETER_CHECK("-v", 2, parameterLength)) {
noHit = true;
i++;
}
else {
cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
......@@ -103,10 +111,15 @@ int main(int argc, char* argv[]) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -wb OR -c, not both." << endl << "*****" << endl;
showHelp = true;
}
if (anyHit && writeCount) {
cerr << endl << "*****" << endl << "*****ERROR: Request either -u OR -c, not both." << endl << "*****" << endl;
showHelp = true;
}
if (!showHelp) {
BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeB, overlapFraction, noHit, writeCount);
BedIntersect *bi = new BedIntersect(bedAFile, bedBFile, anyHit, writeA, writeB, overlapFraction, noHit, writeCount);
bi->IntersectBed();
return 0;
}
......@@ -133,9 +146,11 @@ void ShowHelp(void) {
cerr << "OPTIONS: " << endl;
cerr << "\t" << "-u\t\t\t" << "Write ORIGINAL a.bed entry ONCE if ANY overlap bed." << endl << "\t\t\t\tIn other words, ignore multiple overlaps." << endl << endl;
cerr << "\t" << "-v \t\t\t" << "Only report those entries in A that have NO OVERLAP in B." << endl << "\t\t\t\tSimilar to grep -v." << endl << endl;
cerr << "\t" << "-s (100000)\t\t" << "Slop added before and after each entry in A" << endl << "\t\t\t\tUseful for finding overlaps within N bases upstream and downstream." << endl << endl;
cerr << "\t" << "-f (e.g. 0.05)\t\t" << "Minimum overlap req'd as fraction of a.bed." << endl << "\t\t\t\tDefault is 1E-9 (effectively 1bp)." << endl << endl;
cerr << "\t" << "-c \t\t\t" << "For each entry in A, report the number of hits in B while restricting to -f." << endl << "\t\t\t\tReports 0 for A entries that have no overlap with B." << endl << endl;
cerr << "\t" << "-wb \t\t\t" << "Write the entry in B for each overlap." << endl << "\t\t\t\tUseful for knowing _what_ A overlaps. Restricted by -f." << endl << endl;
cerr << "\t" << "-wa \t\t\t" << "Write the original entry in A for each overlap." << endl << endl;
cerr << "\t" << "-wb \t\t\t" << "Write the original entry in B for each overlap." << endl << "\t\t\t\tUseful for knowing _what_ A overlaps. Restricted by -f." << endl << endl;
cerr << "NOTES: " << endl;
cerr << "\t" << "-i stdin\t\t" << "Allows intersectBed to read BED from stdin. E.g.: cat a.bed | intersectBed -a stdin -b B.bed" << endl << endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment