Skip to content
Snippets Groups Projects
Commit b5e9b670 authored by Aaron's avatar Aaron
Browse files

shuffleBed now uses the GenomeFile class.

parent ea0b66ae
No related branches found
No related tags found
No related merge requests found
...@@ -9,14 +9,14 @@ BIN_DIR = ../../bin/ ...@@ -9,14 +9,14 @@ BIN_DIR = ../../bin/
# ------------------- # -------------------
# define our includes # define our includes
# ------------------- # -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/genomeFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/
# ---------------------------------- # ----------------------------------
# define our source and object files # define our source and object files
# ---------------------------------- # ----------------------------------
SOURCES= shuffleBedMain.cpp shuffleBed.cpp SOURCES= shuffleBedMain.cpp shuffleBed.cpp
OBJECTS= $(SOURCES:.cpp=.o) OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o _EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= shuffleBed PROGRAM= shuffleBed
...@@ -37,6 +37,7 @@ $(BUILT_OBJECTS): $(SOURCES) ...@@ -37,6 +37,7 @@ $(BUILT_OBJECTS): $(SOURCES)
$(EXT_OBJECTS): $(EXT_OBJECTS):
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
clean: clean:
@echo "Cleaning up." @echo "Cleaning up."
......
...@@ -12,17 +12,16 @@ ...@@ -12,17 +12,16 @@
#include "lineFileUtilities.h" #include "lineFileUtilities.h"
#include "shuffleBed.h" #include "shuffleBed.h"
/*
Constructor
*/
BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, bool &haveSeed, bool &haveExclude, bool &sameChrom, int &seed) { BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, bool &haveSeed, bool &haveExclude, bool &sameChrom, int &seed) {
this->bedFile = bedFile;
this->genomeFile = genomeFile; this->bedFile = bedFile;
this->genomeFile = genomeFile;
this->excludeFile = excludeFile; this->excludeFile = excludeFile;
this->sameChrom = sameChrom; this->sameChrom = sameChrom;
this->haveExclude = haveExclude; this->haveExclude = haveExclude;
this->haveSeed = haveSeed; this->haveSeed = haveSeed;
this->numChroms = 0;
// use the supplied seed for the random // use the supplied seed for the random
// number generation if given. else, // number generation if given. else,
...@@ -35,7 +34,10 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, ...@@ -35,7 +34,10 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile,
srand((unsigned)time(0)); srand((unsigned)time(0));
} }
this->bed = new BedFile(bedFile); this->bed = new BedFile(bedFile);
this->genome = new GenomeFile(genomeFile);
this->chroms = genome->getChromList();
this->numChroms = genome->getNumberOfChroms();
if (this->haveExclude) { if (this->haveExclude) {
this->exclude = new BedFile(excludeFile); this->exclude = new BedFile(excludeFile);
...@@ -44,40 +46,13 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, ...@@ -44,40 +46,13 @@ BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile,
} }
/*
Destructor
*/
BedShuffle::~BedShuffle(void) { BedShuffle::~BedShuffle(void) {
} }
void BedShuffle::Shuffle(istream &bedInput) { void BedShuffle::Shuffle(istream &bedInput) {
// open the GENOME file for reading.
// if successful, load each chrom and it's size into
// the "chromSize" map. also compute the total size of the genome
// and store in "genomeSize"
ifstream genome(this->genomeFile.c_str(), ios::in);
if ( !genome ) {
cerr << "Error: The requested genome file (" <<this->genomeFile << ") could not be opened. Exiting!" << endl;
exit (1);
}
else {
string chrom;
unsigned int size;
while (genome >> chrom >> size) {
if (chrom.size() > 0 && size > 0) {
this->chromSizes[chrom] = size;
this->chroms.push_back(chrom);
this->numChroms++;
}
}
}
int lineNum = 0; int lineNum = 0;
string bedLine; // used to store the current (unparsed) line from the BED file. string bedLine; // used to store the current (unparsed) line from the BED file.
vector<string> bedFields; vector<string> bedFields;
...@@ -90,7 +65,6 @@ void BedShuffle::Shuffle(istream &bedInput) { ...@@ -90,7 +65,6 @@ void BedShuffle::Shuffle(istream &bedInput) {
BED bedEntry; // used to store the current BED line from the BED file. BED bedEntry; // used to store the current BED line from the BED file.
if (bed->parseLine(bedEntry, bedFields, lineNum)) { if (bed->parseLine(bedEntry, bedFields, lineNum)) {
// choose a new locus for this feat // choose a new locus for this feat
ChooseLocus(bedEntry); ChooseLocus(bedEntry);
bed->reportBedNewLine(bedEntry); bed->reportBedNewLine(bedEntry);
...@@ -103,28 +77,6 @@ void BedShuffle::Shuffle(istream &bedInput) { ...@@ -103,28 +77,6 @@ void BedShuffle::Shuffle(istream &bedInput) {
void BedShuffle::ShuffleWithExclusions(istream &bedInput) { void BedShuffle::ShuffleWithExclusions(istream &bedInput) {
// open the GENOME file for reading.
// if successful, load each chrom and it's size into
// the "chromSize" map. also compute the total size of the genome
// and store in "genomeSize".
ifstream genome(this->genomeFile.c_str(), ios::in);
if ( !genome ) {
cerr << "Error: The requested genome file (" <<this->genomeFile << ") could not be opened. Exiting!" << endl;
exit (1);
}
else {
string chrom;
unsigned int size;
while (genome >> chrom >> size) {
if (chrom.size() > 0 && size > 0) {
this->chromSizes[chrom] = size;
this->chroms.push_back(chrom);
this->numChroms++;
}
}
}
int lineNum = 0; int lineNum = 0;
string bedLine; // used to store the current (unparsed) line from the BED file. string bedLine; // used to store the current (unparsed) line from the BED file.
vector<string> bedFields; vector<string> bedFields;
...@@ -148,12 +100,14 @@ void BedShuffle::ShuffleWithExclusions(istream &bedInput) { ...@@ -148,12 +100,14 @@ void BedShuffle::ShuffleWithExclusions(istream &bedInput) {
exclude->FindOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end, bedEntry.strand, hits, false); exclude->FindOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end, bedEntry.strand, hits, false);
bool haveOverlap = false; bool haveOverlap = false;
for (vector<BED>::const_iterator h = hits.begin(); h != hits.end(); ++h) { vector<BED>::const_iterator hitsItr = hits.begin();
vector<BED>::const_iterator hitsEnd = hits.end();
for (; hitsItr != hitsEnd; ++hitsItr) {
int s = max(bedEntry.start, h->start); int s = max(bedEntry.start, hitsItr->start);
int e = min(bedEntry.end, h->end); int e = min(bedEntry.end, hitsItr->end);
if ( (e-s) > 0) { if ( (e - s) > 0) {
haveOverlap = true; haveOverlap = true;
break; /* stop looking. one overlap is enough*/ break; /* stop looking. one overlap is enough*/
} }
...@@ -175,12 +129,14 @@ void BedShuffle::ShuffleWithExclusions(istream &bedInput) { ...@@ -175,12 +129,14 @@ void BedShuffle::ShuffleWithExclusions(istream &bedInput) {
bedEntry.strand, hits, false); bedEntry.strand, hits, false);
haveOverlap = false; haveOverlap = false;
for (vector<BED>::const_iterator h = hits.begin(); h != hits.end(); ++h) { vector<BED>::const_iterator hitsItr = hits.begin();
vector<BED>::const_iterator hitsEnd = hits.end();
for (; hitsItr != hitsEnd; ++hitsItr) {
int s = max(bedEntry.start, h->start); int s = max(bedEntry.start, hitsItr->start);
int e = min(bedEntry.end, h->end); int e = min(bedEntry.end, hitsItr->end);
if ( (e-s) > 0) { if ( (e - s) > 0) {
haveOverlap = true; haveOverlap = true;
break; /* stop looking. one overlap is enough*/ break; /* stop looking. one overlap is enough*/
} }
...@@ -204,29 +160,27 @@ void BedShuffle::ShuffleWithExclusions(istream &bedInput) { ...@@ -204,29 +160,27 @@ void BedShuffle::ShuffleWithExclusions(istream &bedInput) {
void BedShuffle::ChooseLocus(BED &bedEntry) { void BedShuffle::ChooseLocus(BED &bedEntry) {
string chrom = bedEntry.chrom; string chrom = bedEntry.chrom;
int start = bedEntry.start; int start = bedEntry.start;
int end = bedEntry.end; int end = bedEntry.end;
int length = end - start; int length = end - start;
string randomChrom; string randomChrom;
int randomStart; int randomStart;
int chromSize; int chromSize;
if (!this->sameChrom) { if (!this->sameChrom) {
randomChrom = this->chroms[rand() % this->numChroms]; randomChrom = chroms[rand() % this->numChroms];
chromSize = this->chromSizes[randomChrom]; chromSize = genome->getChromSize(randomChrom);
randomStart = rand() % chromSize; randomStart = rand() % chromSize;
bedEntry.chrom = randomChrom; bedEntry.chrom = randomChrom;
bedEntry.start = randomStart; bedEntry.start = randomStart;
bedEntry.end = randomStart + length; bedEntry.end = randomStart + length;
} }
else { else {
chromSize = this->chromSizes[chrom]; chromSize = genome->getChromSize(chrom);
randomStart = rand() % chromSize; randomStart = rand() % chromSize;
bedEntry.start = randomStart; bedEntry.start = randomStart;
bedEntry.end = randomStart + length; bedEntry.end = randomStart + length;
} }
// ensure that the chosen location doesn't go past // ensure that the chosen location doesn't go past
...@@ -234,20 +188,18 @@ void BedShuffle::ChooseLocus(BED &bedEntry) { ...@@ -234,20 +188,18 @@ void BedShuffle::ChooseLocus(BED &bedEntry) {
// for a new spot. // for a new spot.
while (bedEntry.end > chromSize) { while (bedEntry.end > chromSize) {
if (!this->sameChrom) { if (!this->sameChrom) {
randomChrom = this->chroms[rand() % this->numChroms]; randomChrom = chroms[rand() % this->numChroms];
chromSize = this->chromSizes[randomChrom]; chromSize = genome->getChromSize(randomChrom);
randomStart = rand() % chromSize; randomStart = rand() % chromSize;
bedEntry.chrom = randomChrom; bedEntry.chrom = randomChrom;
bedEntry.start = randomStart; bedEntry.start = randomStart;
bedEntry.end = randomStart + length; bedEntry.end = randomStart + length;
} }
else { else {
chromSize = this->chromSizes[chrom]; chromSize = genome->getChromSize(chrom);
randomStart = rand() % chromSize; randomStart = rand() % chromSize;
bedEntry.start = randomStart; bedEntry.start = randomStart;
bedEntry.end = randomStart + length; bedEntry.end = randomStart + length;
} }
} }
} }
...@@ -260,13 +212,21 @@ void BedShuffle::DetermineBedInput() { ...@@ -260,13 +212,21 @@ void BedShuffle::DetermineBedInput() {
cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl; cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl;
exit (1); exit (1);
} }
if (this->haveExclude) { ShuffleWithExclusions(beds); } if (this->haveExclude) {
else {Shuffle(beds); } ShuffleWithExclusions(beds);
}
else {
Shuffle(beds);
}
} }
else { else {
// process stdin // process stdin
if (this->haveExclude) { ShuffleWithExclusions(cin); } if (this->haveExclude) {
else {Shuffle(cin); } ShuffleWithExclusions(cin);
}
else {
Shuffle(cin);
}
} }
} }
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
Licenced under the GNU General Public License 2.0+ license. Licenced under the GNU General Public License 2.0+ license.
******************************************************************************/ ******************************************************************************/
#include "bedFile.h" #include "bedFile.h"
#include "genomeFile.h"
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
...@@ -29,7 +31,8 @@ class BedShuffle { ...@@ -29,7 +31,8 @@ class BedShuffle {
public: public:
// constructor // constructor
BedShuffle(string &, string &, string &, bool &, bool &, bool &, int &); BedShuffle(string &bedFile, string &genomeFile, string &excludeFile,
bool &haveSeed, bool &haveExclude, bool &sameChrom, int &seed);
// destructor // destructor
~BedShuffle(void); ~BedShuffle(void);
...@@ -56,7 +59,8 @@ private: ...@@ -56,7 +59,8 @@ private:
BedFile *bed; BedFile *bed;
BedFile *exclude; BedFile *exclude;
map<string, int, less<string> > chromSizes; GenomeFile *genome;
vector<string> chroms; vector<string> chroms;
int numChroms; int numChroms;
}; };
...@@ -123,7 +123,7 @@ void ShowHelp(void) { ...@@ -123,7 +123,7 @@ void ShowHelp(void) {
cerr << "Summary: Randomly permute the locations of a BED file among a genome." << endl << endl; cerr << "Summary: Randomly permute the locations of a BED file among a genome." << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -g <genome> -i <bed>" << endl << endl; cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed> -g <genome>" << endl << endl;
cerr << "Options: " << endl; cerr << "Options: " << endl;
cerr << "\t-excl\t" << "A BED file of coordinates in which features in -i" << endl; cerr << "\t-excl\t" << "A BED file of coordinates in which features in -i" << endl;
......
...@@ -79,4 +79,8 @@ int GenomeFile::getChromSize(const string &chrom) { ...@@ -79,4 +79,8 @@ int GenomeFile::getChromSize(const string &chrom) {
vector<string> GenomeFile::getChromList() { vector<string> GenomeFile::getChromList() {
return _chromList; return _chromList;
} }
\ No newline at end of file
int GenomeFile::getNumberOfChroms() {
return _chromList.size();
}
...@@ -42,6 +42,7 @@ public: ...@@ -42,6 +42,7 @@ public:
int getChromSize(const string &chrom); int getChromSize(const string &chrom);
vector<string> getChromList(); vector<string> getChromList();
int getNumberOfChroms();
private: private:
string genomeFile; string genomeFile;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment