Skip to content
Snippets Groups Projects
Commit 7434d8a3 authored by nkindlon's avatar nkindlon
Browse files

Stop Chromsweeep cache scan when DB is after query; allow BAM unmapped reads/chrom in genome file.

parent 7bea8541
No related branches found
No related tags found
No related merge requests found
......@@ -23,12 +23,20 @@ NewGenomeFile::NewGenomeFile(const QuickString &genomeFilename)
NewGenomeFile::NewGenomeFile(const BamTools::RefVector &refVector)
: _maxId(-1)
{
for (size_t i = 0; i < refVector.size(); ++i) {
size_t i = 0;
for (; i < refVector.size(); ++i) {
QuickString chrom = refVector[i].RefName;
CHRPOS length = refVector[i].RefLength;
_maxId++;
_chromSizeIds[chrom] = pair<CHRPOS, CHRPOS>(length, _maxId);
_chromList.push_back(chrom);
}
// Special: BAM files can have unmapped reads, which show as no chromosome, or an empty chrom string.
// Add in an empty chrom so these don't error.
_maxId++;
_chromSizeIds[""] = pair<CHRPOS, int>(0, _maxId);
_chromList.push_back("");
}
// Destructor
......@@ -69,6 +77,13 @@ void NewGenomeFile::loadGenomeFileIntoMap() {
cerr << "Error: The genome file " << _genomeFileName << " has no valid entries. Exiting." << endl;
exit(1);
}
// Special: BAM files can have unmapped reads, which show as no chromosome, or an empty chrom string.
// Add in an empty chrom so these don't error.
_maxId++;
_chromSizeIds[""] = pair<CHRPOS, int>(0, _maxId);
_chromList.push_back("");
_startOffsets.push_back(_genomeLength); //insert the final length as the last element
//to help with the lower_bound call in the projectOnGenome method.
genFile.close();
......
......@@ -38,7 +38,7 @@ public:
CHRPOS getChromSize(const QuickString &chrom); // return the size of a chromosome
CHRPOS getChromId(const QuickString &chrom); // return chromosome's sort order
const vector<QuickString> &getChromList() const { return _chromList; } // return a list of chrom names
CHRPOS getNumberOfChroms() const { return _chromList.size(); }
CHRPOS getNumberOfChroms() const { return _chromList.size() -1; }//the -1 excludes the blank chrom added for unmapped reads
const QuickString &getGenomeFileName() const { return _genomeFileName; }
......
......@@ -99,7 +99,7 @@ void NewChromSweep::scanCache(int dbIdx, RecordKeyVector &retList) {
if (_currQueryRec->sameChrom(cacheRec) && !(_currQueryRec->after(cacheRec))) {
if (intersects(_currQueryRec, cacheRec)) {
retList.push_back(cacheRec);
}
} else break; // cacheRec is after the query rec, stop scanning.
cacheIter = _caches[dbIdx].next();
}
else {
......
......@@ -23,7 +23,7 @@ if true; then
echo "generating data..."
mkdir perfData
cd perfData
../$BT random -l 1000 -n 20000000 -g ../human.hg19.genome | sort -k1,1 -k2,2n > a10M.bed
../$BT random -l 1000 -n 10000000 -g ../human.hg19.genome | sort -k1,1 -k2,2n > a10M.bed
../$BT random -l 1000 -n 10000000 -g ../human.hg19.genome | sort -k1,1 -k2,2n > b10M.bed
cp a10M.bed a10M_gzipped.bed
gzip a10M_gzipped.bed
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment