From 5795975fd8b2c61f628abc9bafa51d6d6fe67cfa Mon Sep 17 00:00:00 2001 From: nkindlon <nek3d@virginia.edu> Date: Thu, 3 Oct 2013 19:20:21 -0400 Subject: [PATCH] added BamRecord::printUnmapped method for noHit and count functionality. --- src/utils/FileRecordTools/FileRecordMgr.cpp | 20 +++---- src/utils/FileRecordTools/RecordOutputMgr.cpp | 6 +- .../FileRecordTools/Records/BamRecord.cpp | 59 ++++++++++++++----- src/utils/FileRecordTools/Records/BamRecord.h | 7 ++- src/utils/FileRecordTools/Records/Record.cpp | 5 -- src/utils/FileRecordTools/Records/Record.h | 2 + 6 files changed, 64 insertions(+), 35 deletions(-) diff --git a/src/utils/FileRecordTools/FileRecordMgr.cpp b/src/utils/FileRecordTools/FileRecordMgr.cpp index 08998aaa..9bc93bdf 100644 --- a/src/utils/FileRecordTools/FileRecordMgr.cpp +++ b/src/utils/FileRecordTools/FileRecordMgr.cpp @@ -126,18 +126,16 @@ Record *FileRecordMgr::allocateAndGetNextRecord() } // In the rare case of Bam records where both the read and it's mate failed to map, // Ignore the record. Delete and return null - if (record->isUnmapped() && record->isMateUnmapped()) { - _recordMgr->deleteRecord(record); - return NULL; - } - if (!record->coordsValid()) { - cerr << "Error: Invalid record in file " << _filename << ". Record is " << endl << *record << endl; - exit(1); - } + if (!(record->isUnmapped() && record->isMateUnmapped())) { + if (!record->coordsValid()) { + cerr << "Error: Invalid record in file " << _filename << ". Record is " << endl << *record << endl; + exit(1); + } - //test for sorted order, if necessary. - if (_context->getSortedInput()) { - testInputSortOrder(record); + //test for sorted order, if necessary. + if (_context->getSortedInput()) { + testInputSortOrder(record); + } } assignChromId(record); _totalRecordLength += (unsigned long)(record->getEndPos() - record->getStartPos()); diff --git a/src/utils/FileRecordTools/RecordOutputMgr.cpp b/src/utils/FileRecordTools/RecordOutputMgr.cpp index 76f9a405..4f10ff3b 100644 --- a/src/utils/FileRecordTools/RecordOutputMgr.cpp +++ b/src/utils/FileRecordTools/RecordOutputMgr.cpp @@ -97,7 +97,11 @@ RecordOutputMgr::printBamType RecordOutputMgr::printBamRecord(RecordKeyList &key return BAM_AS_BAM; } else { if (!bamOutputOnly) { - static_cast<const BamRecord *>(record)->print(_outBuf, _currBlockList); + if (record->isUnmapped()) { + record->printUnmapped(_outBuf); + } else { + static_cast<const BamRecord *>(record)->print(_outBuf, _currBlockList); + } } return BAM_AS_BED; } diff --git a/src/utils/FileRecordTools/Records/BamRecord.cpp b/src/utils/FileRecordTools/Records/BamRecord.cpp index 39096456..0ea849a3 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.cpp +++ b/src/utils/FileRecordTools/Records/BamRecord.cpp @@ -89,24 +89,51 @@ void BamRecord::printRemainingBamFields(QuickString &outBuf, RecordKeyList *keyL outBuf.append(_endPos); outBuf.append("\t0,0,0", 6); outBuf.append('\t'); - outBuf.append((int)keyList->size()); - - vector<int> blockLengths; - vector<int> blockStarts; - for (RecordKeyList::const_iterator_type iter = keyList->begin(); iter != keyList->end(); iter = keyList->next()) { - const Record *block = iter->value(); - blockLengths.push_back(block->getEndPos() - block->getStartPos()); - blockStarts.push_back(block->getStartPos() - _bamAlignment.Position); - } - outBuf.append('\t'); - for (int i=0; i < (int)blockLengths.size(); i++) { - outBuf.append(blockLengths[i]); - outBuf.append(','); + int numBlocks = (int)keyList->size(); + + if (numBlocks > 0) { + outBuf.append(numBlocks); + + vector<int> blockLengths; + vector<int> blockStarts; + for (RecordKeyList::const_iterator_type iter = keyList->begin(); iter != keyList->end(); iter = keyList->next()) { + const Record *block = iter->value(); + blockLengths.push_back(block->getEndPos() - block->getStartPos()); + blockStarts.push_back(block->getStartPos() - _bamAlignment.Position); + } + + outBuf.append('\t'); + for (int i=0; i < (int)blockLengths.size(); i++) { + outBuf.append(blockLengths[i]); + outBuf.append(','); + } + outBuf.append('\t'); + for (int i=0; i < (int)blockStarts.size(); i++) { + outBuf.append( blockStarts[i]); + outBuf.append(','); + } } + else { + outBuf.append("1\t0,\t0,"); + } +} + +void BamRecord::printUnmapped(QuickString &outBuf) const { + outBuf.append(_chrName); + outBuf.append("\t-1\t-1\t"); + outBuf.append(_name); outBuf.append('\t'); - for (int i=0; i < (int)blockStarts.size(); i++) { - outBuf.append( blockStarts[i]); - outBuf.append(','); + outBuf.append(_score); + outBuf.append("\t.\t-1\t-1\t-1\t0,0,0\t0\t.\t."); // dot for strand, -1 for blockStarts and blockEnd +} + +bool BamRecord::sameChromIntersects(const Record *record, + bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const +{ + // Special: For BAM records that are unmapped, intersect should automatically return false + if (_isUnmapped || record->isUnmapped()) { + return false; } + return Record::sameChromIntersects(record, wantSameStrand, wantDiffStrand, overlapFraction, reciprocal); } diff --git a/src/utils/FileRecordTools/Records/BamRecord.h b/src/utils/FileRecordTools/Records/BamRecord.h index bd275e79..3029b28e 100644 --- a/src/utils/FileRecordTools/Records/BamRecord.h +++ b/src/utils/FileRecordTools/Records/BamRecord.h @@ -30,14 +30,17 @@ public: virtual void print(QuickString &outBuf, RecordKeyList *keyList) const; virtual void print(QuickString &outBuf, const QuickString & start, const QuickString & end, RecordKeyList *keyList) const; virtual void printNull(QuickString &outBuf) const; - void printRemainingBamFields(QuickString &outBuf, RecordKeyList *keyList) const; - + virtual void printRemainingBamFields(QuickString &outBuf, RecordKeyList *keyList) const; + virtual void printUnmapped(QuickString &outBuf) const; virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::BAM_RECORD_TYPE; } const BamTools::BamAlignment &getAlignment() const { return _bamAlignment; } int getBamChromId() const { return _bamChromId; } + virtual bool sameChromIntersects(const Record *otherRecord, + bool sameStrand, bool diffStrand, float overlapFraction, bool reciprocal) const; + protected: BamTools::BamAlignment _bamAlignment; int _bamChromId; //different from chromId, because BAM file may be in different order diff --git a/src/utils/FileRecordTools/Records/Record.cpp b/src/utils/FileRecordTools/Records/Record.cpp index e5bc1e6f..8ec1efd5 100644 --- a/src/utils/FileRecordTools/Records/Record.cpp +++ b/src/utils/FileRecordTools/Records/Record.cpp @@ -135,11 +135,6 @@ bool Record::intersects(const Record *record, bool Record::sameChromIntersects(const Record *record, bool wantSameStrand, bool wantDiffStrand, float overlapFraction, bool reciprocal) const { - // Special: For BAM records that are unmapped, intersect should automatically return false - if (_isUnmapped || record->_isUnmapped) { - return false; - } - //If user requested hits only on same strand, or only on different strands, //rule out different strandedness first. //If the strand is unknown in either case, then queries regarding strandedness diff --git a/src/utils/FileRecordTools/Records/Record.h b/src/utils/FileRecordTools/Records/Record.h index 9c5c3ff1..9f80a960 100644 --- a/src/utils/FileRecordTools/Records/Record.h +++ b/src/utils/FileRecordTools/Records/Record.h @@ -92,6 +92,8 @@ public: // Bam record. bool isUnmapped() const { return _isUnmapped; } bool isMateUnmapped() const { return _isMateUnmapped; } + virtual void printUnmapped(QuickString &outBuf) const {} + virtual bool operator < (const Record &other) const; -- GitLab