Skip to content
Snippets Groups Projects
Commit ca4265ec authored by nkindlon's avatar nkindlon
Browse files

fixed negative coords, zero len intervals, Bed4 format error

parent 36fdad10
No related branches found
No related tags found
No related merge requests found
......@@ -44,10 +44,10 @@ FileRecordMgr::~FileRecordMgr(){
bool FileRecordMgr::open(){
const QuickString &filename = _context->getInputFileName(_contextFileIdx);
_bufStreamMgr = new BufferedStreamMgr(filename);
_filename = _context->getInputFileName(_contextFileIdx);
_bufStreamMgr = new BufferedStreamMgr(_filename);
if (!_bufStreamMgr->init()) {
fprintf(stderr, "Error: unable to open file or unable to determine types for file %s.\n", filename.c_str());
cerr << "Error: unable to open file or unable to determine types for file " << _filename << endl;
delete _bufStreamMgr;
_bufStreamMgr = NULL;
exit(1);
......@@ -56,7 +56,7 @@ bool FileRecordMgr::open(){
_fileType = _bufStreamMgr->getTypeChecker().getFileType();
_recordType = _bufStreamMgr->getTypeChecker().getRecordType();
if (_fileType == FileRecordTypeChecker::UNKNOWN_FILE_TYPE || _recordType == FileRecordTypeChecker::UNKNOWN_RECORD_TYPE) {
fprintf(stderr, "Error: Unable to determine type for file %s.\n", filename.c_str());
cerr << "Error: Unable to determine type for file " << _filename << endl;
delete _bufStreamMgr;
_bufStreamMgr = NULL;
exit(1);
......@@ -64,11 +64,11 @@ bool FileRecordMgr::open(){
allocateFileReader();
_recordMgr = new RecordMgr(_recordType, _freeListBlockSize);
_fileReader->setFileName(filename.c_str());
_fileReader->setFileName(_filename.c_str());
_fileReader->setInputStream(_bufStreamMgr);
_fileReader->setContext(_context);
if (!_fileReader->open()) {
fprintf(stderr, "Error: Types determined but can't open file %s.\n", filename.c_str());
cerr << "Error: Types determined but can't open file " << _filename << endl;
delete _bufStreamMgr;
_bufStreamMgr = NULL;
exit(1);
......@@ -125,6 +125,11 @@ Record *FileRecordMgr::allocateAndGetNextRecord()
return NULL;
}
if (!record->coordsValid()) {
cerr << "Error: Invalid record in file " << _filename << ". Record is " << endl << *record << endl;
exit(1);
}
//test for sorted order, if necessary.
if (_context->getSortedInput()) {
testInputSortOrder(record);
......@@ -184,17 +189,16 @@ void FileRecordMgr::testInputSortOrder(Record *record)
//Report a sort-order violation on stderr and terminate the program with exit status 1.
//
//record          - the offending record; it is printed after the message via operator<<.
//genomeFileError - true when the record's order disagrees with the genome file,
//                  false when the record is simply out of order within its own file.
//
//Each message is written exactly once, through cerr. The older fprintf-based
//reporting emitted the same information a second time and has been dropped,
//along with the temporary buffer it needed.
void FileRecordMgr::sortError(const Record *record, bool genomeFileError)
{
    if (genomeFileError) {
        cerr << "Error: Sorted input specified, but the file " << _filename << " has the following record with a different sort order than the genomeFile " <<
                _context->getGenomeFile()->getGenomeFileName() << endl;
    } else {
        cerr << "Error: Sorted input specified, but the file " << _filename << " has the following out of order record" << endl;
    }
    cerr << *record << endl;
    exit(1);
}
//Forward the given record to _recordMgr->deleteRecord().
//NOTE(review): presumably this returns the record to the RecordMgr that allocated it -- confirm against RecordMgr.
void FileRecordMgr::deleteRecord(const Record *record) {
_recordMgr->deleteRecord(record);
}
......
......@@ -141,6 +141,7 @@ public:
void setFullBamFlags(bool flag) { _useFullBamTags = flag; }
private:
QuickString _filename;
BufferedStreamMgr *_bufStreamMgr;
int _contextFileIdx;
......
......@@ -113,6 +113,7 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi
_outBuf.append(_context->getHeader(_context->getQueryFileIdx()));
_context->setPrintHeader(false);
}
const_cast<Record *>(keyList.getKey())->undoZeroLength();
_currBlockList = blockList;
......@@ -157,15 +158,67 @@ void RecordOutputMgr::printRecord(RecordKeyList &keyList, RecordKeyList *blockLi
void RecordOutputMgr::reportOverlapDetail(const Record *keyRecord, const Record *hitRecord)
{
//get the max start and min end as strings.
const QuickString &startStr = keyRecord->getStartPos() > hitRecord->getStartPos() ? keyRecord->getStartPosStr() : hitRecord->getStartPosStr();
const QuickString &endStr = keyRecord->getEndPos() < hitRecord->getEndPos() ? keyRecord->getEndPosStr() : hitRecord->getEndPosStr();
const_cast<Record *>(hitRecord)->undoZeroLength();
int maxStart = max(keyRecord->getStartPos(), hitRecord->getStartPos());
int minEnd = min(keyRecord->getEndPos(), hitRecord->getEndPos());
const QuickString *startStr = NULL;
const QuickString *endStr = NULL;
int maxStart = 0;
int minEnd = 0;
int keyStart = keyRecord->getStartPos();
int keyEnd = keyRecord->getEndPos();
int hitStart = hitRecord->getStartPos();
int hitEnd = hitRecord->getEndPos();
if ( keyStart>= hitStart) {
//The key start is at or after the hit start, but we still need to make sure the hit end is at or after keyStart.
//The reason is that, in some rare cases, such as both the key and the hit having been zero length intervals,
//the normal intersection process that lets us simply report maxStart and minEnd does not necessarily apply.
if (hitEnd >= keyStart) {
//this is ok. We have a normal intersection where the key comes after the hit.
maxStart = keyStart;
startStr = &(keyRecord->getStartPosStr());
minEnd = min(keyEnd, hitEnd);
endStr = keyRecord->getEndPos() < hitRecord->getEndPos() ? &(keyRecord->getEndPosStr()) : &(hitRecord->getEndPosStr());
} else {
//this is the weird case of not a "real" intersection. The keyStart is greater than the hitEnd. So just report the key as is.
maxStart = keyStart;
minEnd = keyEnd;
startStr = &(keyRecord->getStartPosStr());
endStr = &(keyRecord->getEndPosStr());
}
} else {
//all of the above, but backwards. keyStart is before hitStart.
if (keyEnd >= hitStart) {
//normal intersection, key first
maxStart = hitStart;
startStr = &(hitRecord->getStartPosStr());
minEnd = min(keyEnd, hitEnd);
endStr = keyRecord->getEndPos() < hitRecord->getEndPos() ? &(keyRecord->getEndPosStr()) : &(hitRecord->getEndPosStr());
} else {
//this is the weird case of not a "real" intersection. The hitStart is greater than the keyEnd. So just report the hit as is.
maxStart = hitStart;
minEnd = hitEnd;
startStr = &(hitRecord->getStartPosStr());
endStr = &(hitRecord->getEndPosStr());
}
}
// const QuickString &startStr = keyRecord->getStartPos() > hitRecord->getStartPos() ? keyRecord->getStartPosStr() : hitRecord->getStartPosStr();
// const QuickString &endStr = keyRecord->getEndPos() < hitRecord->getEndPos() ? keyRecord->getEndPosStr() : hitRecord->getEndPosStr();
//
// int maxStart = max(keyRecord->getStartPos(), hitRecord->getStartPos());
// int minEnd = min(keyRecord->getEndPos(), hitRecord->getEndPos());
//
if (!_context->getWriteA() && !_context->getWriteB() && !_context->getWriteOverlap() && !_context->getLeftJoin()) {
printKey(keyRecord, startStr, endStr);
printKey(keyRecord, *startStr, *endStr);
newline();
}
else if ((_context->getWriteA() && _context->getWriteB()) || _context->getLeftJoin()) {
......@@ -179,7 +232,7 @@ void RecordOutputMgr::reportOverlapDetail(const Record *keyRecord, const Record
newline();
}
else if (_context->getWriteB()) {
printKey(keyRecord, startStr, endStr);
printKey(keyRecord, *startStr, *endStr);
tab();
hitRecord->print(_outBuf);
newline();
......
......@@ -23,6 +23,7 @@ bool Bed4Interval::initFromFile(SingleLineDelimTextFileReader *fileReader)
//Append this record to outBuf: first whatever Bed3Interval::print writes,
//then a tab separator, then the name field (_name).
void Bed4Interval::print(QuickString &outBuf) const
{
Bed3Interval::print(outBuf);
//separator between the Bed3 portion and the name column
outBuf.append('\t');
outBuf.append(_name);
}
......
......@@ -6,7 +6,8 @@ Record::Record()
: _chrId(-1),
_startPos(-1),
_endPos(-1),
_strand(UNKNOWN)
_strand(UNKNOWN),
_zeroLength(false)
{
}
......@@ -177,3 +178,37 @@ bool Record::sameChromIntersects(const Record *record,
return false;
}
//Check the record's coordinates. Returns false when either coordinate is
//negative or the end precedes the start. Otherwise returns true, after first
//calling adjustZeroLength(), so a successful check may modify the coordinates.
bool Record::coordsValid() {
    const bool coordsOk = (_startPos >= 0) && (_endPos >= 0) && (_startPos <= _endPos);
    if (!coordsOk) {
        return false;
    }
    //valid so far; widen the interval if start and end coincide
    adjustZeroLength();
    return true;
}
//If start and end are equal, flag the record as zero length and widen it by
//one position on each side. Records with differing start and end are untouched.
//NOTE(review): a record at 0,0 ends up with a start of -1 -- confirm downstream code tolerates that.
void Record::adjustZeroLength()
{
    if (_startPos != _endPos) {
        return;
    }
    _zeroLength = true;
    --_startPos;
    ++_endPos;
}
//Reverse the widening done by adjustZeroLength(): when the zero-length flag is
//set, move start forward by one, end back by one, and clear the flag.
//Does nothing when the flag is not set.
void Record::undoZeroLength()
{
    if (!_zeroLength) {
        return;
    }
    _zeroLength = false;
    ++_startPos;
    --_endPos;
}
//Stream insertion for Record: renders the record with its print() method
//into a temporary QuickString and writes that text to the stream.
ostream &operator << (ostream &out, const Record &record)
{
    QuickString printed;
    record.print(printed);
    return out << printed;
}
......@@ -33,6 +33,8 @@ public:
virtual void print(QuickString &outBuf, int start, int end) const {}
virtual void print(QuickString &outBuf, const QuickString & start, const QuickString & end) const {}
virtual void printNull(QuickString &outBuf) const {}
friend ostream &operator << (ostream &out, const Record &record);
virtual const Record & operator=(const Record &);
virtual const QuickString &getChrName() const { return _chrName; }
......@@ -54,13 +56,14 @@ public:
virtual const QuickString &getEndPosStr() const { return _endPosStr; }
virtual void setEndPosStr(const QuickString &str) { _endPosStr = str; }
//Returns the zero-length flag (_zeroLength).
virtual bool getZeroLength() const { return _zeroLength; }
//Overwrites the zero-length flag only; the coordinates are not changed.
virtual void setZeroLength(bool val) { _zeroLength = val; }
virtual strandType getStrand() const { return _strand; }
virtual void setStrand(strandType val) { _strand = val; }
virtual void setStrand(char val);
virtual char getStrandChar() const;
//And we have a similar problem with name and score
virtual const QuickString &getName() const { return _name; }
virtual void setName(const QuickString &chr) { _name = chr; }
virtual void setName(const string &chr) { _name = chr; }
......@@ -73,6 +76,18 @@ public:
virtual FileRecordTypeChecker::RECORD_TYPE getType() const { return FileRecordTypeChecker::UNKNOWN_RECORD_TYPE; }
virtual bool coordsValid(); //Test that no coordinate is negative and that end is not less than start; also checks for zero length (see below).
//Some files can have insertions of the form 2,2. When found, such a record should be translated to cover
//the base before and the base after, meaning startPos is decremented and endPos is incremented.
//adjustZeroLength() finds and handles that case.
//The startPosStr and endPosStr strings are deliberately not adjusted: they aren't used in
//calculation, only in output, and it would be slower to change them
//and then change them back.
virtual void adjustZeroLength();
virtual void undoZeroLength(); //Change the coordinates back just before output.
//Returns the zero-length flag (_zeroLength).
virtual bool isZeroLength() const { return _zeroLength; }
virtual bool operator < (const Record &other) const;
virtual bool operator > (const Record &other) const;
......@@ -110,6 +125,7 @@ protected:
QuickString _name;
QuickString _score;
strandType _strand;
bool _zeroLength;
};
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment