From ab96a16b4af2581ce5971da4dd426a7ea555ffdd Mon Sep 17 00:00:00 2001 From: Aaron <aaronquinlan@gmail.com> Date: Mon, 11 Jan 2010 15:50:21 -0500 Subject: [PATCH] Added Derek's changes to BamTools that were causing compile errors. --- src/utils/BamTools/BGZF.cpp | 54 ++++++++++----------- src/utils/BamTools/BGZF.h | 32 +++++++------ src/utils/BamTools/BamAux.h | 80 ++++++++++++++++++-------------- src/utils/BamTools/BamReader.cpp | 22 +++++---- src/utils/BamTools/BamWriter.cpp | 1 - 5 files changed, 101 insertions(+), 88 deletions(-) diff --git a/src/utils/BamTools/BGZF.cpp b/src/utils/BamTools/BGZF.cpp index d0242d6c..225ddf08 100644 --- a/src/utils/BamTools/BGZF.cpp +++ b/src/utils/BamTools/BGZF.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 8 December 2009 (DB) +// Last modified: 11 January 2010 (DB) // --------------------------------------------------------------------------- // BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. @@ -40,19 +40,24 @@ BgzfData::BgzfData(void) // destructor BgzfData::~BgzfData(void) { - if(CompressedBlock) delete [] CompressedBlock; - if(UncompressedBlock) delete [] UncompressedBlock; + if(CompressedBlock) { delete[] CompressedBlock; } + if(UncompressedBlock) { delete[] UncompressedBlock; } } // closes BGZF file void BgzfData::Close(void) { + // skip if file not open, otherwise set flag if (!IsOpen) { return; } IsOpen = false; - // flush the BGZF block - if ( IsWriteOnly ) { FlushBlock(); } + // flush the current BGZF block + if (IsWriteOnly) { FlushBlock(); } + // write an empty block (as EOF marker) + int blockLength = DeflateBlock(); + fwrite(CompressedBlock, 1, blockLength, Stream); + // flush and close fflush(Stream); fclose(Stream); @@ -63,8 +68,6 @@ int BgzfData::DeflateBlock(void) { // initialize the gzip header char* buffer = CompressedBlock; - unsigned int bufferSize = CompressedBlockSize; - memset(buffer, 0, 18); buffer[0] = GZIP_ID1; buffer[1] = (char)GZIP_ID2; @@ -79,9 +82,11 @@ int BgzfData::DeflateBlock(void) { // loop to retry for blocks that do not compress enough int inputLength = BlockOffset; int compressedLength = 0; + unsigned int bufferSize = CompressedBlockSize; while(true) { - + + // initialize zstream values z_stream zs; zs.zalloc = NULL; zs.zfree = NULL; @@ -89,7 +94,7 @@ int BgzfData::DeflateBlock(void) { zs.avail_in = inputLength; zs.next_out = (Bytef*)&buffer[BLOCK_HEADER_LENGTH]; zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; - + // initialize the zlib compression algorithm if(deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK) { printf("ERROR: zlib deflate initialization failed.\n"); @@ -171,8 +176,8 @@ void BgzfData::FlushBlock(void) { if(numBytesWritten != blockLength) { printf("ERROR: Expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten); exit(1); - } - + } + BlockAddress += blockLength; } } @@ -213,6 +218,7 @@ int BgzfData::InflateBlock(const int& blockLength) { void BgzfData::Open(const string& filename, const char* mode) { + // determine open mode if ( strcmp(mode, "rb") == 0 ) { IsWriteOnly = false; } else if ( strcmp(mode, "wb") == 0) { @@ -222,25 +228,21 @@ void BgzfData::Open(const string& filename, const char* mode) { exit(1); } - /*********************************************************** - ARQ, 2010-Jan-03: set Stream to a file, stdin, or stdout. - ************************************************************/ - if ( (filename != "stdin") && (filename != "stdout") ) { // BAM to/from file - // open the file using the requested mode + // open Stream to read to/write from file, stdin, or stdout + // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03) + if ( (filename != "stdin") && (filename != "stdout") ) { + // read/wrtie BGZF data to/from a file Stream = fopen(filename.c_str(), mode); } - else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { // BAM from stdin - // need freopen() as stdin is already open - Stream = freopen(NULL, mode, stdin); - } - else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { // BAM to stdout - // need freopen() as stdout is already open + else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) ) { + // read BGZF data from stdin + Stream = freopen(NULL, mode, stdin); + } + else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) ) { + // write BGZF data to stdout Stream = freopen(NULL, mode, stdout); } - /*********************************************************** - End ARQ, 2010-Jan-03 - ************************************************************/ - + if(!Stream) { printf("ERROR: Unable to open the BAM file %s\n", filename.c_str() ); exit(1); diff --git a/src/utils/BamTools/BGZF.h b/src/utils/BamTools/BGZF.h index 28965427..894e460e 100644 --- a/src/utils/BamTools/BGZF.h +++ b/src/utils/BamTools/BGZF.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 8 December 2009 (DB) +// Last modified: 11 January 2010 (DB) // --------------------------------------------------------------------------- // BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. @@ -26,18 +26,21 @@ #include "zlib.h" // Platform-specific type definitions -#ifdef _MSC_VER - typedef char int8_t; - typedef unsigned char uint8_t; - typedef short int16_t; - typedef unsigned short uint16_t; - typedef int int32_t; - typedef unsigned int uint32_t; - typedef long long int64_t; - typedef unsigned long long uint64_t; -#else - #include <stdint.h> -#endif +#ifndef BAMTOOLS_TYPES +#define BAMTOOLS_TYPES + #ifdef _MSC_VER + typedef char int8_t; + typedef unsigned char uint8_t; + typedef short int16_t; + typedef unsigned short uint16_t; + typedef int int32_t; + typedef unsigned int uint32_t; + typedef long long int64_t; + typedef unsigned long long uint64_t; + #else + #include <stdint.h> + #endif +#endif // BAMTOOLS_TYPES namespace BamTools { @@ -117,7 +120,6 @@ struct BgzfData { inline bool BgzfData::CheckBlockHeader(char* header) { - return (header[0] == GZIP_ID1 && header[1] == (char)GZIP_ID2 && header[2] == Z_DEFLATED && @@ -171,7 +173,7 @@ unsigned int BgzfData::UnpackUnsignedInt(char* buffer) { // unpacks a buffer into an unsigned short inline unsigned short BgzfData::UnpackUnsignedShort(char* buffer) { - union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)];} un; + union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un; un.value = 0; un.valueBuffer[0] = buffer[0]; un.valueBuffer[1] = buffer[1]; diff --git a/src/utils/BamTools/BamAux.h b/src/utils/BamTools/BamAux.h index 5f5eee38..fb046421 100644 --- a/src/utils/BamTools/BamAux.h +++ b/src/utils/BamTools/BamAux.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 8 December 2009 (DB) +// Last modified: 11 Janaury 2010 (DB) // --------------------------------------------------------------------------- // Provides the basic constants, data structures, etc. for using BAM files // *************************************************************************** @@ -12,6 +12,7 @@ #define BAMAUX_H // C inclues +#include <cstdio> #include <cstdlib> #include <cstring> @@ -22,6 +23,23 @@ #include <utility> #include <vector> +// Platform-specific type definitions +#ifndef BAMTOOLS_TYPES +#define BAMTOOLS_TYPES + #ifdef _MSC_VER + typedef char int8_t; + typedef unsigned char uint8_t; + typedef short int16_t; + typedef unsigned short uint16_t; + typedef int int32_t; + typedef unsigned int uint32_t; + typedef long long int64_t; + typedef unsigned long long uint64_t; + #else + #include <stdint.h> + #endif +#endif // BAMTOOLS_TYPES + namespace BamTools { // BAM constants @@ -73,41 +91,31 @@ struct BamAlignment { // Returns true if alignment is second mate on read bool IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); } - - /* - Aaron's Additions - */ - - - // Sets "PCR duplicate" bit to TRUE - void SetAsDuplicate(void) { AlignmentFlag |= DUPLICATE; } - // Sets "failed quality control" bit to TRUE - void SetAsFailedQC(void) { AlignmentFlag |= QC_FAILED; } - // Sets "alignment is first mate" bit to TRUE - void SetAsFirstMate(void) { AlignmentFlag |= READ_1; } - // Sets "alignment is mapped" bit to TRUE - void SetAsUnMapped(void) { AlignmentFlag |= UNMAPPED; } - // Sets "alignment's mate is mapped" bit to TRUE - void SetAsMateUnMapped(void) { AlignmentFlag |= MATE_UNMAPPED; } - // Sets "alignment's mate mapped to reverse strand" bit to TRUE - void SetAsMateReverseStrand(void) { AlignmentFlag |= MATE_REVERSE; } - // Sets "alignment part of paired-end read" bit to TRUE - void SetAsPaired(void) { AlignmentFlag |= PAIRED; } - // Sets "position is primary alignment (determined by external app)" to true. - void SetAsSecondaryAlignment(void) { AlignmentFlag |= SECONDARY; } - // Sets "alignment is part of read that satisfied paired-end resolution" bit to TRUE - void SetAsProperPair(void) { AlignmentFlag |= PROPER_PAIR; } - // Sets "alignment mapped to reverse strand" bit to TRUE - void SetAsReverseStrand(void) { AlignmentFlag |= REVERSE; } - // Sets "alignment is second mate on read" bit to TRUE - void SetAsSecondMate(void) { AlignmentFlag |= READ_2; } - + // Manipulate alignment flag + public: + // Sets "PCR duplicate" bit + void SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; } + // Sets "failed quality control" bit + void SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; } + // Sets "alignment is first mate" bit + void SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; } + // Sets "alignment's mate is mapped" bit + void SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; } + // Sets "alignment's mate mapped to reverse strand" bit + void SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; } + // Sets "alignment part of paired-end read" bit + void SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; } + // Sets "alignment is part of read that satisfied paired-end resolution" bit + void SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; } + // Sets "alignment mapped to reverse strand" bit + void SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; } + // Sets "position is primary alignment (determined by external app)" + void SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; } + // Sets "alignment is second mate on read" bit + void SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; } + // Sets "alignment is mapped" bit + void SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; } - void SetInsertSize(int32_t size) { InsertSize = size; } - /* - END: Aaron's Additions - */ - public: // get "RG" tag data @@ -233,7 +241,7 @@ struct BamAlignment { int32_t Position; // Position (0-based) where alignment starts uint16_t Bin; // Bin in BAM file where this alignment resides uint16_t MapQuality; // Mapping quality score - uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods for available queries + uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate std::vector<CigarOp> CigarData; // CIGAR operations for this alignment int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned int32_t MatePosition; // Position (0-based) where alignment's mate starts diff --git a/src/utils/BamTools/BamReader.cpp b/src/utils/BamTools/BamReader.cpp index 560b5679..5dd65f22 100644 --- a/src/utils/BamTools/BamReader.cpp +++ b/src/utils/BamTools/BamReader.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 8 December 2009 (DB) +// Last modified: 11 January 2010(DB) // --------------------------------------------------------------------------- // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. @@ -586,9 +586,11 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) { return false; } + size_t elementsRead = 0; + // see if index is valid BAM index char magic[4]; - fread(magic, 1, 4, indexStream); + elementsRead = fread(magic, 1, 4, indexStream); if (strncmp(magic, "BAI\1", 4)) { printf("Problem with index file - invalid format.\n"); fclose(indexStream); @@ -597,7 +599,7 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) { // get number of reference sequences uint32_t numRefSeqs; - fread(&numRefSeqs, 4, 1, indexStream); + elementsRead = fread(&numRefSeqs, 4, 1, indexStream); // intialize space for BamIndex data structure Index.reserve(numRefSeqs); @@ -607,7 +609,7 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) { // get number of bins for this reference sequence int32_t numBins; - fread(&numBins, 4, 1, indexStream); + elementsRead = fread(&numBins, 4, 1, indexStream); if (numBins > 0) { RefData& refEntry = References[i]; @@ -622,11 +624,11 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) { // get binID uint32_t binID; - fread(&binID, 4, 1, indexStream); + elementsRead = fread(&binID, 4, 1, indexStream); // get number of regionChunks in this bin uint32_t numChunks; - fread(&numChunks, 4, 1, indexStream); + elementsRead = fread(&numChunks, 4, 1, indexStream); // intialize ChunkVector ChunkVector regionChunks; @@ -638,8 +640,8 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) { // get chunk boundaries (left, right) uint64_t left; uint64_t right; - fread(&left, 8, 1, indexStream); - fread(&right, 8, 1, indexStream); + elementsRead = fread(&left, 8, 1, indexStream); + elementsRead = fread(&right, 8, 1, indexStream); // save ChunkPair regionChunks.push_back( Chunk(left, right) ); @@ -656,7 +658,7 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) { // get number of linear offsets int32_t numLinearOffsets; - fread(&numLinearOffsets, 4, 1, indexStream); + elementsRead = fread(&numLinearOffsets, 4, 1, indexStream); // intialize LinearOffsetVector LinearOffsetVector offsets; @@ -666,7 +668,7 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) { uint64_t linearOffset; for (int j = 0; j < numLinearOffsets; ++j) { // read a linear offset & store - fread(&linearOffset, 8, 1, indexStream); + elementsRead = fread(&linearOffset, 8, 1, indexStream); offsets.push_back(linearOffset); } diff --git a/src/utils/BamTools/BamWriter.cpp b/src/utils/BamTools/BamWriter.cpp index 1e30a567..c834d45a 100644 --- a/src/utils/BamTools/BamWriter.cpp +++ b/src/utils/BamTools/BamWriter.cpp @@ -276,7 +276,6 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) { // write the base qualities string baseQualities = al.Qualities; char* pBaseQualities = (char*)al.Qualities.data(); - for(unsigned int i = 0; i < queryLen; i++) { pBaseQualities[i] -= 33; } mBGZF.Write(pBaseQualities, queryLen); -- GitLab