From dc4984e6b639b01b9e6beb072a5fb9daa2fbe704 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Wed, 14 Apr 2010 21:37:41 -0400
Subject: [PATCH] Added new version of BamTools and fixed a couple minor bugs
 in intersectBed that were caused by the branchmerge.

---
 src/bamToBed/bamToBed.cpp         |   2 +-
 src/intersectBed/intersectBed.cpp |  41 +--
 src/utils/BamTools/BGZF.h         |  12 +-
 src/utils/BamTools/BamAux.h       | 583 +++++++++++++++++++-----------
 src/utils/BamTools/BamReader.cpp  | 435 +++++++++++++---------
 src/utils/BamTools/BamReader.h    |   8 +-
 src/utils/BamTools/BamWriter.cpp  | 154 ++++++--
 7 files changed, 774 insertions(+), 461 deletions(-)

diff --git a/src/bamToBed/bamToBed.cpp b/src/bamToBed/bamToBed.cpp
index d082f86e..0874510c 100755
--- a/src/bamToBed/bamToBed.cpp
+++ b/src/bamToBed/bamToBed.cpp
@@ -246,7 +246,7 @@ void PrintBed(const BamAlignment &bam,  const RefVector &refs, bool useEditDista
 	if (bam.IsSecondMate()) name += "/2";
 
 	// rip through the CIGAR string and reconstruct the alignment coordinates
-	unsigned int alignmentEnd = bam.GetAlignmentEnd();
+	unsigned int alignmentEnd = bam.GetEndPosition();
 	//ParseCigarBed(bam.CigarData, alignmentEnd);
 	//alignmentEnd += bam.Position;
 
diff --git a/src/intersectBed/intersectBed.cpp b/src/intersectBed/intersectBed.cpp
index eff1f6ce..1b8359bf 100755
--- a/src/intersectBed/intersectBed.cpp
+++ b/src/intersectBed/intersectBed.cpp
@@ -172,34 +172,8 @@ bool BedIntersect::FindOneOrMoreOverlap(const BED &a) {
 
 void BedIntersect::IntersectBed() {
 
-/*
-	// load the "B" bed file into a map so
-	// that we can easily compare "A" to it for overlaps
-	_bedB->loadBedFileIntoMap();
-	string bedLine;                                                                                                                    
-	int lineNum = 0;					// current input line number
-	vector<BED> hits;					// vector of potential hits
-	vector<string> bedFields;			// vector for a BED entry
-	
-	// reserve some space
-	hits.reserve(100);
-	bedFields.reserve(12);	
-		
-	// process each entry in A
-	while (getline(bedInput, bedLine)) {
-
-		lineNum++;
-		Tokenize(bedLine,bedFields);
-		BED a;
-		// find the overlaps with B if it's a valid BED entry. 
-		if (_bedA->parseLine(a, bedFields, lineNum)) {
-			FindOverlaps(a, hits);
-			hits.clear();
-		}
-		// reset for the next input line
-		bedFields.clear();
-	}
-	*/
+	// load the "B" file into a map in order to 
+	// compare each entry in A to it in search of overlaps.
 	_bedB->loadBedFileIntoMap();                                                                                                                 
 	
 	int lineNum = 0;
@@ -207,6 +181,7 @@ void BedIntersect::IntersectBed() {
 	hits.reserve(100);
 	BED a;	
 	
+	// open the "A" file, process each BED entry and searh for overlaps.
 	_bedA->Open();
 	while (_bedA->GetNextBed(a, lineNum) == true) {
 		FindOverlaps(a, hits);
@@ -269,7 +244,7 @@ void BedIntersect::IntersectBam(string bamFile) {
 						writer.SaveAlignment(bam);
 				}
 				else {
-					if (_noHit == false) 
+					if (_noHit == true) 
 						writer.SaveAlignment(bam);	
 				}
 			}
@@ -294,13 +269,11 @@ void BedIntersect::DetermineBedInput() {
 	if (_bedA->bedFile != "stdin") {   
 		// it's BED or GFF
 		if (_bamInput == false) { 
-			ifstream beds(_bedA->bedFile.c_str(), ios::in);
-			if ( !beds ) {
-				cerr << "Error: The requested bed file (" << _bedA->bedFile << ") could not be opened. Exiting!" << endl;
-				exit (1);
-			}
 			IntersectBed();
 		}
+		else {
+			IntersectBam(_bedA->bedFile);
+		}
 	}
 	// reading from stdin
 	else {  
diff --git a/src/utils/BamTools/BGZF.h b/src/utils/BamTools/BGZF.h
index 894e460e..e1adf25d 100644
--- a/src/utils/BamTools/BGZF.h
+++ b/src/utils/BamTools/BGZF.h
@@ -1,5 +1,5 @@
 // ***************************************************************************
-// BGZF.h (c) 2009 Derek Barnett, Michael Strömberg
+// BGZF.h (c) 2009 Derek Barnett, Michael Str�mberg
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
@@ -130,7 +130,7 @@ bool BgzfData::CheckBlockHeader(char* header) {
             BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN );
 }
 
-// packs an unsigned integer into the specified buffer
+// 'packs' an unsigned integer into the specified buffer
 inline
 void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
     buffer[0] = (char)value;
@@ -139,14 +139,14 @@ void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
     buffer[3] = (char)(value >> 24);
 }
 
-// packs an unsigned short into the specified buffer
+// 'packs' an unsigned short into the specified buffer
 inline
 void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) {
     buffer[0] = (char)value;
     buffer[1] = (char)(value >> 8);
 }
 
-// unpacks a buffer into a signed int
+// 'unpacks' a buffer into a signed int
 inline
 signed int BgzfData::UnpackSignedInt(char* buffer) {
     union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
@@ -158,7 +158,7 @@ signed int BgzfData::UnpackSignedInt(char* buffer) {
     return un.value;
 }
 
-// unpacks a buffer into an unsigned int
+// 'unpacks' a buffer into an unsigned int
 inline
 unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
     union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
@@ -170,7 +170,7 @@ unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
     return un.value;
 }
 
-// unpacks a buffer into an unsigned short
+// 'unpacks' a buffer into an unsigned short
 inline
 unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {
     union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
diff --git a/src/utils/BamTools/BamAux.h b/src/utils/BamTools/BamAux.h
index 97342bf9..3623f1c2 100644
--- a/src/utils/BamTools/BamAux.h
+++ b/src/utils/BamTools/BamAux.h
@@ -1,9 +1,9 @@
 // ***************************************************************************
-// BamAux.h (c) 2009 Derek Barnett, Michael Strömberg
+// BamAux.h (c) 2009 Derek Barnett, Michael Str�mberg
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 11 Janaury 2010 (DB)
+// Last modified: 14 April 2010 (DB)
 // ---------------------------------------------------------------------------
 // Provides the basic constants, data structures, etc. for using BAM files
 // ***************************************************************************
@@ -26,18 +26,18 @@
 // Platform-specific type definitions
 #ifndef BAMTOOLS_TYPES
 #define BAMTOOLS_TYPES
-	#ifdef _MSC_VER
-		typedef char                 int8_t;
-		typedef unsigned char       uint8_t;
-		typedef short               int16_t;
-		typedef unsigned short     uint16_t;
-		typedef int                 int32_t;
-		typedef unsigned int       uint32_t;
-		typedef long long           int64_t;
-		typedef unsigned long long uint64_t;
-	#else
-		#include <stdint.h>
-	#endif
+    #ifdef _MSC_VER
+        typedef char                 int8_t;
+        typedef unsigned char       uint8_t;
+        typedef short               int16_t;
+        typedef unsigned short     uint16_t;
+        typedef int                 int32_t;
+        typedef unsigned int       uint32_t;
+        typedef long long           int64_t;
+        typedef unsigned long long uint64_t;
+    #else
+        #include <stdint.h>
+    #endif
 #endif // BAMTOOLS_TYPES
 
 namespace BamTools {
@@ -62,198 +62,56 @@ const int BAM_LIDX_SHIFT    = 14;
 // Explicit variable sizes
 const int BT_SIZEOF_INT = 4;
 
-// ARQ: moved this here.
-struct CigarOp {
-    char     Type;   // Operation type (MIDNSHP)
-    uint32_t Length; // Operation length (number of bases)
-};
-
+struct CigarOp;
 
 struct BamAlignment {
 
-    // Queries against alignment flag
+    // constructors & destructor
     public:
-        // Returns true if this read is a PCR duplicate (determined by external app)
-        bool IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
-        // Returns true if this read failed quality control (determined by external app)
-        bool IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
-        // Returns true if alignment is first mate on read
-        bool IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
-        // Returns true if alignment is mapped
-        bool IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
-        // Returns true if alignment's mate is mapped
-        bool IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
-        // Returns true if alignment's mate mapped to reverse strand
-        bool IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }
-        // Returns true if alignment part of paired-end read
-        bool IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
-        // Returns true if this position is primary alignment (determined by external app)
-        bool IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY) == 0 ); }
-        // Returns true if alignment is part of read that satisfied paired-end resolution (determined by external app)
-        bool IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
-        // Returns true if alignment mapped to reverse strand
-        bool IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
-        // Returns true if alignment is second mate on read
-        bool IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
-
-	// Manipulate alignment flag
-	public:
-		// Sets "PCR duplicate" bit 
-        void SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }
-        // Sets "failed quality control" bit
-        void SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }
-        // Sets "alignment is first mate" bit
-        void SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }
-        // Sets "alignment's mate is mapped" bit
-        void SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
-        // Sets "alignment's mate mapped to reverse strand" bit
-        void SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }
-        // Sets "alignment part of paired-end read" bit
-        void SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }
-        // Sets "alignment is part of read that satisfied paired-end resolution" bit
-        void SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }
-        // Sets "alignment mapped to reverse strand" bit
-        void SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }
-		// Sets "position is primary alignment (determined by external app)"
-        void SetIsSecondaryAlignment(bool ok)  { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }
-        // Sets "alignment is second mate on read" bit
-        void SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }
-		// Sets "alignment is mapped" bit
-        void SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
-		
+        BamAlignment(void);
+        BamAlignment(const BamAlignment& other);
+        ~BamAlignment(void);
+
+    // Queries against alignment flags
+    public:        
+        bool IsDuplicate(void) const;		// Returns true if this read is a PCR duplicate       
+        bool IsFailedQC(void) const;		// Returns true if this read failed quality control      
+        bool IsFirstMate(void) const;		// Returns true if alignment is first mate on read        
+        bool IsMapped(void) const;		// Returns true if alignment is mapped        
+        bool IsMateMapped(void) const;		// Returns true if alignment's mate is mapped        
+        bool IsMateReverseStrand(void) const;	// Returns true if alignment's mate mapped to reverse strand        
+        bool IsPaired(void) const;		// Returns true if alignment part of paired-end read        
+        bool IsPrimaryAlignment(void) const;	// Returns true if reported position is primary alignment       
+        bool IsProperPair(void) const;		// Returns true if alignment is part of read that satisfied paired-end resolution     
+        bool IsReverseStrand(void) const;	// Returns true if alignment mapped to reverse strand
+        bool IsSecondMate(void) const;		// Returns true if alignment is second mate on read
+
+    // Manipulate alignment flags
+    public:        
+        void SetIsDuplicate(bool ok);		// Sets "PCR duplicate" flag        
+        void SetIsFailedQC(bool ok);		// Sets "failed quality control" flag        
+        void SetIsFirstMate(bool ok);		// Sets "alignment is first mate" flag        
+        void SetIsMateUnmapped(bool ok);	// Sets "alignment's mate is mapped" flag        
+        void SetIsMateReverseStrand(bool ok);	// Sets "alignment's mate mapped to reverse strand" flag        
+        void SetIsPaired(bool ok);		// Sets "alignment part of paired-end read" flag        
+	void SetIsProperPair(bool ok);		// Sets "alignment is part of read that satisfied paired-end resolution" flag        
+        void SetIsReverseStrand(bool ok);	// Sets "alignment mapped to reverse strand" flag        
+        void SetIsSecondaryAlignment(bool ok);	// Sets "position is primary alignment" flag        
+        void SetIsSecondMate(bool ok);		// Sets "alignment is second mate on read" flag        
+        void SetIsUnmapped(bool ok);		// Sets "alignment is mapped" flag
+
+    // Tag data access methods
     public:
+        bool GetEditDistance(uint8_t& editDistance) const;	// get "NM" tag data - contributed by Aaron Quinlan
+        bool GetReadGroup(std::string& readGroup) const;	// get "RG" tag data
 
-        // get "RG" tag data
-        bool GetReadGroup(std::string& readGroup) const {
-
-            if ( TagData.empty() ) { return false; }
-
-            // localize the tag data
-            char* pTagData = (char*)TagData.data();
-            const unsigned int tagDataLen = TagData.size();
-            unsigned int numBytesParsed = 0;
-
-            bool foundReadGroupTag = false;
-            while( numBytesParsed < tagDataLen ) {
-
-                const char* pTagType = pTagData;
-                const char* pTagStorageType = pTagData + 2;
-                pTagData       += 3;
-                numBytesParsed += 3;
-
-                // check the current tag
-                if ( std::strncmp(pTagType, "RG", 2) == 0 ) {
-                    foundReadGroupTag = true;
-                    break;
-                }
-
-                // get the storage class and find the next tag
-                SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );
-            }
-
-            // return if the read group tag was not present
-            if ( !foundReadGroupTag ) { return false; }
-
-            // assign the read group
-            const unsigned int readGroupLen = std::strlen(pTagData);
-            readGroup.resize(readGroupLen);
-            std::memcpy( (char*)readGroup.data(), pTagData, readGroupLen );
-            return true;
-        }
-
-        // get "NM" tag data - contributed by Aaron Quinlan
-        bool GetEditDistance(uint8_t& editDistance) const {
-
-            if ( TagData.empty() ) { return false; }
-
-            // localize the tag data
-            char* pTagData = (char*)TagData.data();
-            const unsigned int tagDataLen = TagData.size();
-            unsigned int numBytesParsed = 0;
-
-            bool foundEditDistanceTag = false;
-            while( numBytesParsed < tagDataLen ) {
-
-                const char* pTagType = pTagData;
-                const char* pTagStorageType = pTagData + 2;
-                pTagData       += 3;
-                numBytesParsed += 3;
-                // check the current tag
-                if ( strncmp(pTagType, "NM", 2) == 0 ) {
-                    foundEditDistanceTag = true;
-                    break;
-                }
-				
-                // get the storage class and find the next tag
-                SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );
-            }
-            // return if the edit distance tag was not present
-            if ( !foundEditDistanceTag ) { return false; }
-
-            // assign the editDistance value
-            memcpy(&editDistance, pTagData, 1);
-            return true;
-        }
-
-		// ARQ:
-		// compute and retuen the alignment end coordinate based on the CIGAR string.
-		int GetAlignmentEnd(bool usePadded = false) const {
-		    // initialize alignment end to starting position
-		    int alignEnd = Position;
-
-		    // iterate over cigar operations
-		    std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
-		    std::vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();
-		    for ( ; cigarIter != cigarEnd; ++cigarIter) {
-		        char cigarType = (*cigarIter).Type;
-		        if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {
-		            alignEnd += (*cigarIter).Length;
-		        }
-				else if (cigarType == 'I' && usePadded == true) {
-					alignEnd += (*cigarIter).Length;
-				}
-		    }
-		    return alignEnd;
-		} 
+    // Additional data access methods
+    public:
+		int GetEndPosition(bool usePadded = false) const;	// calculates alignment end position, based on starting position and CIGAR operations
 
+    // 'internal' utility methods 
     private:
-        static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
-			switch(storageType) {
-
-                case 'A':
-                case 'c':
-                case 'C':
-                        ++numBytesParsed;
-                        ++pTagData;
-                        break;
-
-                case 's':
-                case 'S':
-                case 'f':
-                        numBytesParsed += 2;
-                        pTagData       += 2;
-                        break;
-
-                case 'i':
-                case 'I':
-                        numBytesParsed += 4;
-                        pTagData       += 4;
-                        break;
-
-                case 'Z':
-                case 'H':
-                        while(*pTagData) {
-                            ++numBytesParsed;
-                            ++pTagData;
-                        }
-						++pTagData;
-                        break;
-
-                default:
-                        printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);
-                        exit(1);
-            }
-        }
+        static void SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);
 
     // Data members
     public:
@@ -274,48 +132,60 @@ struct BamAlignment {
         int32_t      InsertSize;        // Mate-pair insert size
 
     // Alignment flag query constants
+    // Use the get/set methods above instead
     private:
-        enum { PAIRED        = 1,
-               PROPER_PAIR   = 2,
-               UNMAPPED      = 4,
-               MATE_UNMAPPED = 8,
-               REVERSE       = 16,
-               MATE_REVERSE  = 32,
-               READ_1        = 64,
-               READ_2        = 128,
-               SECONDARY     = 256,
-               QC_FAILED     = 512,
-               DUPLICATE     = 1024
+        enum { PAIRED        = 1
+             , PROPER_PAIR   = 2
+             , UNMAPPED      = 4
+             , MATE_UNMAPPED = 8
+             , REVERSE       = 16
+             , MATE_REVERSE  = 32
+             , READ_1        = 64
+             , READ_2        = 128
+             , SECONDARY     = 256
+             , QC_FAILED     = 512
+             , DUPLICATE     = 1024 
              };
 };
 
 // ----------------------------------------------------------------
 // Auxiliary data structs & typedefs
 
+struct CigarOp {
+    char     Type;   // Operation type (MIDNSHP)
+    uint32_t Length; // Operation length (number of bases)
+};
+
 struct RefData {
+   
     // data members
     std::string RefName;          // Name of reference sequence
-    int         RefLength;        // Length of reference sequence
+    int32_t     RefLength;        // Length of reference sequence
     bool        RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence
+    
     // constructor
-    RefData(void)
-        : RefLength(0)
-        , RefHasAlignments(false)
+    RefData(const int32_t& length = 0, 
+            bool ok = false)
+        : RefLength(length)
+        , RefHasAlignments(ok)
     { }
 };
 
-typedef std::vector<RefData> RefVector;
+typedef std::vector<RefData>      RefVector;
 typedef std::vector<BamAlignment> BamAlignmentVector;
 
 // ----------------------------------------------------------------
 // Indexing structs & typedefs
 
 struct Chunk {
+
     // data members
     uint64_t Start;
     uint64_t Stop;
+
     // constructor
-    Chunk(const uint64_t& start = 0, const uint64_t& stop = 0)
+    Chunk(const uint64_t& start = 0, 
+          const uint64_t& stop = 0)
         : Start(start)
         , Stop(stop)
     { }
@@ -344,6 +214,289 @@ struct ReferenceIndex {
 
 typedef std::vector<ReferenceIndex> BamIndex;
 
+// ----------------------------------------------------------------
+// BamAlignment member methods
+
+// constructors & destructor
+inline 
+BamAlignment::BamAlignment(void) { }
+
+inline 
+BamAlignment::BamAlignment(const BamAlignment& other)
+    : Name(other.Name)
+    , Length(other.Length)
+    , QueryBases(other.QueryBases)
+    , AlignedBases(other.AlignedBases)
+    , Qualities(other.Qualities)
+    , TagData(other.TagData)
+    , RefID(other.RefID)
+    , Position(other.Position)
+    , Bin(other.Bin)
+    , MapQuality(other.MapQuality)
+    , AlignmentFlag(other.AlignmentFlag)
+    , CigarData(other.CigarData)
+    , MateRefID(other.MateRefID)
+    , MatePosition(other.MatePosition)
+    , InsertSize(other.InsertSize)
+{ }
+
+inline 
+BamAlignment::~BamAlignment(void) { }
+
+// Queries against alignment flags
+inline bool BamAlignment::IsDuplicate(void) const         { return ( (AlignmentFlag & DUPLICATE)     != 0 ); }
+inline bool BamAlignment::IsFailedQC(void) const          { return ( (AlignmentFlag & QC_FAILED)     != 0 ); }
+inline bool BamAlignment::IsFirstMate(void) const         { return ( (AlignmentFlag & READ_1)        != 0 ); }
+inline bool BamAlignment::IsMapped(void) const            { return ( (AlignmentFlag & UNMAPPED)      == 0 ); }
+inline bool BamAlignment::IsMateMapped(void) const        { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
+inline bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE)  != 0 ); }
+inline bool BamAlignment::IsPaired(void) const            { return ( (AlignmentFlag & PAIRED)        != 0 ); }
+inline bool BamAlignment::IsPrimaryAlignment(void) const  { return ( (AlignmentFlag & SECONDARY)     == 0 ); }
+inline bool BamAlignment::IsProperPair(void) const        { return ( (AlignmentFlag & PROPER_PAIR)   != 0 ); }
+inline bool BamAlignment::IsReverseStrand(void) const     { return ( (AlignmentFlag & REVERSE)       != 0 ); }
+inline bool BamAlignment::IsSecondMate(void) const        { return ( (AlignmentFlag & READ_2)        != 0 ); }
+
+// Manipulate alignment flags 
+inline void BamAlignment::SetIsDuplicate(bool ok)          { if (ok) AlignmentFlag |= DUPLICATE;     else AlignmentFlag &= ~DUPLICATE; }
+inline void BamAlignment::SetIsFailedQC(bool ok)           { if (ok) AlignmentFlag |= QC_FAILED;     else AlignmentFlag &= ~QC_FAILED; }
+inline void BamAlignment::SetIsFirstMate(bool ok)          { if (ok) AlignmentFlag |= READ_1;        else AlignmentFlag &= ~READ_1; }
+inline void BamAlignment::SetIsMateUnmapped(bool ok)       { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
+inline void BamAlignment::SetIsMateReverseStrand(bool ok)  { if (ok) AlignmentFlag |= MATE_REVERSE;  else AlignmentFlag &= ~MATE_REVERSE; }
+inline void BamAlignment::SetIsPaired(bool ok)             { if (ok) AlignmentFlag |= PAIRED;        else AlignmentFlag &= ~PAIRED; }
+inline void BamAlignment::SetIsProperPair(bool ok)         { if (ok) AlignmentFlag |= PROPER_PAIR;   else AlignmentFlag &= ~PROPER_PAIR; }
+inline void BamAlignment::SetIsReverseStrand(bool ok)      { if (ok) AlignmentFlag |= REVERSE;       else AlignmentFlag &= ~REVERSE; }
+inline void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY;     else AlignmentFlag &= ~SECONDARY; }
+inline void BamAlignment::SetIsSecondMate(bool ok)         { if (ok) AlignmentFlag |= READ_2;        else AlignmentFlag &= ~READ_2; }
+inline void BamAlignment::SetIsUnmapped(bool ok)           { if (ok) AlignmentFlag |= UNMAPPED;      else AlignmentFlag &= ~UNMAPPED; }
+
+// calculates alignment end position, based on starting position and CIGAR operations
+inline 
+int BamAlignment::GetEndPosition(bool usePadded) const {
+
+    // initialize alignment end to starting position
+    int alignEnd = Position;
+
+    // iterate over cigar operations
+    std::vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
+    std::vector<CigarOp>::const_iterator cigarEnd  = CigarData.end();
+    for ( ; cigarIter != cigarEnd; ++cigarIter) {
+	const char cigarType = (*cigarIter).Type;
+	if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {
+	    alignEnd += (*cigarIter).Length;
+	} 
+        else if ( usePadded && cigarType == 'I' ) {
+            alignEnd += (*cigarIter).Length;
+        }
+    }
+    return alignEnd;
+}
+
+// get "NM" tag data - contributed by Aaron Quinlan
+// stores data in 'editDistance', returns success/fail
+inline 
+bool BamAlignment::GetEditDistance(uint8_t& editDistance) const {
+
+    if ( TagData.empty() ) { return false; }
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLen = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    bool foundEditDistanceTag = false;
+    while( numBytesParsed < tagDataLen ) {
+
+        const char* pTagType = pTagData;
+        const char* pTagStorageType = pTagData + 2;
+        pTagData       += 3;
+        numBytesParsed += 3;
+
+        // check the current tag
+        if ( strncmp(pTagType, "NM", 2) == 0 ) {
+            foundEditDistanceTag = true;
+            break;
+        }
+
+        // get the storage class and find the next tag
+        if (*pTagStorageType == '\0') { return false; }
+        SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );
+        if (*pTagData == '\0') { return false; }
+    }
+    // return if the edit distance tag was not present
+    if ( !foundEditDistanceTag ) { return false; }
+
+    // assign the editDistance value
+    std::memcpy(&editDistance, pTagData, 1);
+    return true;
+}
+
+// get "RG" tag data
+// stores data in 'readGroup', returns success/fail
+inline 
+bool BamAlignment::GetReadGroup(std::string& readGroup) const {
+
+    if ( TagData.empty() ) { return false; }
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLen = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    bool foundReadGroupTag = false;
+    while( numBytesParsed < tagDataLen ) {
+
+        const char* pTagType = pTagData;
+        const char* pTagStorageType = pTagData + 2;
+        pTagData       += 3;
+        numBytesParsed += 3;
+
+        // check the current tag
+        if ( std::strncmp(pTagType, "RG", 2) == 0 ) {
+            foundReadGroupTag = true;
+            break;
+        }
+
+        // get the storage class and find the next tag
+        if (*pTagStorageType == '\0') { return false; }
+        SkipToNextTag( *pTagStorageType, pTagData, numBytesParsed );
+        if (*pTagData == '\0') { return false; }
+    }
+
+    // return if the read group tag was not present
+    if ( !foundReadGroupTag ) { return false; }
+
+    // assign the read group
+    const unsigned int readGroupLen = std::strlen(pTagData);
+    readGroup.resize(readGroupLen);
+    std::memcpy( (char*)readGroup.data(), pTagData, readGroupLen );
+    return true;
+}
+
+inline
+void BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
+    
+    switch(storageType) {
+
+        case 'A':
+        case 'c':
+        case 'C':
+            ++numBytesParsed;
+            ++pTagData;
+            break;
+
+        case 's':
+        case 'S':
+            numBytesParsed += 2;
+            pTagData       += 2;
+            break;
+
+        case 'f':
+        case 'i':
+        case 'I':
+            numBytesParsed += 4;
+            pTagData       += 4;
+            break;
+
+        case 'Z':
+        case 'H':
+            while(*pTagData) {
+                ++numBytesParsed;
+                ++pTagData;
+            }
+        // ---------------------------
+        // Added: 3-25-2010 DWB
+        // Contributed: ARQ
+        // Fixed: error parsing variable length tag data
+            ++pTagData;
+        // ---------------------------
+            break;
+
+        default:
+            printf("ERROR: Unknown tag storage class encountered: [%c]\n", *pTagData);
+            exit(1);
+    }
+}
+
+// ----------------------------------------------------------------
+// Added: 3-35-2010 DWB
+// Fixed: Routines to provide endian-correctness
+// ----------------------------------------------------------------
+
+// returns true if system is big endian
+inline bool SystemIsBigEndian(void) {
+   const uint16_t one = 0x0001;
+   return ((*(char*) &one) == 0 );
+}
+
+// swaps endianness of 16-bit value 'in place'
+inline void SwapEndian_16(int16_t& x) {
+    x = ((x >> 8) | (x << 8));
+}
+
+inline void SwapEndian_16(uint16_t& x) {
+    x = ((x >> 8) | (x << 8));
+}
+
+// swaps endianness of 32-bit value 'in-place'
+inline void SwapEndian_32(int32_t& x) {
+    x = ( (x >> 24) | 
+         ((x << 8) & 0x00FF0000) | 
+         ((x >> 8) & 0x0000FF00) | 
+          (x << 24)
+        );
+}
+
+inline void SwapEndian_32(uint32_t& x) {
+    x = ( (x >> 24) | 
+         ((x << 8) & 0x00FF0000) | 
+         ((x >> 8) & 0x0000FF00) | 
+          (x << 24)
+        );
+}
+
+// swaps endianness of 64-bit value 'in-place'
+inline void SwapEndian_64(int64_t& x) {
+    x = ( (x >> 56) | 
+         ((x << 40) & 0x00FF000000000000ll) |
+         ((x << 24) & 0x0000FF0000000000ll) |
+         ((x << 8)  & 0x000000FF00000000ll) |
+         ((x >> 8)  & 0x00000000FF000000ll) |
+         ((x >> 24) & 0x0000000000FF0000ll) |
+         ((x >> 40) & 0x000000000000FF00ll) |
+          (x << 56)
+        );
+}
+
+inline void SwapEndian_64(uint64_t& x) {
+    x = ( (x >> 56) | 
+         ((x << 40) & 0x00FF000000000000ll) |
+         ((x << 24) & 0x0000FF0000000000ll) |
+         ((x << 8)  & 0x000000FF00000000ll) |
+         ((x >> 8)  & 0x00000000FF000000ll) |
+         ((x >> 24) & 0x0000000000FF0000ll) |
+         ((x >> 40) & 0x000000000000FF00ll) |
+          (x << 56)
+        );
+}
+
+// swaps endianness of 'next 2 bytes' in a char buffer (in-place)
+inline void SwapEndian_16p(char* data) {
+    uint16_t& value = (uint16_t&)*data; 
+    SwapEndian_16(value);
+}
+
+// swaps endianness of 'next 4 bytes' in a char buffer (in-place)
+inline void SwapEndian_32p(char* data) {
+    uint32_t& value = (uint32_t&)*data; 
+    SwapEndian_32(value);
+}
+
+// swaps endianness of 'next 8 bytes' in a char buffer (in-place)
+inline void SwapEndian_64p(char* data) {
+    uint64_t& value = (uint64_t&)*data; 
+    SwapEndian_64(value);
+}
+
 } // namespace BamTools
 
 #endif // BAMAUX_H
diff --git a/src/utils/BamTools/BamReader.cpp b/src/utils/BamTools/BamReader.cpp
index ab8d044f..a2f975f9 100644
--- a/src/utils/BamTools/BamReader.cpp
+++ b/src/utils/BamTools/BamReader.cpp
@@ -1,9 +1,9 @@
 // ***************************************************************************
-// BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
+// BamReader.cpp (c) 2009 Derek Barnett, Michael Str�mberg
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 11 January 2010(DB)
+// Last modified: 14 April 2010 (DB)
 // ---------------------------------------------------------------------------
 // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
 // Institute.
@@ -23,13 +23,23 @@
 using namespace BamTools;
 using namespace std;
 
+namespace BamTools {
+  struct BamAlignmentSupportData {
+      string   AllCharData;
+      uint32_t BlockLength;
+      uint32_t NumCigarOperations;
+      uint32_t QueryNameLength;
+      uint32_t QuerySequenceLength;
+  };
+} // namespace BamTools
+
 struct BamReader::BamReaderPrivate {
 
     // -------------------------------
     // data members
     // -------------------------------
 
-    // general data
+    // general file data
     BgzfData  mBGZF;
     string    HeaderText;
     BamIndex  Index;
@@ -38,6 +48,9 @@ struct BamReader::BamReaderPrivate {
     int64_t   AlignmentsBeginOffset;
     string    Filename;
     string    IndexFilename;
+    
+    // system data
+    bool IsBigEndian;
 
     // user-specified region values
     bool IsRegionSpecified;
@@ -68,10 +81,7 @@ struct BamReader::BamReaderPrivate {
     bool GetNextAlignment(BamAlignment& bAlignment);
 
     // access auxiliary data
-    const string GetHeaderText(void) const;
-    const int GetReferenceCount(void) const;
-    const RefVector GetReferenceData(void) const;
-    const int GetReferenceID(const string& refName) const;
+    int GetReferenceID(const string& refName) const;
 
     // index operations
     bool CreateIndex(void);
@@ -84,8 +94,8 @@ struct BamReader::BamReaderPrivate {
 
     // calculate bins that overlap region ( left to reference end for now )
     int BinsFromRegion(int refID, int left, uint16_t[MAX_BIN]);
-    // calculates alignment end position based on starting position and provided CIGAR operations
-    int CalculateAlignmentEnd(const int& position, const std::vector<CigarOp>& cigarData);
+    // fills out character data for BamAlignment data
+    bool BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData);
     // calculate file offset for first alignment chunk overlapping 'left'
     int64_t GetOffset(int refID, int left);
     // checks to see if alignment overlaps current region
@@ -93,7 +103,7 @@ struct BamReader::BamReaderPrivate {
     // retrieves header text from BAM file
     void LoadHeaderData(void);
     // retrieves BAM alignment under file pointer
-    bool LoadNextAlignment(BamAlignment& bAlignment);
+    bool LoadNextAlignment(BamAlignment& bAlignment, BamAlignmentSupportData& supportData);
     // builds reference data structure from BAM file
     void LoadReferenceData(void);
 
@@ -111,8 +121,6 @@ struct BamReader::BamReaderPrivate {
     bool LoadIndex(void);
     // simplifies index by merging 'chunks'
     void MergeChunks(void);
-    // round-up 32-bit integer to next power-of-2
-    void Roundup32(int& value);
     // saves index to BAM index file
     bool WriteIndex(void);
 };
@@ -142,10 +150,10 @@ bool BamReader::Rewind(void) { return d->Rewind(); }
 bool BamReader::GetNextAlignment(BamAlignment& bAlignment) { return d->GetNextAlignment(bAlignment); }
 
 // access auxiliary data
-const string    BamReader::GetHeaderText(void) const { return d->HeaderText; }
-const int       BamReader::GetReferenceCount(void) const { return d->References.size(); }
+const string BamReader::GetHeaderText(void) const { return d->HeaderText; }
+int BamReader::GetReferenceCount(void) const { return d->References.size(); }
 const RefVector BamReader::GetReferenceData(void) const { return d->References; }
-const int       BamReader::GetReferenceID(const string& refName) const { return d->GetReferenceID(refName); }
+int BamReader::GetReferenceID(const string& refName) const { return d->GetReferenceID(refName); }
 
 // index operations
 bool BamReader::CreateIndex(void) { return d->CreateIndex(); }
@@ -163,7 +171,9 @@ BamReader::BamReaderPrivate::BamReaderPrivate(void)
     , CurrentLeft(0)
     , DNA_LOOKUP("=ACMGRSVTWYHKDBN")
     , CIGAR_LOOKUP("MIDNSHP")
-{ }
+{ 
+    IsBigEndian = SystemIsBigEndian();
+}
 
 // destructor
 BamReader::BamReaderPrivate::~BamReaderPrivate(void) {
@@ -193,6 +203,137 @@ int BamReader::BamReaderPrivate::BinsFromRegion(int refID, int left, uint16_t li
     return i;
 }
 
+bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment, const BamAlignmentSupportData& supportData) {
+  
+    // calculate character lengths/offsets
+    const unsigned int dataLength     = supportData.BlockLength - BAM_CORE_SIZE;
+    const unsigned int seqDataOffset  = supportData.QueryNameLength + (supportData.NumCigarOperations * 4);
+    const unsigned int qualDataOffset = seqDataOffset + (supportData.QuerySequenceLength+1)/2;
+    const unsigned int tagDataOffset  = qualDataOffset + supportData.QuerySequenceLength;
+    const unsigned int tagDataLength  = dataLength - tagDataOffset;
+      
+    // set up char buffers
+    const char* allCharData = supportData.AllCharData.data();
+    const char* seqData     = ((const char*)allCharData) + seqDataOffset;
+    const char* qualData    = ((const char*)allCharData) + qualDataOffset;
+    char* tagData     = ((char*)allCharData) + tagDataOffset;
+  
+    // save query sequence
+    bAlignment.QueryBases.clear();
+    bAlignment.QueryBases.reserve(supportData.QuerySequenceLength);
+    for (unsigned int i = 0; i < supportData.QuerySequenceLength; ++i) {
+        char singleBase = DNA_LOOKUP[ ( ( seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+        bAlignment.QueryBases.append(1, singleBase);
+    }
+  
+    // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
+    bAlignment.Qualities.clear();
+    bAlignment.Qualities.reserve(supportData.QuerySequenceLength);
+    for (unsigned int i = 0; i < supportData.QuerySequenceLength; ++i) {
+        char singleQuality = (char)(qualData[i]+33);
+        bAlignment.Qualities.append(1, singleQuality);
+    }
+    
+    // parse CIGAR to build 'AlignedBases'
+    bAlignment.AlignedBases.clear();
+    bAlignment.AlignedBases.reserve(supportData.QuerySequenceLength);
+    
+    int k = 0;
+    vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();
+    vector<CigarOp>::const_iterator cigarEnd  = bAlignment.CigarData.end();
+    for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+        
+        const CigarOp& op = (*cigarIter);
+        switch(op.Type) {
+          
+            case ('M') :
+            case ('I') :
+                bAlignment.AlignedBases.append(bAlignment.QueryBases.substr(k, op.Length)); // for 'M', 'I' - write bases
+                // fall through
+            
+            case ('S') :
+                k += op.Length;                                     // for 'S' - soft clip, skip over query bases
+                break;
+                
+            case ('D') :
+                bAlignment.AlignedBases.append(op.Length, '-');     // for 'D' - write gap character
+                break;
+                
+            case ('P') :
+                bAlignment.AlignedBases.append( op.Length, '*' );   // for 'P' - write padding character
+                break;
+                
+            case ('N') :
+                bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's, skip bases in original query sequence
+                // k+=op.Length; 
+                break;
+                
+            case ('H') :
+                break;  // for 'H' - hard clip, do nothing to AlignedBases, move to next op
+                
+            default:
+                printf("ERROR: Invalid Cigar op type\n"); // shouldn't get here
+                exit(1);
+        }
+    }
+ 
+    // -----------------------
+    // Added: 3-25-2010 DWB
+    // Fixed: endian-correctness for tag data
+    // -----------------------
+    if ( IsBigEndian ) {
+        int i = 0;
+        while ( (unsigned int)i < tagDataLength ) {
+          
+            i += 2; // skip tag type (e.g. "RG", "NM", etc)
+            uint8_t type = toupper(tagData[i]);     // lower & upper case letters have same meaning 
+            ++i;                                    // skip value type
+    
+            switch (type) {
+                
+                case('A') :
+                case('C') : 
+                    ++i;
+                    break;
+
+                case('S') : 
+                    SwapEndian_16p(&tagData[i]); 
+                    i+=2; // sizeof(uint16_t)
+                    break;
+                    
+                case('F') :
+                case('I') : 
+                    SwapEndian_32p(&tagData[i]);
+                    i+=4; // sizeof(uint32_t)
+                    break;
+                
+                case('D') : 
+                    SwapEndian_64p(&tagData[i]);
+                    i+=8; // sizeof(uint64_t) 
+                    break;
+                
+                case('H') :
+                case('Z') : 
+                    while (tagData[i]) { ++i; }
+                    ++i; // increment one more for null terminator
+                    break;
+                
+                default : 
+                    printf("ERROR: Invalid tag value type\n"); // shouldn't get here
+                    exit(1);
+            }
+        }
+    }
+    
+    // store TagData
+    bAlignment.TagData.clear();
+    bAlignment.TagData.resize(tagDataLength);
+    memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLength);
+    
+    // return success
+    return true;
+}
+
 // populates BAM index data structure from BAM file data
 bool BamReader::BamReaderPrivate::BuildIndex(void) {
 
@@ -323,24 +464,6 @@ bool BamReader::BamReaderPrivate::BuildIndex(void) {
     return Rewind();
 }
 
-// calculates alignment end position based on starting position and provided CIGAR operations
-int BamReader::BamReaderPrivate::CalculateAlignmentEnd(const int& position, const vector<CigarOp>& cigarData) {
-
-    // initialize alignment end to starting position
-    int alignEnd = position;
-
-    // iterate over cigar operations
-    vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
-    vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
-    for ( ; cigarIter != cigarEnd; ++cigarIter) {
-        char cigarType = (*cigarIter).Type;
-        if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) {
-            alignEnd += (*cigarIter).Length;
-        }
-    }
-    return alignEnd;
-}
-
 
 // clear index data structure
 void BamReader::BamReaderPrivate::ClearIndex(void) {
@@ -361,17 +484,17 @@ bool BamReader::BamReaderPrivate::CreateIndex(void) {
     // clear out index
     ClearIndex();
 
-	// build (& save) index from BAM file
+    // build (& save) index from BAM file
     bool ok = true;
     ok &= BuildIndex();
     ok &= WriteIndex();
 
-	// return success/fail
+    // return success/fail
     return ok;
 }
 
 // returns RefID for given RefName (returns References.size() if not found)
-const int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const {
+int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const {
 
     // retrieve names from reference data
     vector<string> refNames;
@@ -388,20 +511,26 @@ const int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) con
 // get next alignment (from specified region, if given)
 bool BamReader::BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
 
+    BamAlignmentSupportData supportData;
+  
     // if valid alignment available
-    if ( LoadNextAlignment(bAlignment) ) {
+    if ( LoadNextAlignment(bAlignment, supportData) ) {
 
         // if region not specified, return success
-        if ( !IsRegionSpecified ) { return true; }
+        if ( !IsRegionSpecified ) { 
+          bool ok = BuildCharData(bAlignment, supportData);
+          return ok; 
+        }
 
         // load next alignment until region overlap is found
         while ( !IsOverlap(bAlignment) ) {
             // if no valid alignment available (likely EOF) return failure
-            if ( !LoadNextAlignment(bAlignment) ) { return false; }
+            if ( !LoadNextAlignment(bAlignment, supportData) ) { return false; }
         }
 
         // return success (alignment found that overlaps region)
-        return true;
+        bool ok = BuildCharData(bAlignment, supportData);
+        return ok;
     }
 
     // no valid alignment
@@ -485,16 +614,13 @@ void BamReader::BamReaderPrivate::InsertLinearOffset(LinearOffsetVector& offsets
                                                      const uint64_t&     lastOffset)
 {
     // get converted offsets
-    int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;
-    int endOffset   = ( CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData) - 1) >> BAM_LIDX_SHIFT;
+    int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;
+    int endOffset   = (bAlignment.GetEndPosition() - 1) >> BAM_LIDX_SHIFT;
 
     // resize vector if necessary
     int oldSize = offsets.size();
     int newSize = endOffset + 1;
-    if ( oldSize < newSize ) {        
-        Roundup32(newSize);
-        offsets.resize(newSize, 0);
-    }
+    if ( oldSize < newSize ) { offsets.resize(newSize, 0); }
 
     // store offset
     for(int i = beginOffset + 1; i <= endOffset ; ++i) {
@@ -513,8 +639,8 @@ bool BamReader::BamReaderPrivate::IsOverlap(BamAlignment& bAlignment) {
     // read starts after left boundary
     if ( bAlignment.Position >= CurrentLeft) { return true; }
 
-    // return whether alignment end overlaps left boundary
-    return ( CalculateAlignmentEnd(bAlignment.Position, bAlignment.CigarData) >= CurrentLeft );
+    // return whether alignment end overlaps left boundary
+    return ( bAlignment.GetEndPosition() >= CurrentLeft );
 }
 
 // jumps to specified region(refID, leftBound) in BAM file, returns success/fail
@@ -523,22 +649,22 @@ bool BamReader::BamReaderPrivate::Jump(int refID, int position) {
     // if data exists for this reference and position is valid    
     if ( References.at(refID).RefHasAlignments && (position <= References.at(refID).RefLength) ) {
 
-		// set current region
+        // set current region
         CurrentRefID = refID;
         CurrentLeft  = position;
         IsRegionSpecified = true;
 
-		// calculate offset
+        // calculate offset
         int64_t offset = GetOffset(CurrentRefID, CurrentLeft);
 
-		// if in valid offset, return failure
+        // if in valid offset, return failure
         if ( offset == -1 ) { return false; }
 
-		// otherwise return success of seek operation
+        // otherwise return success of seek operation
         else { return mBGZF.Seek(offset); }
     }
 
-	// invalid jump request parameters, return failure
+    // invalid jump request parameters, return failure
     return false;
 }
 
@@ -559,8 +685,9 @@ void BamReader::BamReaderPrivate::LoadHeaderData(void) {
 
     // get BAM header text length
     mBGZF.Read(buffer, 4);
-    const unsigned int headerTextLength = BgzfData::UnpackUnsignedInt(buffer);
-
+    unsigned int headerTextLength = BgzfData::UnpackUnsignedInt(buffer);
+    if ( IsBigEndian ) { SwapEndian_32(headerTextLength); }
+    
     // get BAM header text
     char* headerText = (char*)calloc(headerTextLength + 1, 1);
     mBGZF.Read(headerText, headerTextLength);
@@ -586,8 +713,8 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
         return false;
     }
 
-	size_t elementsRead = 0;
-	
+    size_t elementsRead = 0;
+        
     // see if index is valid BAM index
     char magic[4];
     elementsRead = fread(magic, 1, 4, indexStream);
@@ -600,7 +727,8 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
     // get number of reference sequences
     uint32_t numRefSeqs;
     elementsRead = fread(&numRefSeqs, 4, 1, indexStream);
-
+    if ( IsBigEndian ) { SwapEndian_32(numRefSeqs); }
+    
     // intialize space for BamIndex data structure
     Index.reserve(numRefSeqs);
 
@@ -610,6 +738,7 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
         // get number of bins for this reference sequence
         int32_t numBins;
         elementsRead = fread(&numBins, 4, 1, indexStream);
+        if ( IsBigEndian ) { SwapEndian_32(numBins); }
 
         if (numBins > 0) {
             RefData& refEntry = References[i];
@@ -630,6 +759,11 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
             uint32_t numChunks;
             elementsRead = fread(&numChunks, 4, 1, indexStream);
 
+            if ( IsBigEndian ) { 
+              SwapEndian_32(binID);
+              SwapEndian_32(numChunks);
+            }
+            
             // intialize ChunkVector
             ChunkVector regionChunks;
             regionChunks.reserve(numChunks);
@@ -643,6 +777,11 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
                 elementsRead = fread(&left, 8, 1, indexStream);
                 elementsRead = fread(&right, 8, 1, indexStream);
 
+                if ( IsBigEndian ) {
+                    SwapEndian_64(left);
+                    SwapEndian_64(right);
+                }
+                
                 // save ChunkPair
                 regionChunks.push_back( Chunk(left, right) );
             }
@@ -659,6 +798,7 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
         // get number of linear offsets
         int32_t numLinearOffsets;
         elementsRead = fread(&numLinearOffsets, 4, 1, indexStream);
+        if ( IsBigEndian ) { SwapEndian_32(numLinearOffsets); }
 
         // intialize LinearOffsetVector
         LinearOffsetVector offsets;
@@ -669,6 +809,7 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
         for (int j = 0; j < numLinearOffsets; ++j) {
             // read a linear offset & store
             elementsRead = fread(&linearOffset, 8, 1, indexStream);
+            if ( IsBigEndian ) { SwapEndian_64(linearOffset); }
             offsets.push_back(linearOffset);
         }
 
@@ -685,131 +826,76 @@ bool BamReader::BamReaderPrivate::LoadIndex(void) {
 }
 
 // populates BamAlignment with alignment data under file pointer, returns success/fail
-bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) {
+bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment, BamAlignmentSupportData& supportData) {
 
     // read in the 'block length' value, make sure it's not zero
     char buffer[4];
     mBGZF.Read(buffer, 4);
-    const unsigned int blockLength = BgzfData::UnpackUnsignedInt(buffer);
-    if ( blockLength == 0 ) { return false; }
-
-    // keep track of bytes read as method progresses
-    int bytesRead = 4;
+    supportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);
+    if ( IsBigEndian ) { SwapEndian_32(supportData.BlockLength); }
+    if ( supportData.BlockLength == 0 ) { return false; }
 
     // read in core alignment data, make sure the right size of data was read
     char x[BAM_CORE_SIZE];
     if ( mBGZF.Read(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) { return false; }
-    bytesRead += BAM_CORE_SIZE;
 
-    // set BamAlignment 'core' data and character data lengths
-    unsigned int tempValue;
-    unsigned int queryNameLength;
-    unsigned int numCigarOperations;
-    unsigned int querySequenceLength;
-
-    bAlignment.RefID    = BgzfData::UnpackSignedInt(&x[0]);
+    if ( IsBigEndian ) {
+        for ( int i = 0; i < BAM_CORE_SIZE; i+=sizeof(uint32_t) ) { 
+          SwapEndian_32p(&x[i]); 
+        }
+    }
+    
+    // set BamAlignment 'core' and 'support' data
+    bAlignment.RefID    = BgzfData::UnpackSignedInt(&x[0]);  
     bAlignment.Position = BgzfData::UnpackSignedInt(&x[4]);
-
-    tempValue             = BgzfData::UnpackUnsignedInt(&x[8]);
+    
+    unsigned int tempValue = BgzfData::UnpackUnsignedInt(&x[8]);
     bAlignment.Bin        = tempValue >> 16;
     bAlignment.MapQuality = tempValue >> 8 & 0xff;
-    queryNameLength       = tempValue & 0xff;
+    supportData.QueryNameLength = tempValue & 0xff;
 
-    tempValue                = BgzfData::UnpackUnsignedInt(&x[12]);
+    tempValue = BgzfData::UnpackUnsignedInt(&x[12]);
     bAlignment.AlignmentFlag = tempValue >> 16;
-    numCigarOperations       = tempValue & 0xffff;
+    supportData.NumCigarOperations = tempValue & 0xffff;
 
-    querySequenceLength     = BgzfData::UnpackUnsignedInt(&x[16]);
+    supportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]);
     bAlignment.MateRefID    = BgzfData::UnpackSignedInt(&x[20]);
     bAlignment.MatePosition = BgzfData::UnpackSignedInt(&x[24]);
     bAlignment.InsertSize   = BgzfData::UnpackSignedInt(&x[28]);
-
-    // calculate lengths/offsets
-    const unsigned int dataLength      = blockLength - BAM_CORE_SIZE;
-    const unsigned int cigarDataOffset = queryNameLength;
-    const unsigned int seqDataOffset   = cigarDataOffset + (numCigarOperations * 4);
-    const unsigned int qualDataOffset  = seqDataOffset + (querySequenceLength+1)/2;
-    const unsigned int tagDataOffset   = qualDataOffset + querySequenceLength;
-    const unsigned int tagDataLen      = dataLength - tagDataOffset;
-
-    // set up destination buffers for character data
-    char* allCharData   = (char*)calloc(sizeof(char), dataLength);
-    uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset);
-    char* seqData       = ((char*)allCharData) + seqDataOffset;
-    char* qualData      = ((char*)allCharData) + qualDataOffset;
-    char* tagData       = ((char*)allCharData) + tagDataOffset;
-
-    // get character data - make sure proper data size was read
+    
+    // store 'all char data' and cigar ops
+    const unsigned int dataLength      = supportData.BlockLength - BAM_CORE_SIZE;
+    const unsigned int cigarDataOffset = supportData.QueryNameLength;
+    
+    char*     allCharData = (char*)calloc(sizeof(char), dataLength);
+    uint32_t* cigarData   = (uint32_t*)(allCharData + cigarDataOffset);
+    
+    // read in character data - make sure proper data size was read
     if ( mBGZF.Read(allCharData, dataLength) != (signed int)dataLength) { return false; }
     else {
-
-        bytesRead += dataLength;
-
-        // clear out any previous string data
-        bAlignment.Name.clear();
-        bAlignment.QueryBases.clear();
-        bAlignment.Qualities.clear();
-        bAlignment.AlignedBases.clear();
+     
+        // store alignment name and length
+        bAlignment.Name.assign((const char*)(allCharData));
+        bAlignment.Length = supportData.QuerySequenceLength;
+      
+        // store remaining 'allCharData' in supportData structure
+        supportData.AllCharData.assign((const char*)allCharData, dataLength);
+        
+        // save CigarOps for BamAlignment
         bAlignment.CigarData.clear();
-        bAlignment.TagData.clear();
-
-        // save name
-        bAlignment.Name = (string)((const char*)(allCharData));
-
-        // save query sequence
-        for (unsigned int i = 0; i < querySequenceLength; ++i) {
-            char singleBase = DNA_LOOKUP[ ( ( seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
-            bAlignment.QueryBases.append( 1, singleBase );
-        }
-
-        // save sequence length
-        bAlignment.Length = bAlignment.QueryBases.length();
-
-        // save qualities, convert from numeric QV to FASTQ character
-        for (unsigned int i = 0; i < querySequenceLength; ++i) {
-            char singleQuality = (char)(qualData[i]+33);
-            bAlignment.Qualities.append( 1, singleQuality );
-        }
+        for (unsigned int i = 0; i < supportData.NumCigarOperations; ++i) {
 
-        // save CIGAR-related data;
-        int k = 0;
-        for (unsigned int i = 0; i < numCigarOperations; ++i) {
-
-            // build CigarOp struct
+            // swap if necessary
+            if ( IsBigEndian ) { SwapEndian_32(cigarData[i]); }
+          
+            // build CigarOp structure
             CigarOp op;
             op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT);
             op.Type   = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ];
 
             // save CigarOp
             bAlignment.CigarData.push_back(op);
-
-            // build AlignedBases string
-            switch (op.Type) {
-                case ('M') :
-                case ('I') : bAlignment.AlignedBases.append( bAlignment.QueryBases.substr(k, op.Length) ); // for 'M', 'I' - write bases
-                case ('S') : k += op.Length;                                                               // for 'S' - skip over query bases
-                             break;
-
-                case ('D') : bAlignment.AlignedBases.append( op.Length, '-' );	// for 'D' - write gap character
-                             break;
-
-                case ('P') : bAlignment.AlignedBases.append( op.Length, '*' );	// for 'P' - write padding character;
-                             break;
-
-                case ('N') : bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's, skip bases in query sequence
-                             //k += op.Length;
-                             break;
-
-                case ('H') : break; 					        // for 'H' - do nothing, move to next op
-
-                default    : printf("ERROR: Invalid Cigar op type\n"); // shouldn't get here
-                             exit(1);
-            }
         }
-
-        // read in the tag data
-        bAlignment.TagData.resize(tagDataLen);
-        memcpy((char*)bAlignment.TagData.data(), tagData, tagDataLen);
     }
 
     free(allCharData);
@@ -822,7 +908,8 @@ void BamReader::BamReaderPrivate::LoadReferenceData(void) {
     // get number of reference sequences
     char buffer[4];
     mBGZF.Read(buffer, 4);
-    const unsigned int numberRefSeqs = BgzfData::UnpackUnsignedInt(buffer);
+    unsigned int numberRefSeqs = BgzfData::UnpackUnsignedInt(buffer);
+    if ( IsBigEndian ) { SwapEndian_32(numberRefSeqs); }
     if (numberRefSeqs == 0) { return; }
     References.reserve((int)numberRefSeqs);
 
@@ -831,13 +918,15 @@ void BamReader::BamReaderPrivate::LoadReferenceData(void) {
 
         // get length of reference name
         mBGZF.Read(buffer, 4);
-        const unsigned int refNameLength = BgzfData::UnpackUnsignedInt(buffer);
+        unsigned int refNameLength = BgzfData::UnpackUnsignedInt(buffer);
+        if ( IsBigEndian ) { SwapEndian_32(refNameLength); }
         char* refName = (char*)calloc(refNameLength, 1);
 
         // get reference name and reference sequence length
         mBGZF.Read(refName, refNameLength);
         mBGZF.Read(buffer, 4);
-        const int refLength = BgzfData::UnpackSignedInt(buffer);
+        int refLength = BgzfData::UnpackSignedInt(buffer);
+        if ( IsBigEndian ) { SwapEndian_32(refLength); }
 
         // store data for reference
         RefData aReference;
@@ -946,17 +1035,6 @@ bool BamReader::BamReaderPrivate::Rewind(void) {
     return mBGZF.Seek(AlignmentsBeginOffset);
 }
 
-// rounds value up to next power-of-2 (used in index building)
-void BamReader::BamReaderPrivate::Roundup32(int& value) {    
-    --value;
-    value |= value >> 1;
-    value |= value >> 2;
-    value |= value >> 4;
-    value |= value >> 8;
-    value |= value >> 16;
-    ++value;
-}
-
 // saves index data to BAM index file (".bai"), returns success/fail
 bool BamReader::BamReaderPrivate::WriteIndex(void) {
 
@@ -972,6 +1050,7 @@ bool BamReader::BamReaderPrivate::WriteIndex(void) {
 
     // write number of reference sequences
     int32_t numReferenceSeqs = Index.size();
+    if ( IsBigEndian ) { SwapEndian_32(numReferenceSeqs); }
     fwrite(&numReferenceSeqs, 4, 1, indexStream);
 
     // iterate over reference sequences
@@ -986,6 +1065,7 @@ bool BamReader::BamReaderPrivate::WriteIndex(void) {
 
         // write number of bins
         int32_t binCount = binMap.size();
+        if ( IsBigEndian ) { SwapEndian_32(binCount); }
         fwrite(&binCount, 4, 1, indexStream);
 
         // iterate over bins
@@ -994,14 +1074,16 @@ bool BamReader::BamReaderPrivate::WriteIndex(void) {
         for ( ; binIter != binEnd; ++binIter ) {
 
             // get bin data (key and chunk vector)
-            const uint32_t& binKey = (*binIter).first;
+            uint32_t binKey = (*binIter).first;
             const ChunkVector& binChunks = (*binIter).second;
 
             // save BAM bin key
+            if ( IsBigEndian ) { SwapEndian_32(binKey); }
             fwrite(&binKey, 4, 1, indexStream);
 
             // save chunk count
             int32_t chunkCount = binChunks.size();
+            if ( IsBigEndian ) { SwapEndian_32(chunkCount); }
             fwrite(&chunkCount, 4, 1, indexStream);
 
             // iterate over chunks
@@ -1011,9 +1093,14 @@ bool BamReader::BamReaderPrivate::WriteIndex(void) {
 
                 // get current chunk data
                 const Chunk& chunk    = (*chunkIter);
-                const uint64_t& start = chunk.Start;
-                const uint64_t& stop  = chunk.Stop;
+                uint64_t start = chunk.Start;
+                uint64_t stop  = chunk.Stop;
 
+                if ( IsBigEndian ) {
+                    SwapEndian_64(start);
+                    SwapEndian_64(stop);
+                }
+                
                 // save chunk offsets
                 fwrite(&start, 8, 1, indexStream);
                 fwrite(&stop,  8, 1, indexStream);
@@ -1022,6 +1109,7 @@ bool BamReader::BamReaderPrivate::WriteIndex(void) {
 
         // write linear offsets size
         int32_t offsetSize = offsets.size();
+        if ( IsBigEndian ) { SwapEndian_32(offsetSize); }
         fwrite(&offsetSize, 4, 1, indexStream);
 
         // iterate over linear offsets
@@ -1030,7 +1118,8 @@ bool BamReader::BamReaderPrivate::WriteIndex(void) {
         for ( ; offsetIter != offsetEnd; ++offsetIter ) {
 
             // write linear offset value
-            const uint64_t& linearOffset = (*offsetIter);
+            uint64_t linearOffset = (*offsetIter);
+            if ( IsBigEndian ) { SwapEndian_64(linearOffset); }
             fwrite(&linearOffset, 8, 1, indexStream);
         }
     }
diff --git a/src/utils/BamTools/BamReader.h b/src/utils/BamTools/BamReader.h
index 6332cde7..8047d7a7 100644
--- a/src/utils/BamTools/BamReader.h
+++ b/src/utils/BamTools/BamReader.h
@@ -1,9 +1,9 @@
 // ***************************************************************************
-// BamReader.h (c) 2009 Derek Barnett, Michael Strömberg
+// BamReader.h (c) 2009 Derek Barnett, Michael Str�mberg
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 8 December 2009 (DB)
+// Last modified: 30 March 2010 (DB)
 // ---------------------------------------------------------------------------
 // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
 // Institute.
@@ -59,11 +59,11 @@ class BamReader {
         // returns SAM header text
         const std::string GetHeaderText(void) const;
         // returns number of reference sequences
-        const int GetReferenceCount(void) const;
+        int GetReferenceCount(void) const;
         // returns vector of reference objects
         const BamTools::RefVector GetReferenceData(void) const;
         // returns reference id (used for BamReader::Jump()) for the given reference name
-        const int GetReferenceID(const std::string& refName) const;
+        int GetReferenceID(const std::string& refName) const;
 
         // ----------------------
         // BAM index operations
diff --git a/src/utils/BamTools/BamWriter.cpp b/src/utils/BamTools/BamWriter.cpp
index d3363807..2cd2742d 100644
--- a/src/utils/BamTools/BamWriter.cpp
+++ b/src/utils/BamTools/BamWriter.cpp
@@ -1,9 +1,9 @@
 // ***************************************************************************
-// BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett
+// BamWriter.cpp (c) 2009 Michael Str�mberg, Derek Barnett
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 8 December 2009 (DB)
+// Last modified: 30 March 2010 (DB)
 // ---------------------------------------------------------------------------
 // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad
 // Institute.
@@ -21,9 +21,14 @@ struct BamWriter::BamWriterPrivate {
 
     // data members
     BgzfData mBGZF;
-
+    bool IsBigEndian;
+    
+    
     // constructor / destructor
-    BamWriterPrivate(void) { }
+    BamWriterPrivate(void) { 
+      IsBigEndian = SystemIsBigEndian();  
+    }
+    
     ~BamWriterPrivate(void) {
         mBGZF.Close();
     }
@@ -139,27 +144,34 @@ void BamWriter::BamWriterPrivate::EncodeQuerySequence(const string& query, strin
     while(*pQuery) {
 
         switch(*pQuery) {
+            
             case '=':
-                    nucleotideCode = 0;
-                    break;
+                nucleotideCode = 0;
+                break;
+                
             case 'A':
-                    nucleotideCode = 1;
-                    break;
+                nucleotideCode = 1;
+                break;
+            
             case 'C':
-                    nucleotideCode = 2;
-                    break;
+                nucleotideCode = 2;
+                break;
+            
             case 'G':
-                    nucleotideCode = 4;
-                    break;
+                nucleotideCode = 4;
+                break;
+            
             case 'T':
-                    nucleotideCode = 8;
-                    break;
+                nucleotideCode = 8;
+                break;
+            
             case 'N':
-                    nucleotideCode = 15;
-                    break;
+                nucleotideCode = 15;
+                break;
+            
             default:
-                    printf("ERROR: Only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", *pQuery);
-                    exit(1);
+                printf("ERROR: Only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", *pQuery);
+                exit(1);
         }
 
         // pack the nucleotide code
@@ -193,7 +205,8 @@ void BamWriter::BamWriterPrivate::Open(const string& filename, const string& sam
     mBGZF.Write(BAM_SIGNATURE, SIGNATURE_LENGTH);
 
     // write the SAM header text length
-    const unsigned int samHeaderLen = samHeader.size();
+    uint32_t samHeaderLen = samHeader.size();
+    if ( IsBigEndian ) { SwapEndian_32(samHeaderLen); }
     mBGZF.Write((char*)&samHeaderLen, BT_SIZEOF_INT);
 
     // write the SAM header text
@@ -202,7 +215,8 @@ void BamWriter::BamWriterPrivate::Open(const string& filename, const string& sam
     }
 
     // write the number of reference sequences
-    const unsigned int numReferenceSequences = referenceSequences.size();
+    uint32_t numReferenceSequences = referenceSequences.size();
+    if ( IsBigEndian ) { SwapEndian_32(numReferenceSequences); }
     mBGZF.Write((char*)&numReferenceSequences, BT_SIZEOF_INT);
 
     // =============================
@@ -213,14 +227,17 @@ void BamWriter::BamWriterPrivate::Open(const string& filename, const string& sam
     for(rsIter = referenceSequences.begin(); rsIter != referenceSequences.end(); rsIter++) {
 
         // write the reference sequence name length
-        const unsigned int referenceSequenceNameLen = rsIter->RefName.size() + 1;
+        uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;
+        if ( IsBigEndian ) { SwapEndian_32(referenceSequenceNameLen); }
         mBGZF.Write((char*)&referenceSequenceNameLen, BT_SIZEOF_INT);
 
         // write the reference sequence name
         mBGZF.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
 
         // write the reference sequence length
-        mBGZF.Write((char*)&rsIter->RefLength, BT_SIZEOF_INT);
+        int32_t referenceLength = rsIter->RefLength;
+        if ( IsBigEndian ) { SwapEndian_32(referenceLength); }
+        mBGZF.Write((char*)&referenceLength, BT_SIZEOF_INT);
     }
 }
 
@@ -243,10 +260,17 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
     const unsigned int encodedQueryLen = encodedQuery.size();
 
     // store the tag data length
+    // -------------------------------------------
+    // Modified: 3-25-2010 DWB
+    // Contributed: ARQ
+    // Fixed: "off by one" error when parsing tags in files produced by BamWriter
     const unsigned int tagDataLength = al.TagData.size();
-
+    // original line: 
+    // const unsigned int tagDataLength = al.TagData.size() + 1;     
+    // -------------------------------------------
+    
     // assign the BAM core data
-    unsigned int buffer[8];
+    uint32_t buffer[8];
     buffer[0] = al.RefID;
     buffer[1] = al.Position;
     buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | nameLen;
@@ -257,18 +281,40 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
     buffer[7] = al.InsertSize;
 
     // write the block size
-    const unsigned int dataBlockSize = nameLen + packedCigarLen + encodedQueryLen + queryLen + tagDataLength;
-    const unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize;
+    unsigned int dataBlockSize = nameLen + packedCigarLen + encodedQueryLen + queryLen + tagDataLength;
+    unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize;
+    if ( IsBigEndian ) { SwapEndian_32(blockSize); }
     mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);
 
     // write the BAM core
+    if ( IsBigEndian ) { 
+        for ( int i = 0; i < 8; ++i ) { 
+            SwapEndian_32(buffer[i]); 
+        } 
+    }
     mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);
 
     // write the query name
     mBGZF.Write(al.Name.c_str(), nameLen);
 
     // write the packed cigar
-    mBGZF.Write(packedCigar.data(), packedCigarLen);
+    if ( IsBigEndian ) {
+      
+        char* cigarData = (char*)calloc(sizeof(char), packedCigarLen);
+        memcpy(cigarData, packedCigar.data(), packedCigarLen);
+        
+        for (unsigned int i = 0; i < packedCigarLen; ++i) {
+            if ( IsBigEndian ) { 
+              SwapEndian_32p(&cigarData[i]); 
+            }
+        }
+        
+        mBGZF.Write(cigarData, packedCigarLen);
+        free(cigarData);
+        
+    } else { 
+        mBGZF.Write(packedCigar.data(), packedCigarLen);
+    }
 
     // write the encoded query sequence
     mBGZF.Write(encodedQuery.data(), encodedQueryLen);
@@ -280,5 +326,57 @@ void BamWriter::BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
     mBGZF.Write(pBaseQualities, queryLen);
 
     // write the read group tag
-    mBGZF.Write(al.TagData.data(), tagDataLength);
+    if ( IsBigEndian ) {
+      
+        char* tagData = (char*)calloc(sizeof(char), tagDataLength);
+        memcpy(tagData, al.TagData.data(), tagDataLength);
+      
+        int i = 0;
+        while ( (unsigned int)i < tagDataLength ) {
+            
+            i += 2;                                 // skip tag type (e.g. "RG", "NM", etc)
+            uint8_t type = toupper(tagData[i]);     // lower & upper case letters have same meaning 
+            ++i;                                    // skip value type
+            
+            switch (type) {
+              
+                case('A') :
+                case('C') : 
+                    ++i;
+                    break;
+                            
+                case('S') : 
+                    SwapEndian_16p(&tagData[i]); 
+                    i+=2; // sizeof(uint16_t)
+                    break;
+                            
+                case('F') :
+                case('I') : 
+                    SwapEndian_32p(&tagData[i]);
+                    i+=4; // sizeof(uint32_t)
+                    break;
+                            
+                case('D') : 
+                    SwapEndian_64p(&tagData[i]);
+                    i+=8; // sizeof(uint64_t)
+                    break;
+                            
+                case('H') :
+                case('Z') : 
+                    while (tagData[i]) { ++i; }
+                    ++i; // increment one more for null terminator
+                    break;
+                            
+                default : 
+                    printf("ERROR: Invalid tag value type\n"); // shouldn't get here
+                    free(tagData);
+                    exit(1); 
+            }
+        }
+        
+        mBGZF.Write(tagData, tagDataLength);
+        free(tagData);
+    } else {
+        mBGZF.Write(al.TagData.data(), tagDataLength);
+    }
 }
-- 
GitLab