From 6078f7bba7b6d7d6055967e3970cf5cfd3a4a5c1 Mon Sep 17 00:00:00 2001
From: Aaron <aaronquinlan@gmail.com>
Date: Wed, 5 Jan 2011 22:10:28 -0500
Subject: [PATCH] Allowed 8 or 9 column GFF/GTF files.  Many thanks to Michael
 Hoffman.

---
 src/utils/bedFile/bedFile.h | 106 +++++++++++++++++++++++++++---------
 1 file changed, 79 insertions(+), 27 deletions(-)

diff --git a/src/utils/bedFile/bedFile.h b/src/utils/bedFile/bedFile.h
index ddf29d3d..8f26a024 100644
--- a/src/utils/bedFile/bedFile.h
+++ b/src/utils/bedFile/bedFile.h
@@ -460,7 +460,7 @@ private:
                         if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
                     }
                     // it's GFF, assuming columns columns 4 and 5 are numeric and we have 9 fields total.
-                    else if ((numFields == 9) && isInteger(lineVector[3]) && isInteger(lineVector[4])) {
+                    else if ((numFields >= 8) && isInteger(lineVector[3]) && isInteger(lineVector[4])) {
                         setGff(true);
                         setFileType(GFF_FILETYPE);
                         setBedType(numFields);       // we now expect numFields columns in each line
@@ -612,7 +612,7 @@ private:
     template <typename T>
     inline bool parseGffLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
         if (numFields == this->bedType) {
-            if (this->bedType == 9 && _isGff) {
+            if (this->bedType >= 8 && _isGff) {
                 bed.chrom = lineVector[0];
                 // substract 1 to force the start to be BED-style
                 bed.start  = atoi(lineVector[3].c_str()) - 1;
@@ -622,11 +622,13 @@ private:
                 bed.strand = lineVector[6].c_str();
                 bed.otherFields.push_back(lineVector[1]);  // add GFF "source". unused in BED
                 bed.otherFields.push_back(lineVector[7]);  // add GFF "fname". unused in BED
-                bed.otherFields.push_back(lineVector[8]);  // add GFF "group". unused in BED
+                // handle the optional 9th field.
+                if (this->bedType == 9)
+                    bed.otherFields.push_back(lineVector[8]);  // add GFF "group". unused in BED
             }
             else {
                 cerr << "Error: unexpected number of fields at line: " << lineNum <<
-                        ".  Verify that your files are TAB-delimited and that your GFF file has 9 fields.  Exiting..." << endl;
+                        ".  Verify that your files are TAB-delimited and that your GFF file has 8 or 9 fields.  Exiting..." << endl;
                 exit(1);
             }
             if (bed.start > bed.end) {
@@ -647,8 +649,8 @@ private:
             cerr << "Differing number of GFF fields encountered at line: " << lineNum << ".  Exiting..." << endl;
             exit(1);
         }
-        else if ((numFields < 9) && (numFields != 0)) {
-            cerr << "TAB delimited GFF file with 9 fields is required at line: "<< lineNum << ".  Exiting..." << endl;
+        else if ((numFields < 8) && (numFields != 0)) {
+            cerr << "TAB delimited GFF file with 8 or 9 fields is required at line: "<< lineNum << ".  Exiting..." << endl;
             exit(1);
         }
         return false;
@@ -704,11 +706,21 @@ public:
             }
         }
         // GFF
-        else if (this->bedType == 9) {
-            printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                             bed.name.c_str(), bed.start+1, bed.end,
-                                                             bed.score.c_str(), bed.strand.c_str(),
-                                                             bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
+        else if (_isGff == true) {
+            // "GFF-8"
+            if (this->bedType == 8) {
+                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
+                                                                 bed.name.c_str(), bed.start+1, bed.end,
+                                                                 bed.score.c_str(), bed.strand.c_str(),
+                                                                 bed.otherFields[1].c_str());
+            }
+            // "GFF-9"
+            else if (this->bedType == 9) {
+                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
+                                                                 bed.name.c_str(), bed.start+1, bed.end,
+                                                                 bed.score.c_str(), bed.strand.c_str(),
+                                                                 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
+            }
         }
     }
 
@@ -762,12 +774,22 @@ public:
             }
             printf("\n");
         }
-        //GFF
-        else if (this->bedType == 9) {
-            printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                             bed.name.c_str(), bed.start+1, bed.end,
-                                                             bed.score.c_str(), bed.strand.c_str(),
-                                                             bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
+        // GFF
+        else if (_isGff == true) {
+            // "GFF-8"
+            if (this->bedType == 8) {
+                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
+                                                                 bed.name.c_str(), bed.start+1, bed.end,
+                                                                 bed.score.c_str(), bed.strand.c_str(),
+                                                                 bed.otherFields[1].c_str());
+            }
+            // "GFF-9"
+            else if (this->bedType == 9) {
+                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
+                                                                 bed.name.c_str(), bed.start+1, bed.end,
+                                                                 bed.score.c_str(), bed.strand.c_str(),
+                                                                 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
+            }
         }
     }
 
@@ -821,11 +843,21 @@ public:
             }
         }
         // GFF
-        else if (this->bedType == 9) {
-            printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
+        else if (_isGff == true) {
+            // "GFF-8"
+            if (this->bedType == 8) {
+                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
+                                                             bed.name.c_str(), start+1, end,
+                                                             bed.score.c_str(), bed.strand.c_str(),
+                                                             bed.otherFields[1].c_str());
+            }
+            // "GFF-9"
+            else if (this->bedType == 9) {
+                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
                                                              bed.name.c_str(), start+1, end,
                                                              bed.score.c_str(), bed.strand.c_str(),
                                                              bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
+            }
         }
     }
 
@@ -881,11 +913,21 @@ public:
             printf("\n");
         }
         // GFF
-        else if (this->bedType == 9) {  // add 1 to the start for GFF
-            printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
+        else if (_isGff == true) {
+            // "GFF-8"
+            if (this->bedType == 8) {
+                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
+                                                             bed.name.c_str(), start+1, end,
+                                                             bed.score.c_str(), bed.strand.c_str(),
+                                                             bed.otherFields[1].c_str());
+            }
+            // "GFF-9"
+            else if (this->bedType == 9) {
+                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
                                                              bed.name.c_str(), start+1, end,
                                                              bed.score.c_str(), bed.strand.c_str(),
                                                              bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
+            }
         }
     }
 
@@ -895,7 +937,7 @@ public:
     */
     void reportNullBedTab() {
 
-        if (_isGff == false) {
+        if (_isGff == false && _isVcf == false) {
             if (this->bedType == 3) {
                 printf (".\t-1\t-1\t");
             }
@@ -915,8 +957,13 @@ public:
                 }
             }
         }
-        else if (this->bedType == 9) {
-            printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\t");
+        else if (_isGff == true && _isVcf == false) {
+            if (this->bedType == 8) {
+                printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t");
+            }
+            else if (this->bedType == 9) {
+                printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\t");
+            }
         }
     }
 
@@ -926,7 +973,7 @@ public:
     */
     void reportNullBedNewLine() {
 
-        if (_isGff == false) {
+        if (_isGff == false && _isVcf == false) {
             if (this->bedType == 3) {
                 printf (".\t-1\t-1\n");
             }
@@ -947,8 +994,13 @@ public:
                 printf("\n");
             }
         }
-        else if (this->bedType == 9) {
-            printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\n");
+        else if (_isGff == true && _isVcf == false) {
+            if (this->bedType == 8) {
+                printf (".\t.\t.\t-1\t-1\t-1\t.\t.\n");
+            }
+            else if (this->bedType == 9) {
+                printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\n");
+            }
         }
     }
 
-- 
GitLab