From 5ba347e100fc48df4db79f4e388203b42b67c6a9 Mon Sep 17 00:00:00 2001
From: Neil Kindlon <nek3d@d-128-109-67.bootp.Virginia.EDU>
Date: Fri, 7 Nov 2014 13:56:31 -0500
Subject: [PATCH] Fix bug 100: allow scientific notation for coords; exit with
 an error instead of returning INT_MIN for invalid numbers

---
 src/utils/general/ParseTools.cpp | 29 ++++++++++++++++++++++++++---
 test/merge/expFormat.bed         |  1 +
 test/merge/test-merge.sh         | 10 ++++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 test/merge/expFormat.bed

diff --git a/src/utils/general/ParseTools.cpp b/src/utils/general/ParseTools.cpp
index 04782a71..e85065ad 100644
--- a/src/utils/general/ParseTools.cpp
+++ b/src/utils/general/ParseTools.cpp
@@ -2,6 +2,9 @@
 #include <climits>
 #include <cctype>
 #include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <sstream>
 
 //This functions recognizes only numbers with digits, plus sign, minus sign, decimal point, e, or E. Hexadecimal and pointers not currently supported.
 bool isNumeric(const QuickString &str) {
@@ -19,12 +22,26 @@ int str2chrPos(const QuickString &str) {
 }
 
 int str2chrPos(const char *str, size_t ulen) {
+
 	if (ulen == 0) {
 		ulen = strlen(str);
 	}
+
+	//first test for exponents / scientific notation
+	bool hasExponent = false;
+	for (size_t i=0; i < ulen; i++) {
+		if (str[i] == 'e' || str[i] == 'E') {
+			std::istringstream ss(str);
+			double retVal;
+			ss >> retVal;
+			return (int)retVal;
+		}
+	}
+
 	int len=(int)ulen;
 	if (len < 1 || len > 10) {
-		return INT_MIN; //can't do more than 9 digits and a minus sign
+		fprintf(stderr, "***** ERROR: too many digits/characters for integer conversion in string %s. Exiting...\n", str);
+		exit(1);
 	}
 
 	register int sum=0;
@@ -39,9 +56,15 @@ int str2chrPos(const char *str, size_t ulen) {
 
 	for (int i=startPos; i < len; i++) {
 		char currChar = str[i];
+		if (currChar == 'e' || currChar == 'E') {
+			//NOTE(review): appears unreachable - the scan above already returns on 'e'/'E'; atoi would stop parsing at the exponent character
+			return atoi(str);
+		}
 		if (!isdigit(currChar)) {
-			return INT_MIN;
+			fprintf(stderr, "***** ERROR: illegal character '%c' found in integer conversion of string %s. Exiting...\n", currChar, str);
+			exit(1);
 		}
+
 		int dig = currChar - 48; //ascii code for zero.
 		int power = len -i -1;
 
@@ -77,7 +100,7 @@ int str2chrPos(const char *str, size_t ulen) {
 			sum += dig *1000000000;
 			break;
 		default:
-			return INT_MIN;
+			return 0;
 			break;
 		}
 	}
diff --git a/test/merge/expFormat.bed b/test/merge/expFormat.bed
new file mode 100644
index 00000000..d7bd5182
--- /dev/null
+++ b/test/merge/expFormat.bed
@@ -0,0 +1 @@
+chr1	8e02	830
diff --git a/test/merge/test-merge.sh b/test/merge/test-merge.sh
index 586940bf..c6c80495 100644
--- a/test/merge/test-merge.sh
+++ b/test/merge/test-merge.sh
@@ -518,3 +518,13 @@ chr1	30	100" > exp
 $BT merge -i a.bed -iobuf 8192 > obs
 check exp obs
 rm exp obs
+
+###########################################################
+#  Test that scientific notation is allowed for coordinates
+###########################################################
+echo "    merge.t43...\c"
+echo \
+"chr1	800	830" > exp
+$BT merge -i expFormat.bed > obs
+check exp obs
+rm obs exp
-- 
GitLab