From 5ba347e100fc48df4db79f4e388203b42b67c6a9 Mon Sep 17 00:00:00 2001 From: Neil Kindlon <nek3d@d-128-109-67.bootp.Virginia.EDU> Date: Fri, 7 Nov 2014 13:56:31 -0500 Subject: [PATCH] Fixed bug 100, allowing scientific notation for coords, not returning INT_MIN for invalid numbers --- src/utils/general/ParseTools.cpp | 29 ++++++++++++++++++++++++++--- test/merge/expFormat.bed | 1 + test/merge/test-merge.sh | 10 ++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 test/merge/expFormat.bed diff --git a/src/utils/general/ParseTools.cpp b/src/utils/general/ParseTools.cpp index 04782a71..e85065ad 100644 --- a/src/utils/general/ParseTools.cpp +++ b/src/utils/general/ParseTools.cpp @@ -2,6 +2,9 @@ #include <climits> #include <cctype> #include <cstring> +#include <cstdio> +#include <cstdlib> +#include <sstream> //This functions recognizes only numbers with digits, plus sign, minus sign, decimal point, e, or E. Hexadecimal and pointers not currently supported. bool isNumeric(const QuickString &str) { @@ -19,12 +22,26 @@ int str2chrPos(const QuickString &str) { } int str2chrPos(const char *str, size_t ulen) { + if (ulen == 0) { ulen = strlen(str); } + + //first test for exponents / scientific notation + bool hasExponent = false; + for (size_t i=0; i < ulen; i++) { + if (str[i] == 'e' || str[i] == 'E') { + std::istringstream ss(str); + double retVal; + ss >> retVal; + return (int)retVal; + } + } + int len=(int)ulen; if (len < 1 || len > 10) { - return INT_MIN; //can't do more than 9 digits and a minus sign + fprintf(stderr, "***** ERROR: too many digits/characters for integer conversion in string %s. Exiting...\n", str); + exit(1); } register int sum=0; @@ -39,9 +56,15 @@ int str2chrPos(const char *str, size_t ulen) { for (int i=startPos; i < len; i++) { char currChar = str[i]; + if (currChar == 'e' || currChar == 'E') { + //default to atoi for scientific notation + return atoi(str); + } if (!isdigit(currChar)) { - return INT_MIN; + fprintf(stderr, "***** ERROR: illegal character '%c' found in integer conversion of string %s. Exiting...\n", currChar, str); + exit(1); } + int dig = currChar - 48; //ascii code for zero. int power = len -i -1; @@ -77,7 +100,7 @@ int str2chrPos(const char *str, size_t ulen) { sum += dig *1000000000; break; default: - return INT_MIN; + return 0; break; } } diff --git a/test/merge/expFormat.bed b/test/merge/expFormat.bed new file mode 100644 index 00000000..d7bd5182 --- /dev/null +++ b/test/merge/expFormat.bed @@ -0,0 +1 @@ +chr1 8e02 830 diff --git a/test/merge/test-merge.sh b/test/merge/test-merge.sh index 586940bf..c6c80495 100644 --- a/test/merge/test-merge.sh +++ b/test/merge/test-merge.sh @@ -518,3 +518,13 @@ chr1 30 100" > exp $BT merge -i a.bed -iobuf 8192 > obs check exp obs rm exp obs + +########################################################### +# Test that scientific notation is allowed for coordinates +########################################################### +echo " merge.t43...\c" +echo \ +"chr1 800 830" > exp +$BT merge -i expFormat.bed > obs +check exp obs +rm obs exp -- GitLab