From 4813964287ca704010bb70716ce66132ff632eee Mon Sep 17 00:00:00 2001
From: ermueller <erik@east.de>
Date: Wed, 2 Sep 2015 16:15:29 +0200
Subject: [PATCH] Reqorked isotopic formula annotation, formulas now follow
 Hill system

---
 R/Isotopic_Annotation.R | 53 ++++++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/R/Isotopic_Annotation.R b/R/Isotopic_Annotation.R
index 544f97f..8d08545 100644
--- a/R/Isotopic_Annotation.R
+++ b/R/Isotopic_Annotation.R
@@ -7,7 +7,7 @@
 #' @param intensity_precision The difference that is accepted between the calculated and observed intensity of a possible isotopic peak. Further details down below.
 #' @param conflict Either "isotopic"(Peak formulas are always chosen if they fit the requirements for an isotopic peak)
 #' 		  or "strict"(Peaks are only marked as isotopic when there hasn't been a formula assigned before.)
-#' @param isolationWindow The isolation window in Da
+#' @param isolationWindow The width of the isolation window in Da 
 #' @param evalMode Currently no function yet, but planned
 #' @param plotSpectrum A boolean specifiying whether the spectrumshould be plotted
 #' @param settings Options to be used for processing. Defaults to the options loaded via
@@ -21,7 +21,7 @@
 #  "check" checks matched peaks with formulas for isotopes and removes them if no isotopic peaks have been found for all formulas. The formula is also
 #  adjusted, if the one with matching isotopes doesn't have the lowest dppm. "complete" does both.}
 #' }
-#' @return The \code{msmsWorkspace} with .
+#' @return The \code{msmsWorkspace} with annotated isolation peaks
 #' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
 #' @author Erik Mueller, UFZ
 #' @export
@@ -135,36 +135,57 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 5000, intensity_pre
 				pattern <- as.data.frame(isopattern(formula, isotopes = isotopes)[[1]])
 				mass <- findMz.formula(formula,"")$mzCenter
 				
-				# Find index of nonisotopic molecule
+				# Find index of nonisotopic molecule and
+				# normalize abundance that nonisotopic molecule has always "1"
 				mainIndex <- which.min(abs(pattern[,"m/z"]-mass))
+				pattern[,"abundance"] <- round(pattern[,"abundance"] * (100/pattern[mainIndex,"abundance"]),3)
 				
+				# Find all isotope atom names (only in colnames of patterns, sadly)
 				isoCols <- colnames(pattern)[3:ncol(pattern)]
 				
-				pattern[,"abundance"] <- round(pattern[,"abundance"] * (100/pattern[mainIndex,"abundance"]),3)
-				defAtoms <- which(isoCols %in% defIsotopes)
+				# Order the formulas to be in Hill system:
+				# First the Cs, then the Hs, then lexicographical
+				# First: Split isotope names so that there only are atoms
+				# Find position of C and H
 				
-				# Indices of isotope notation string to find atomname
+				splitNames <- gsub('^([0-9]+)(.+)$', '\\2', isoCols)
+				CHPos <- c(which(splitNames == "C"),which(splitNames == "H"))
 				
-				atomIndices <- regexpr("[A-Z][a-z]?",isoCols)
+				# Account for special case: No Cs or Hs
+				if(length(CHPos)){
+					# If there are Cs and Hs, overwrite the positions so the always get sorted to the front
+					splitNames[CHPos] <- ""
+				}
 				
+				# Default isotopes are always first in the colnames, so no need to order internally between isotopes
+				atomOrder <- order(splitNames)
 				
-				newnames <- vector()
+				# If there are Cs and Hs, the order must be preserved
+				if(length(CHPos)){
+					atomOrder[1:length(CHPos)] <- CHPos
+				}
+				# else it is already ordered
 				
-				for(i in 1:length(atomIndices)){
-					if(i %in% defAtoms){
-						newnames[i] <- substr(isoCols[i],atomIndices[i],nchar(isoCols[i]))
+				# Mark new names for formula creation.
+				newnames <- unname(sapply(isoCols, function(currentCol){
+					if(currentCol %in% defIsotopes){
+						# "Default" isotope (no isotope mass in formula, e.g. "C")
+						return(gsub('^(.{0})([0-9]+)(.+)$', '\\3', currentCol))
 					}else{
-						lhs <- paste0('^(.{', atomIndices[i]-1, '})(.+)$')
-						newnames[i] <- gsub(lhs, '\\1]\\2', isoCols[i])
-						newnames[i] <- gsub('^(.{0})(.+)$', '\\1[\\2', newnames[i])
+						# Other isotopes (isotope mass in formula, e.g. "[13]C")
+						return(gsub('^(.{0})([0-9]+)(.+)$', '\\1[\\2]\\3', currentCol))
 					}
-				}
+				}))
 				
+				# Generate the formula for every pattern
 				pattern$formula <- apply(pattern,1,function(p){
-					paste0(sapply(3:ncol(pattern),function(isoIndex){
+					paste0(sapply(atomOrder+2,function(isoIndex){
 						if(p[isoIndex] == 0){ 
 							return("")
 						}
+						if(p[isoIndex] == 1){
+							return(newnames[isoIndex-2])
+						}
 						return(paste0(newnames[isoIndex-2],p[isoIndex]))
 					}),collapse="")
 				})
-- 
GitLab