checkIsotopes finished

ccf689c8 · ermueller · aa602447 · ccf689c8 · ccf689c8 · ccf689c8
Commit ccf689c8 authored 9 years ago by ermueller
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -35,6 +35,7 @@ Suggests:
    ontoCAT,
    RUnit
 Collate:
+	'alternateAnalyze.R'
    'createMassBank.R'
    'formulaCalculator.R'
    'leCsvAccess.R'
@@ -54,5 +55,6 @@ Collate:
    'validateMassBank.R'
    'tools.R'
    'msmsRead.R'
+	'Isotopic_Annotation.R'
 	'zzz.R'
    
--- a/NAMESPACE
+++ b/NAMESPACE
-# Generated by roxygen2 (4.1.0): do not edit by hand
+# Generated by roxygen2 (4.1.1): do not edit by hand

 S3method(c,msmsWSspecs)
 export(CTS.externalIdSubset)
@@ -13,9 +13,11 @@ export(addProperty)
 export(aggregateSpectra)
 export(analyzeMsMs)
 export(analyzeMsMs.formula)
+export(analyzeMsMs.formula.optimized)
 export(analyzeMsMs.intensity)
 export(annotator.default)
 export(archiveResults)
+export(checkIsotopes)
 export(checkSpectra)
 export(cleanElnoise)
 export(combineMultiplicities)
@@ -40,6 +42,7 @@ export(findMsMsHR.direct)
 export(findMsMsHR.mass)
 export(findMsMsHR.ticms2)
 export(findMsMsHR.ticms2.d)
+export(findMsMsHRperxcms)
 export(findMsMsHRperxcms.direct)
 export(findMz)
 export(findMz.formula)

--- a/R/Isotopic_Annotation.R
+++ b/R/Isotopic_Annotation.R
+#' Checks for isotopes in a \code{msmsWorkspace}
+#' 
+#' @param w A \code{msmsWorkspace} to work with.
+#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' @param intensity_cutoff The cutoff (as an absolute intensity value) under which isotopic peaks shouldn't be checked for or accepted as valid.
+#' @param intensity_precision The difference that is accepted between the calculated and observed intensity of a possible isotopic peak. Further details down below.
+#' @param conflict Either "isotopic"(Peak formulas are always chosen if they fit the requirements for an isotopic peak)
+#' 		  or "strict"(Peaks are only marked as isotopic when there hasn't been a formula assigned before.)
+#' @param isolationWindow The isolation window in Da
+#' @param evalMode Currently no function yet, but planned
+#' @param plotSpectrum A boolean specifiying whether the spectrumshould be plotted
+#' @param settings Options to be used for processing. Defaults to the options loaded via
+#' 			\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
+#' @details text describing parameter inputs in more detail.
+#' \itemize{
+#'  \item{\code{intensity_precision}}{This parameter determines how strict the intensity values should adhere to the calculated intensity in relation to the parent peak.
+#'  Options for this parameter are \code{"none"}, where the intensity is irrelevant, \code{"low"}, which has an error margin of 70\% and \code{"high"}, where the error margin 
+#'  is set to 35\%. The recommended setting is \code{"low"}, but can be changed to adjust to the intensity precision of the mass spectrometer.}
+#  \item{\code{evalMode}}{This parameter sets what should be done after the isotopic check. The option "add" adds failpeaks if they are isotopic peaks of previously matched peaks.
+#  "check" checks matched peaks with formulas for isotopes and removes them if no isotopic peaks have been found for all formulas. The formula is also
+#  adjusted, if the one with matching isotopes doesn't have the lowest dppm. "complete" does both.}
+#' }
+#' @return The \code{msmsWorkspace} with .
+#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
+#' @author Erik Mueller, UFZ
+#' @export
+checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 5000, intensity_precision = "low", conflict = "dppm", 
+							isolationWindow = 4, evalMode = "complete", plotSpectrum = TRUE, settings = getOption("RMassBank")){
+
+	# Load library and data
+	require(enviPat)
+	data("isotopes")
+	
+	
+	if(!(intensity_precision %in% c("none","low","high"))){
+		stop('intensity_precision must be specified as either "none", "low" or "high"')
+	}
+	
+	switch(intensity_precision, 
+		none={
+			precisionVal <- Inf
+		},
+		low={
+			precisionVal <- 0.7
+		},
+		high={
+			precisionVal <- 0.35
+		}
+	)
+	
+	# Load filtersettings
+	filterSettings = settings$filterSettings
+	
+	# Assign formula additions according to code
+	if(mode == "pH") {
+		allowed_additions <- "H"
+		mode.charge <- 1
+	} else if(mode == "pNa") {
+		allowed_additions <- "Na"
+		mode.charge <- 1
+	} else if(mode == "pM") {
+		allowed_additions <- ""
+		mode.charge <- 1
+	} else if(mode == "mM") {
+		allowed_additions <- ""
+		mode.charge <- -1
+	} else if(mode == "mH") {
+		allowed_additions <- "H-1"
+		mode.charge <- -1
+	} else if(mode == "mFA") {
+		allowed_additions <- "C2H3O2"
+		mode.charge <- -1
+	} else if(mode == "pNH4") {
+		allowed_additions <- "NH4"
+		mode.charge <- 1
+	} else{
+		stop("mode = \"", mode, "\" not defined")
+	}
+	
+	if(!(evalMode %in% c("complete"))){
+	    stop('evalMode must currently be specified as "complete"')
+	}
+	
+	evalMode <- c("add","check")
+	
+	
+	# "default" isotopes (i.e. those with the highest abundance)
+	defIsotopes <- c("107Ag", "27Al", "40Ar", "75As", "197Au", "11B", "138Ba", "9Be", "209Bi",
+	"79Br", "12C", "40Ca", "114Cd", "140Ce", "35Cl", "59Co", "52Cr", "133Cs", "63Cu", "164Dy",
+	"166Er", "153Eu", "19F", "56Fe", "69Ga", "158Gd", "74Ge", "1H", "4He", "180Hf", "202Hg",
+	"165Ho", "127I", "115In", "193Ir", "39K", "84Kr", "139La", "7Li", "175Lu", "24Mg", "55Mn",
+	"98Mo", "14N", "23Na", "93Nb", "142Nd", "20Ne", "58Ni", "16O", "192Os", "31P", "231Pa",
+	"208Pb", "106Pd", "141Pr", "195Pt", "85Rb", "187Re", "103Rh", "102Ru", "32S", "121Sb",
+	"45Sc", "80Se", "28Si", "152Sm", "120Sn", "86Sr", "88Sr", "181Ta", "159Tb", "130Te",
+	"232Th", "48Ti", "205Tl", "169Tm", "238U", "51V", "184W", "132Xe", "89Y", "174Yb",
+	"64Zn", "90Zr")
+
+	
+	# Get the ppm limit from the settings
+	ppmlimit <- 2.25 * filterSettings$ppmFine
+	
+	# Get the cutoff from the settings
+	cut <- filterSettings$fineCut
+	cut_ratio <- filterSettings$fineCutRatio
+
+	# Extract matched and unmatched peaks from the aggregated peaks
+	matchedPeaks <- peaksMatched(w)
+	unmatchedPeaks <- peaksUnmatched(w)
+	
+	wEnv <- environment()
+	
+	# lapply over all runs
+	lapply(w@spectra, function(spec){
+		
+		# Find parent formula and cpdID
+		parent_formula <- add.formula(spec@formula, allowed_additions)
+		id <- as.numeric(spec@id)
+		
+
+		# lapply over all extracted MS2 spectra
+		lapply(spec@children, function(msmsdata){
+			
+			# Extract currently relevant peaks
+			currentMPeaks <- matchedPeaks[(matchedPeaks$cpdID == id) & (matchedPeaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
+			currentUPeaks <- unmatchedPeaks[(unmatchedPeaks$cpdID == id) & (unmatchedPeaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
+			
+			rownames(currentMPeaks) <- 1:nrow(currentMPeaks)
+			rownames(currentUPeaks) <- 1:nrow(currentUPeaks)
+			
+			# Generate isotopic patterns of the matched peaks
+			# Sort pattern by abundance
+			isoMPatterns <- lapply(currentMPeaks$formula, function(formula){
+				# Find pattern
+				pattern <- as.data.frame(isopattern(formula, isotopes = isotopes)[[1]])
+				mass <- findMz.formula(formula,"")$mzCenter
+				
+				# Find index of nonisotopic molecule
+				mainIndex <- which.min(abs(pattern[,"m/z"]-mass))
+				
+				isoCols <- colnames(pattern)[3:ncol(pattern)]
+				
+				pattern[,"abundance"] <- round(pattern[,"abundance"] * (100/pattern[mainIndex,"abundance"]),3)
+				defAtoms <- which(isoCols %in% defIsotopes)
+				
+				# Indices of isotope notation string to find atomname
+				
+				atomIndices <- regexpr("[A-Z][a-z]?",isoCols)
+				
+				
+				newnames <- vector()
+				
+				for(i in 1:length(atomIndices)){
+					if(i %in% defAtoms){
+						newnames[i] <- substr(isoCols[i],atomIndices[i],nchar(isoCols[i]))
+					}else{
+						lhs <- paste0('^(.{', atomIndices[i]-1, '})(.+)$')
+						newnames[i] <- gsub(lhs, '\\1]\\2', isoCols[i])
+						newnames[i] <- gsub('^(.{0})(.+)$', '\\1[\\2', newnames[i])
+					}
+				}
+				
+				pattern$formula <- apply(pattern,1,function(p){
+					paste0(sapply(3:ncol(pattern),function(isoIndex){
+						if(p[isoIndex] == 0){ 
+							return("")
+						}
+						return(paste0(newnames[isoIndex-2],p[isoIndex]))
+					}),collapse="")
+				})
+				pattern <- pattern[order(pattern[,"abundance"],decreasing = T),][-mainIndex,]
+			})
+			
+			names(isoMPatterns) <- currentMPeaks$formula
+			
+			
+			# Order patterns by abundance and make sure that the normal abundance is at 100%
+			
+			# Calculate the expected intensities according to the abundance
+			# See which expected isotope peaks have an intensity higher than the cutoff
+			
+			for(foundAnnotation in 1:nrow(currentMPeaks)){
+				intensities <- vector()
+				
+				for(pattern in 1:nrow(isoMPatterns[[foundAnnotation]])){
+					intensities[pattern]  <- currentMPeaks$int[foundAnnotation] * isoMPatterns[[foundAnnotation]]$abundance[pattern]/100
+				}
+				
+				roundedInt <- round(intensities,digits=-2)
+				keepIndex <- which(roundedInt >= intensity_cutoff)
+				isoMPatterns[[foundAnnotation]] <- isoMPatterns[[foundAnnotation]][keepIndex,,drop=FALSE]
+				if(nrow(isoMPatterns[[foundAnnotation]])){
+					isoMPatterns[[foundAnnotation]]$minintensity <- roundedInt[keepIndex] - roundedInt[keepIndex] * precisionVal
+					isoMPatterns[[foundAnnotation]]$maxintensity <- roundedInt[keepIndex] + roundedInt[keepIndex] * precisionVal
+					# Calculate the expected mz range of the isotope peak
+					isoMPatterns[[foundAnnotation]] <- cbind(isoMPatterns[[foundAnnotation]],t(sapply(isoMPatterns[[foundAnnotation]][,"m/z"], ppm, dppm=ppmlimit,l=T)))
+				}
+			}
+			
+			# Which isotope patterns still have theoretical intensities above the cutoff?
+			peaksToCheck <- which(as.logical(sapply(isoMPatterns,nrow)))
+			
+			# Now, look for isotopic patterns in unmatched peaks with all specified parameters
+			if("add" %in% evalMode){
+				# Which peaks have no correct formula annotation as of now?
+				currentUPeaks <- currentUPeaks[which(is.na(currentUPeaks$dppm) | abs(currentUPeaks$dppmBest) > ppmlimit/2.25),]
+				
+				# If there are any peaks without annotation:
+				if(nrow(currentUPeaks)){
+					j <- 1
+					UPList <- list()
+					# Iterate over all patterns that have relevant intensities
+					for(i in peaksToCheck){
+						UPList[[j]] <- list()
+						# Iterate over every isotopic permutation that is still in the pattern
+						for(k in 1:nrow(isoMPatterns[[i]])){
+							# Find peaks that fit the specified intensities and m/z
+							pIndex <- which(currentUPeaks[,"mzFound"] < isoMPatterns[[i]][k,"1"] & currentUPeaks[,"mzFound"] > isoMPatterns[[i]][k,"2"]
+													& currentUPeaks[,"intensity"] < isoMPatterns[[i]][k,"maxintensity"] & currentUPeaks[,"intensity"] > isoMPatterns[[i]][k,"minintensity"]
+												)
+							# Note these Peaks
+							UPList[[j]][[k]] <- currentUPeaks[pIndex,]
+							# If there are any: Change parameters in the aggregated matrix
+							if(nrow(UPList[[j]][[k]])){ 
+								UPList[[j]][[k]]$dppm <- (currentUPeaks[pIndex,]$mzFound / isoMPatterns[[i]][k,"m/z"] - 1) * 1e6
+								UPList[[j]][[k]]$mzCalc <- isoMPatterns[[i]][k,"m/z"]
+								UPList[[j]][[k]]$formula <- isoMPatterns[[i]][k,"formula"]
+								UPList[[j]][[k]]$matchedReanalysis <- NA
+								UPList[[j]][[k]]$filterOK <- TRUE
+								UPList[[j]][[k]]$good <- TRUE
+								UPList[[j]][[k]]$dppmBest <- UPList[[j]][[k]]$dppm
+								UPList[[j]][[k]]$formulaCount <- 1
+							}
+						}
+						j <- j + 1
+					}
+				} else{
+					# If there are no peaks, fake an empty dataset
+					UPList <- list(list(data.frame()),list(data.frame()))
+				}
+				
+				
+				additionMatrix <- do.call(rbind, unlist(UPList,recursive=FALSE))
+				if(nrow(additionMatrix)){
+					# Add all the peaks that could be annotated as isotopic
+					wEnv$w@aggregated[additionMatrix$index,] <- additionMatrix
+				}
+				
+				# If conflict is "strict", end here
+				if(conflict == "strict"){
+					if(plotSpectrum){
+						plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
+						if(nrow(additionMatrix)){
+							points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
+						}
+					}
+					return(0)
+				}
+			}
+			
+			
+			# Now check the matched peaks for the patterns
+			j <- 1
+			MPList <- list()
+			for(i in peaksToCheck){
+				MPList[[j]] <- list()
+				for(k in 1:nrow(isoMPatterns[[i]])){
+					pIndex <- which(currentMPeaks[,"mzFound"] < isoMPatterns[[i]][k,"1"] & currentMPeaks[,"mzFound"] > isoMPatterns[[i]][k,"2"]
+											& currentMPeaks[,"intensity"] < isoMPatterns[[i]][k,"maxintensity"] & currentMPeaks[,"intensity"] > isoMPatterns[[i]][k,"minintensity"]
+											& currentMPeaks[,"filterOK"])
+					MPList[[j]][[k]] <- currentMPeaks[pIndex,]
+
+					if(nrow(MPList[[j]][[k]])){ 
+						MPList[[j]][[k]]$dppm <- (currentMPeaks[pIndex,]$mzFound / isoMPatterns[[i]][k,"m/z"] - 1) * 1e6
+						MPList[[j]][[k]]$mzCalc <- isoMPatterns[[i]][k,"m/z"]
+						MPList[[j]][[k]]$formula <- isoMPatterns[[i]][k,"formula"]
+						MPList[[j]][[k]]$matchedReanalysis <- NA
+						MPList[[j]][[k]]$filterOK <- TRUE
+						MPList[[j]][[k]]$good <- TRUE
+						MPList[[j]][[k]]$dppmBest <- MPList[[j]][[k]]$dppm
+						MPList[[j]][[k]]$formulaCount <- 1
+					}
+				}
+				j <- j + 1
+			}
+			
+			# Peakindices of peaksToCheck where no isotopes have been found in Unmatched Peaks
+			noIsoPeaksUindex <- which(!sapply(UPList, function(x) any(sapply(x,nrow))))
+			
+			# Peakindices of peaksToCheck where isotopes have been found in Unmatched Peaks
+			IsoPeaksUindex <- setdiff(1:length(peaksToCheck),noIsoPeaksUindex)
+			
+			# Peakindices of peaksToCheck where no isotopes have been found in Matched Peaks
+			noIsoPeaksMindex <- which(!sapply(MPList, function(x) any(sapply(x,nrow))))
+			
+			# Peakindices of peaksToCheck where isotopes have been found in Matched Peaks
+			IsoPeaksMindex <- setdiff(1:length(peaksToCheck),noIsoPeaksMindex)
+			
+			if("add" %in% evalMode && conflict=="isotopic"){
+			    correctionMatrix <- do.call(rbind, unlist(MPList,recursive=FALSE))
+			    correctionMatrix <- correctionMatrix[order(abs(correctionMatrix$dppm)),]
+			    for(ind in unique(correctionMatrix$index)){
+			        if(length(which(correctionMatrix$index==ind))>1){
+			            correctionMatrix <- correctionMatrix[-which(correctionMatrix$index==ind)[-1],]
+			        }
+			    }
+			    
+			    if(nrow(correctionMatrix)){
+			        
+			        # Peaks that are changed but also seem to have isotopes themselves
+			        confPeaksIndex <- which(correctionMatrix$index %in% currentMPeaks[peaksToCheck[IsoPeaksMindex],"index"])
+			        conflictedMatrix <- correctionMatrix[confPeaksIndex,,drop=FALSE]
+			        
+			        if(length(confPeaksIndex)){
+			            correctionMatrix <- correctionMatrix[-confPeaksIndex,]
+			        }
+			        
+			        
+			        if(nrow(correctionMatrix)){
+			            wEnv$w@aggregated[correctionMatrix$index,] <- correctionMatrix
+			        }
+			    }
+			    
+			    if(!("check" %in% evalMode)){
+			        if(plotSpectrum){
+			            plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
+			            if(nrow(additionMatrix)){
+			                points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
+			            }
+			            if(nrow(correctionMatrix)){
+			                points(correctionMatrix$mzFound, correctionMatrix$intensity,type="h", col="yellow", lwd=3)
+			            }
+			            if(nrow(conflictedMatrix)){
+			                points(conflictedMatrix$mzFound, conflictedMatrix$intensity,type="h", col="red", lwd=3)
+			            }
+			        }
+			        return(0)
+			    }
+			}
+			
+			if("check" %in% evalMode){
+			    # Matched Peaks where no isotopes have been found
+			    noIsoPeaksMatrix <- currentMPeaks[peaksToCheck[noIsoPeaksMindex[which(noIsoPeaksMindex %in% noIsoPeaksUindex)]],]
+			    
+			    # Matched Peaks where no isotopes have been found and which haven't been marked as isotopic themselves
+			    noIsoPeaksMatrix <- noIsoPeaksMatrix[which(!grepl("[",wEnv$w@aggregated$formula[noIsoPeaksMatrix$index],fixed=TRUE)),]
+			    if(plotSpectrum){
+			        plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
+			    }
+			    
+			    if(nrow(noIsoPeaksMatrix)){
+			        wEnv$w@aggregated[noIsoPeaksMatrix$index,]$good <- FALSE
+			        wEnv$w@aggregated[noIsoPeaksMatrix$index,]$filterOK <- FALSE
+			        wEnv$w@aggregated[noIsoPeaksMatrix$index,]$matchedReanalysis <- FALSE
+			        if(plotSpectrum){
+			            points(noIsoPeaksMatrix$mzFound, noIsoPeaksMatrix$intensity, type="h", col="orange", lwd=3)
+			        }
+			    }
+			    
+			    if("add" %in% evalMode && conflict=="isotopic" && plotSpectrum){
+			        if(nrow(additionMatrix)){
+			            points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
+			        }
+			        if(nrow(correctionMatrix)){
+			            points(correctionMatrix$mzFound, correctionMatrix$intensity,type="h", col="yellow", lwd=3)
+			        }
+			        if(nrow(conflictedMatrix)){
+			            points(conflictedMatrix$mzFound, conflictedMatrix$intensity,type="h", col="red", lwd=3)
+			        }
+			    }
+			}
+			return(0)
+			
+		})
+	})
+	return(w)
+}
--- a/R/alternateAnalyze.R
+++ b/R/alternateAnalyze.R
+fragDataIndexing <- function(fragData){
+  index <- vector()
+  fragDataWholeMass <- floor(fragData$mass*100)
+  
+  k <- 0
+  
+  for(i in 1:length(fragDataWholeMass)){
+    while(k <= fragDataWholeMass[i]){
+      index[k] <- i
+      k <- k + 1
+    }
+  }
+
+  return(index)
+}
+
+is.sub.formula <- function(formula, targetAtomList){
+  atomList <- formulastring.to.list(formula)
+  if(!all(names(atomList) %in% names(targetAtomList))){
+    return(FALSE)
+  }
+  
+  for(atom in names(atomList)){
+    if(atomList[[atom]] > targetAtomList[[atom]]){
+      return(FALSE)
+    }
+  }
+  return(TRUE)
+}
+
+newStep2WorkFlow <- function(w, mode="pH", 
+                             confirmMode=FALSE, progressbar = "progressBarHook", 
+                             settings = getOption("RMassBank"), analyzeMethod="formula", fragdataFile = NA){
+  ##Load the fragment data (locally or from RMassBank package)
+  if(is.na(fragdataFile)){
+    fragData <- nfragData
+  } else{
+    fragData <- read.csv(fragdataFile,colClasses = c("character","numeric"))
+  }
+  
+  ##Progress bar
+  nLen <- length(w@files)
+  nProg <- 0
+  message("msmsWorkflow: Step 2. First analysis pre recalibration")
+  pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+  
+  ##Index the fragment data (for time reasons, "which" is very slow for large matrices)
+  fragDataIndex <- fragDataIndexing(fragData)
+  
+  
+  
+  w@analyzedSpecs <- lapply(w@specs, function(spec) {
+    #print(spec$id)
+    s <- analyzeMsMs.optimized(spec, mode=mode, detail=TRUE, run="preliminary",
+                     filterSettings = settings$filterSettings,
+                     spectraList = settings$spectraList, method = analyzeMethod, fragData = fragData, 
+                     fragDataIndex = fragDataIndex)
+    # Progress:
+    nProg <<- nProg + 1
+    pb <- do.call(progressbar, list(object=pb, value= nProg))
+    
+    return(s)
+  })
+  
+  for(f in w@files)
+    w@analyzedSpecs[[basename(as.character(f))]]$name <- basename(as.character(f))
+  
+  suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
+  return(w)
+}
+
+
+analyzeMsMs.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+                        filterSettings = getOption("RMassBank")$filterSettings,
+                        spectraList = getOption("RMassBank")$spectraList, method="formula", fragData,
+                        fragDataIndex)
+{
+  .checkMbSettings()
+  
+  if(msmsPeaks$foundOK == FALSE)
+    return(list(foundOK = FALSE, id=msmsPeaks$id))
+  
+  if(method=="formula")
+  {
+    return(analyzeMsMs.formula.optimized(msmsPeaks, mode, detail, run, filterSettings,
+                               spectraList, fragData, fragDataIndex
+    ))
+  }
+  else if(method == "intensity")
+  {
+    return(analyzeMsMs.intensity(msmsPeaks, mode, detail, run, filterSettings,
+                                 spectraList
+    ))
+  }
+}
+
+
+#' @export
+analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
+                                filterSettings = getOption("RMassBank")$filterSettings,
+                                spectraList = getOption("RMassBank")$spectraList, fragData, fragDataIndex)
+{
+  cut <- 0
+  cut_ratio <- 0
+  if(run=="preliminary")
+  {
+    mzColname <- "mz"
+    filterMode <- "coarse"
+    cut <- filterSettings$prelimCut
+    if(is.na(cut))
+    {
+      if(mode %in% c("pH", "pM", "pNa", "pNH4"))
+        cut <- 1e4
+      else if(mode %in% c("mH", "mFA","mM"))
+        cut <- 0
+      else stop(paste("The ionization mode", mode, "is unknown."))
+    }
+    cutRatio <- filterSettings$prelimCutRatio
+  } else{
+    mzColname <- "mzRecal"
+    filterMode <- "fine"
+    cut <- filterSettings$fineCut
+    cut_ratio <- filterSettings$fineCutRatio
+    if(is.na(cut)) cut <- 0
+  }
+  
+  # find whole spectrum of parent peak, so we have reasonable data to feed into
+  # MolgenMsMs
+  parentSpectrum <- msmsPeaks$parentPeak
+  
+  
+  # Check whether the spectra can be fitted to the spectra list correctly!
+  if(nrow(msmsPeaks$childHeaders) != length(spectraList))
+  {
+    warning(paste0(
+      "The spectra count of the substance ", msmsPeaks$id, " (", nrow(msmsPeaks$childHeaders), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)."
+    ))
+    return(list(specOK=FALSE))
+    
+  }
+  
+  # here follows the Rcdk analysis
+  #------------------------------------
+  parentPeaks <- data.frame(mzFound=msmsPeaks$mz$mzCenter, 
+                            formula=msmsPeaks$formula,
+                            dppm=0,
+                            x1=0,x2=0,x3=0)
+  
+  # define the adduct additions
+  if(mode == "pH") {
+    allowed_additions <- "H"
+    mode.charge <- 1
+  } else if(mode == "pNa") {
+    allowed_additions <- "Na"
+    mode.charge <- 1
+  } else if(mode == "pM") {
+    allowed_additions <- ""
+    mode.charge <- 1
+  } else if(mode == "mM") {
+    allowed_additions <- ""
+    mode.charge <- -1
+  } else if(mode == "mH") {
+    allowed_additions <- "H-1"
+    mode.charge <- -1
+  } else if(mode == "mFA") {
+    allowed_additions <- "C2H3O2"
+    mode.charge <- -1
+  } else if(mode == "pNH4") {
+    allowed_additions <- "NH4"
+    mode.charge <- 1
+  } else{
+    stop("mode = \"", mode, "\" not defined")
+  }
+  
+  
+  # the ppm range is two-sided here.
+  # The range is slightly expanded because dppm calculation of
+  # generate.formula starts from empirical mass, but dppm cal-
+  # culation of the evaluation starts from theoretical mass.
+  # So we don't miss the points on 'the border'.
+  
+  if(run=="preliminary")
+    ppmlimit <- 2 * max(filterSettings$ppmLowMass, filterSettings$ppmHighMass)
+  else
+    ppmlimit <- 2.25 * filterSettings$ppmFine
+  
+  parent_formula <- add.formula(msmsPeaks$formula, allowed_additions)
+  dbe_parent <- dbe(parent_formula)
+  # check whether the formula is valid, i.e. has no negative or zero element numbers.
+  #print(parent_formula)
+  if(!is.valid.formula(parent_formula))
+    return(list(specOK=FALSE))
+  limits <- to.limits.rcdk(parent_formula)
+  
+  parentAtomList <- formulastring.to.list(parent_formula)
+  
+  rAtoms <- which(c(!grepl("Br",parent_formula),!grepl("N",parent_formula), !grepl("S",parent_formula),!grepl("Cl",parent_formula)))
+  
+  if(length(rAtoms)){
+    newfragData <- fragData[-which(apply(occurrenceMatrix[,rAtoms,drop=FALSE], 1, any)),]
+    newfragDataIndex <- fragDataIndexing(newfragData)
+  } else{
+    newfragData <- fragData
+    newfragDataIndex <- fragDataIndex
+  }
+  
+  
+  # On each spectrum the following function analyzeTandemShot will be applied.
+  # It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
+  # filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
+  # < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
+  # with insufficient match accuracy or no match.
+  
+  analyzeTandemShot <- function(shot_mat)
+  {
+    shot <- as.data.frame(shot_mat)
+    shot_orig <- shot
+    # Filter out low intensity peaks:
+    shot_lo <- shot[(shot$int < cut) | (shot$int < max(shot$int) * cut_ratio),]
+    shot <- shot[(shot$int >= cut) & (shot$int > max(shot$int) * cut_ratio),]
+    shot_full <- shot
+    
+    # Is there still anything left?
+    if(nrow(shot)==0)
+      return(list(specOK=FALSE))
+    
+    # Filter out satellite peaks:
+    shot <- filterPeakSatellites(shot, filterSettings)
+    shot_satellite_n <- setdiff(row.names(shot_full), row.names(shot))
+    shot_satellite <- shot_full[shot_satellite_n,]
+    
+    # Is there still anything left?
+    if(nrow(shot)==0)
+      return(list(specOK=FALSE))
+    
+    if(max(shot$int) < as.numeric(filterSettings$specOkLimit))
+      return(list(specOK=FALSE))
+    # Crop to 4 digits (necessary because of the recalibrated values)
+    shot[,mzColname] <- round(shot[,mzColname], 5)
+    
+    
+    # check whether formula can be fitted into precalculated fragment data
+    usePrecalcData <- is.sub.formula(parent_formula,list("C"=50,"H"=70,"N"=50,"O"=50,"S"=10,"Cl"=10,"Br"=10))
+    
+    
+    if(usePrecalcData){
+      
+      # if yes, split the peaks into 2 sets: Those which can be identified using the
+      # fragment data, and those which can (by maximum fragment Data m/z - 0.5)
+      
+      precalcIndex <- which(shot[,mzColname] < (max(newfragData$mass)-0.5))
+      smallShots <- shot[precalcIndex,]
+      bigShots <- shot[-precalcIndex,]
+      
+      # for smaller peaks use the precalculated fragment data
+      if(nrow(smallShots)){
+        system.time(peakmatrixSmall <- lapply(smallShots[,mzColname],function(mass){
+          mass.calc <- mass + mode.charge * .emass
+          maxminMass <- ppm(mass.calc, ppmlimit, l=TRUE)
+          # retrieve only possibly relevant rows of the fragment Data (use the index)
+          indices <- newfragDataIndex[floor(maxminMass[2]*100)]:newfragDataIndex[ceiling(maxminMass[1]*100)]
+          
+          fragmentedFragmentData <- newfragData[indices,]
+          
+          # narrow it down further using the ppm
+          fragmentedFragmentData <- fragmentedFragmentData[which(fragmentedFragmentData$mass > maxminMass[2] & fragmentedFragmentData $mass < maxminMass[1]),]
+          
+          # return nothing if the narrowed down data is empty at this point
+          if(!nrow(fragmentedFragmentData)){
+            return(t(c(mzFound=as.numeric(as.character(mass)),
+                       formula=NA, mzCalc=NA)))
+          }
+          
+          
+          # find out if the narrowed down fragments have a sub-formula of the parentformula
+          # return the indexes of relevant formulas
+          fragmentedFragmentData <- fragmentedFragmentData[which(sapply(fragmentedFragmentData$formula, function(currentFormula) is.sub.formula(currentFormula,parentAtomList))),]
+          
+          # return nothing if the narrowed down data is empty at this point
+          if(!nrow(fragmentedFragmentData)){
+            return(t(c(mzFound=as.numeric(as.character(mass)),
+                       formula=NA, mzCalc=NA)))
+          }
+          
+          return(t(sapply(1:nrow(fragmentedFragmentData), function(f)
+          {
+            mzCalc <- fragmentedFragmentData$mass[f] - mode.charge * .emass 
+            c(mzFound=as.numeric(as.character(mass)),
+              formula=fragmentedFragmentData$formula[f], 
+              mzCalc=mzCalc)
+          })))
+          
+        }))
+      } else{
+        peakmatrixSmall <- list()
+      }
+      
+      # for bigger peaks use generate.formula from rcdk
+      if(nrow(bigShots)){
+        system.time(peakmatrixBig <- lapply(bigShots[,mzColname], function(mass){
+          # Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
+          # finally back-correct calculated masses for the charge
+          mass.calc <- mass + mode.charge * .emass
+          peakformula <- tryCatch(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE), 
+                                                   limits, charge=0), error=function(e) NA)
+          #peakformula <- tryCatch( 
+          #  generate.formula(mass, 
+          #                   ppm(mass, ppmlimit, p=TRUE),
+          #                   limits, charge=1),
+          #error= function(e) list())
+          if(!is.list(peakformula)){
+            return(t(c(mzFound=as.numeric(as.character(mass)),
+                       formula=NA, mzCalc=NA)))
+          }else{
+            return(t(sapply(peakformula, function(f)
+            {
+              mzCalc <- f@mass - mode.charge * .emass 
+              c(mzFound=as.numeric(as.character(mass)),
+                formula=f@string, 
+                mzCalc=mzCalc)
+            })))
+          }
+        }))
+      } else{
+        peakmatrixBig <- list()
+      }
+      
+      peakmatrix <- c(peakmatrixSmall,peakmatrixBig)
+      
+    } else{
+        system.time(peakmatrix <- lapply(shot[,mzColname], function(mass){
+          # Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
+          # finally back-correct calculated masses for the charge
+          mass.calc <- mass + mode.charge * .emass
+          peakformula <- tryCatch(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE), 
+                                                   limits, charge=0), error=function(e) NA)
+          #peakformula <- tryCatch( 
+          #  generate.formula(mass, 
+          #                   ppm(mass, ppmlimit, p=TRUE),
+          #                   limits, charge=1),
+          #error= function(e) list())
+          if(!is.list(peakformula)){
+            return(t(c(mzFound=as.numeric(as.character(mass)),
+                       formula=NA, mzCalc=NA)))
+          }else
+          {
+            return(t(sapply(peakformula, function(f)
+            {
+              mzCalc <- f@mass - mode.charge * .emass 
+              c(mzFound=mass,
+                formula=f@string, 
+                mzCalc=mzCalc)
+            })))
+          }
+        }))
+    }
+    
+    childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
+    childPeaks$mzFound <- as.numeric(as.character(childPeaks$mzFound))
+    childPeaks$formula <- as.character(childPeaks$formula)
+    childPeaks$mzCalc <- as.numeric(as.character(childPeaks$mzCalc))
+    childPeaks$dppm <- (childPeaks$mzFound / childPeaks$mzCalc - 1) * 1e6
+    # delete the NA data out again, because MolgenMsMs doesn't have them
+    # here and they will be re-added later
+    # (this is just left like this for "historical" reasons)
+    childPeaks <- childPeaks[!is.na(childPeaks$formula),]
+    # check if a peak was recognized (here for the first time,
+    # otherwise the next command would fail)
+    if(nrow(childPeaks)==0)
+      return(list(specOK=FALSE))
+    
+    # now apply the rule-based filters to get rid of total junk:
+    # dbe >= -0.5, dbe excess over mother cpd < 3
+    childPeaks$dbe <- unlist(lapply(childPeaks$formula, dbe))
+    #iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
+    childPeaks <- childPeaks[childPeaks$dbe >= filterSettings$dbeMinLimit,] 
+    # & dbe < dbe_parent + 3)
+    
+    # check if a peak was recognized
+    if(nrow(childPeaks)==0)
+      return(list(specOK=FALSE))
+    
+    # trim mz to 5 digits
+    shot[,mzColname] <- round(shot[,mzColname], 5)
+    
+    childPeaksInt <- merge(childPeaks, shot, by.x = "mzFound", by.y = mzColname, all.x = TRUE, all.y = FALSE )
+    # find the best ppm value
+    bestPpm <- aggregate(childPeaksInt$dppm, list(childPeaksInt$mzFound),
+                         function(dppm) dppm[[which.min(abs(dppm))]])
+    colnames(bestPpm) <- c("mzFound", "dppmBest")
+    childPeaksInt <- merge(childPeaksInt, bestPpm, by="mzFound", all.x=TRUE)
+    # count formulas found per mass
+    countFormulasTab <- xtabs( ~formula + mzFound, data=childPeaksInt)
+    countFormulas <- colSums(countFormulasTab)
+    childPeaksInt$formulaCount <- countFormulas[as.character(childPeaksInt$mzFound)]
+    # filter results
+    childPeaksFilt <- filterLowaccResults(childPeaksInt, filterMode, filterSettings)
+    childPeaksGood <- childPeaksFilt[["TRUE"]]
+    childPeaksBad <- childPeaksFilt[["FALSE"]]
+    # count formulas within new limits
+    # (the results of the "old" count stay in childPeaksInt and are returned
+    # in $childPeaks)
+    if(!is.null(childPeaksGood))
+    {
+      countFormulasTab <- xtabs( ~formula + mzFound, data=childPeaksGood)
+      countFormulas <- colSums(countFormulasTab)
+      childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mzFound)]
+    }
+    
+    childPeaksUnmatched <- merge(childPeaks, shot, by.x = "mzFound", by.y = mzColname, 
+                                 all.x = TRUE, all.y = TRUE )
+    childPeaksUnmatched$dppmBest <- NA
+    childPeaksUnmatched$formulaCount <- 0
+    childPeaksUnmatched$good <- FALSE
+    childPeaksUnmatched <- childPeaksUnmatched[is.na(childPeaksUnmatched$mzCalc),]
+    
+    # return list:
+    rl <- list(
+      specOK = !is.null(childPeaksGood),
+      parent = parentPeaks,
+      childFilt = childPeaksGood,
+      childRaw=shot_orig
+    )
+    # if "detail" is set to TRUE, return more detailed results including
+    # all the deleted peaks and the stages when they were culled
+    if(detail)
+    {
+      rl$childRawLow = shot_lo
+      rl$childRawSatellite = shot_satellite
+      rl$childRawOK = shot
+      rl$child =childPeaksInt
+      rl$childBad = childPeaksBad
+      rl$childUnmatched = childPeaksUnmatched
+    }
+    return(rl)
+  }
+  shots <- lapply(msmsPeaks$peaks, analyzeTandemShot)
+  #browser()
+  shots <- mapply(function(shot, scan, info)
+  {
+    shot$scan <- scan
+    shot$info <- info
+    shot$header <- msmsPeaks$childHeaders[as.character(scan),]
+    return(shot)
+  }, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
+  
+  mzranges <- t(sapply(shots, function(p) {
+    if(!is.null(p$childRaw)){
+      return(range(p$childRaw[,mzColname]))
+    } else {
+      return(c(NA,NA))
+    }
+  }))
+  
+  mzmin <- min(mzranges[,1], na.rm=TRUE)
+  mzmax <- max(mzranges[,2], na.rm=TRUE)
+  
+  return(list(
+    msmsdata=shots,
+    mzrange=c(mzmin, mzmax),
+    id=msmsPeaks$id,
+    mode=mode,
+    parentHeader = msmsPeaks$parentHeader,
+    parentMs = msmsPeaks$parentPeak,
+    formula = msmsPeaks$formula,
+    foundOK = TRUE))
+}
+
+findPeaksInFracData <- function(mass, fracData, fracDataIndex){
+  
+}
\ No newline at end of file
--- a/R/sysData.rda
+++ b/R/sysData.rda
--- a/man/checkIsotopes.Rd
+++ b/man/checkIsotopes.Rd
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/Isotopic_Annotation.R
+\name{checkIsotopes}
+\alias{checkIsotopes}
+\title{Checks for isotopes in a \code{msmsWorkspace}}
+\usage{
+checkIsotopes(w, mode = "pH", intensity_cutoff = 5000,
+  intensity_precision = "low", conflict = "dppm", isolationWindow = 4,
+  evalMode = "complete", plotSpectrum = TRUE,
+  settings = getOption("RMassBank"))
+}
+\arguments{
+\item{w}{A \code{msmsWorkspace} to work with.}
+
+\item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions
+([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).}
+
+\item{intensity_cutoff}{The cutoff (as an absolute intensity value) under which isotopic peaks shouldn't be checked for or accepted as valid.}
+
+\item{intensity_precision}{The difference that is accepted between the calculated and observed intensity of a possible isotopic peak. Further details down below.}
+
+\item{conflict}{Either "isotopic"(Peak formulas are always chosen if they fit the requirements for an isotopic peak)
+or "strict"(Peaks are only marked as isotopic when there hasn't been a formula assigned before.)}
+
+\item{isolationWindow}{The isolation window in Da}
+
+\item{evalMode}{Currently no function yet, but planned}
+
+\item{plotSpectrum}{A boolean specifiying whether the spectrumshould be plotted}
+
+\item{settings}{Options to be used for processing. Defaults to the options loaded via
+\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.}
+}
+\value{
+The \code{msmsWorkspace} with .
+}
+\description{
+Checks for isotopes in a \code{msmsWorkspace}
+}
+\details{
+text describing parameter inputs in more detail.
+\itemize{
+ \item{\code{intensity_precision}}{This parameter determines how strict the intensity values should adhere to the calculated intensity in relation to the parent peak.
+ Options for this parameter are \code{"none"}, where the intensity is irrelevant, \code{"low"}, which has an error margin of 70\% and \code{"high"}, where the error margin
+ is set to 35\%. The recommended setting is \code{"low"}, but can be changed to adjust to the intensity precision of the mass spectrometer.}
+}
+}
+\author{
+Michael Stravs, Eawag <michael.stravs@eawag.ch>
+
+Erik Mueller, UFZ
+}
+