Skip to content
Snippets Groups Projects
Commit ccf689c8 authored by ermueller's avatar ermueller
Browse files

checkIsotopes finished

parent aa602447
No related branches found
No related tags found
No related merge requests found
......@@ -35,6 +35,7 @@ Suggests:
ontoCAT,
RUnit
Collate:
'alternateAnalyze.R'
'createMassBank.R'
'formulaCalculator.R'
'leCsvAccess.R'
......@@ -54,5 +55,6 @@ Collate:
'validateMassBank.R'
'tools.R'
'msmsRead.R'
'Isotopic_Annotation.R'
'zzz.R'
# Generated by roxygen2 (4.1.0): do not edit by hand
# Generated by roxygen2 (4.1.1): do not edit by hand
S3method(c,msmsWSspecs)
export(CTS.externalIdSubset)
......@@ -13,9 +13,11 @@ export(addProperty)
export(aggregateSpectra)
export(analyzeMsMs)
export(analyzeMsMs.formula)
export(analyzeMsMs.formula.optimized)
export(analyzeMsMs.intensity)
export(annotator.default)
export(archiveResults)
export(checkIsotopes)
export(checkSpectra)
export(cleanElnoise)
export(combineMultiplicities)
......@@ -40,6 +42,7 @@ export(findMsMsHR.direct)
export(findMsMsHR.mass)
export(findMsMsHR.ticms2)
export(findMsMsHR.ticms2.d)
export(findMsMsHRperxcms)
export(findMsMsHRperxcms.direct)
export(findMz)
export(findMz.formula)
......
#' Checks for isotopes in a \code{msmsWorkspace}
#'
#' @param w A \code{msmsWorkspace} to work with.
#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions
#' ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
#' @param intensity_cutoff The cutoff (as an absolute intensity value) under which isotopic peaks shouldn't be checked for or accepted as valid.
#' @param intensity_precision The difference that is accepted between the calculated and observed intensity of a possible isotopic peak. Further details down below.
#' @param conflict Either "isotopic"(Peak formulas are always chosen if they fit the requirements for an isotopic peak)
#' or "strict"(Peaks are only marked as isotopic when there hasn't been a formula assigned before.)
#' @param isolationWindow The isolation window in Da
#' @param evalMode Currently no function yet, but planned
#' @param plotSpectrum A boolean specifiying whether the spectrumshould be plotted
#' @param settings Options to be used for processing. Defaults to the options loaded via
#' \code{\link{loadRmbSettings}} et al. Refer to there for specific settings.
#' @details text describing parameter inputs in more detail.
#' \itemize{
#' \item{\code{intensity_precision}}{This parameter determines how strict the intensity values should adhere to the calculated intensity in relation to the parent peak.
#' Options for this parameter are \code{"none"}, where the intensity is irrelevant, \code{"low"}, which has an error margin of 70\% and \code{"high"}, where the error margin
#' is set to 35\%. The recommended setting is \code{"low"}, but can be changed to adjust to the intensity precision of the mass spectrometer.}
# \item{\code{evalMode}}{This parameter sets what should be done after the isotopic check. The option "add" adds failpeaks if they are isotopic peaks of previously matched peaks.
# "check" checks matched peaks with formulas for isotopes and removes them if no isotopic peaks have been found for all formulas. The formula is also
# adjusted, if the one with matching isotopes doesn't have the lowest dppm. "complete" does both.}
#' }
#' @return The \code{msmsWorkspace} with .
#' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
#' @author Erik Mueller, UFZ
#' @export
checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 5000, intensity_precision = "low", conflict = "dppm",
isolationWindow = 4, evalMode = "complete", plotSpectrum = TRUE, settings = getOption("RMassBank")){
# Load library and data
require(enviPat)
data("isotopes")
if(!(intensity_precision %in% c("none","low","high"))){
stop('intensity_precision must be specified as either "none", "low" or "high"')
}
switch(intensity_precision,
none={
precisionVal <- Inf
},
low={
precisionVal <- 0.7
},
high={
precisionVal <- 0.35
}
)
# Load filtersettings
filterSettings = settings$filterSettings
# Assign formula additions according to code
if(mode == "pH") {
allowed_additions <- "H"
mode.charge <- 1
} else if(mode == "pNa") {
allowed_additions <- "Na"
mode.charge <- 1
} else if(mode == "pM") {
allowed_additions <- ""
mode.charge <- 1
} else if(mode == "mM") {
allowed_additions <- ""
mode.charge <- -1
} else if(mode == "mH") {
allowed_additions <- "H-1"
mode.charge <- -1
} else if(mode == "mFA") {
allowed_additions <- "C2H3O2"
mode.charge <- -1
} else if(mode == "pNH4") {
allowed_additions <- "NH4"
mode.charge <- 1
} else{
stop("mode = \"", mode, "\" not defined")
}
if(!(evalMode %in% c("complete"))){
stop('evalMode must currently be specified as "complete"')
}
evalMode <- c("add","check")
# "default" isotopes (i.e. those with the highest abundance)
defIsotopes <- c("107Ag", "27Al", "40Ar", "75As", "197Au", "11B", "138Ba", "9Be", "209Bi",
"79Br", "12C", "40Ca", "114Cd", "140Ce", "35Cl", "59Co", "52Cr", "133Cs", "63Cu", "164Dy",
"166Er", "153Eu", "19F", "56Fe", "69Ga", "158Gd", "74Ge", "1H", "4He", "180Hf", "202Hg",
"165Ho", "127I", "115In", "193Ir", "39K", "84Kr", "139La", "7Li", "175Lu", "24Mg", "55Mn",
"98Mo", "14N", "23Na", "93Nb", "142Nd", "20Ne", "58Ni", "16O", "192Os", "31P", "231Pa",
"208Pb", "106Pd", "141Pr", "195Pt", "85Rb", "187Re", "103Rh", "102Ru", "32S", "121Sb",
"45Sc", "80Se", "28Si", "152Sm", "120Sn", "86Sr", "88Sr", "181Ta", "159Tb", "130Te",
"232Th", "48Ti", "205Tl", "169Tm", "238U", "51V", "184W", "132Xe", "89Y", "174Yb",
"64Zn", "90Zr")
# Get the ppm limit from the settings
ppmlimit <- 2.25 * filterSettings$ppmFine
# Get the cutoff from the settings
cut <- filterSettings$fineCut
cut_ratio <- filterSettings$fineCutRatio
# Extract matched and unmatched peaks from the aggregated peaks
matchedPeaks <- peaksMatched(w)
unmatchedPeaks <- peaksUnmatched(w)
wEnv <- environment()
# lapply over all runs
lapply(w@spectra, function(spec){
# Find parent formula and cpdID
parent_formula <- add.formula(spec@formula, allowed_additions)
id <- as.numeric(spec@id)
# lapply over all extracted MS2 spectra
lapply(spec@children, function(msmsdata){
# Extract currently relevant peaks
currentMPeaks <- matchedPeaks[(matchedPeaks$cpdID == id) & (matchedPeaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
currentUPeaks <- unmatchedPeaks[(unmatchedPeaks$cpdID == id) & (unmatchedPeaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
rownames(currentMPeaks) <- 1:nrow(currentMPeaks)
rownames(currentUPeaks) <- 1:nrow(currentUPeaks)
# Generate isotopic patterns of the matched peaks
# Sort pattern by abundance
isoMPatterns <- lapply(currentMPeaks$formula, function(formula){
# Find pattern
pattern <- as.data.frame(isopattern(formula, isotopes = isotopes)[[1]])
mass <- findMz.formula(formula,"")$mzCenter
# Find index of nonisotopic molecule
mainIndex <- which.min(abs(pattern[,"m/z"]-mass))
isoCols <- colnames(pattern)[3:ncol(pattern)]
pattern[,"abundance"] <- round(pattern[,"abundance"] * (100/pattern[mainIndex,"abundance"]),3)
defAtoms <- which(isoCols %in% defIsotopes)
# Indices of isotope notation string to find atomname
atomIndices <- regexpr("[A-Z][a-z]?",isoCols)
newnames <- vector()
for(i in 1:length(atomIndices)){
if(i %in% defAtoms){
newnames[i] <- substr(isoCols[i],atomIndices[i],nchar(isoCols[i]))
}else{
lhs <- paste0('^(.{', atomIndices[i]-1, '})(.+)$')
newnames[i] <- gsub(lhs, '\\1]\\2', isoCols[i])
newnames[i] <- gsub('^(.{0})(.+)$', '\\1[\\2', newnames[i])
}
}
pattern$formula <- apply(pattern,1,function(p){
paste0(sapply(3:ncol(pattern),function(isoIndex){
if(p[isoIndex] == 0){
return("")
}
return(paste0(newnames[isoIndex-2],p[isoIndex]))
}),collapse="")
})
pattern <- pattern[order(pattern[,"abundance"],decreasing = T),][-mainIndex,]
})
names(isoMPatterns) <- currentMPeaks$formula
# Order patterns by abundance and make sure that the normal abundance is at 100%
# Calculate the expected intensities according to the abundance
# See which expected isotope peaks have an intensity higher than the cutoff
for(foundAnnotation in 1:nrow(currentMPeaks)){
intensities <- vector()
for(pattern in 1:nrow(isoMPatterns[[foundAnnotation]])){
intensities[pattern] <- currentMPeaks$int[foundAnnotation] * isoMPatterns[[foundAnnotation]]$abundance[pattern]/100
}
roundedInt <- round(intensities,digits=-2)
keepIndex <- which(roundedInt >= intensity_cutoff)
isoMPatterns[[foundAnnotation]] <- isoMPatterns[[foundAnnotation]][keepIndex,,drop=FALSE]
if(nrow(isoMPatterns[[foundAnnotation]])){
isoMPatterns[[foundAnnotation]]$minintensity <- roundedInt[keepIndex] - roundedInt[keepIndex] * precisionVal
isoMPatterns[[foundAnnotation]]$maxintensity <- roundedInt[keepIndex] + roundedInt[keepIndex] * precisionVal
# Calculate the expected mz range of the isotope peak
isoMPatterns[[foundAnnotation]] <- cbind(isoMPatterns[[foundAnnotation]],t(sapply(isoMPatterns[[foundAnnotation]][,"m/z"], ppm, dppm=ppmlimit,l=T)))
}
}
# Which isotope patterns still have theoretical intensities above the cutoff?
peaksToCheck <- which(as.logical(sapply(isoMPatterns,nrow)))
# Now, look for isotopic patterns in unmatched peaks with all specified parameters
if("add" %in% evalMode){
# Which peaks have no correct formula annotation as of now?
currentUPeaks <- currentUPeaks[which(is.na(currentUPeaks$dppm) | abs(currentUPeaks$dppmBest) > ppmlimit/2.25),]
# If there are any peaks without annotation:
if(nrow(currentUPeaks)){
j <- 1
UPList <- list()
# Iterate over all patterns that have relevant intensities
for(i in peaksToCheck){
UPList[[j]] <- list()
# Iterate over every isotopic permutation that is still in the pattern
for(k in 1:nrow(isoMPatterns[[i]])){
# Find peaks that fit the specified intensities and m/z
pIndex <- which(currentUPeaks[,"mzFound"] < isoMPatterns[[i]][k,"1"] & currentUPeaks[,"mzFound"] > isoMPatterns[[i]][k,"2"]
& currentUPeaks[,"intensity"] < isoMPatterns[[i]][k,"maxintensity"] & currentUPeaks[,"intensity"] > isoMPatterns[[i]][k,"minintensity"]
)
# Note these Peaks
UPList[[j]][[k]] <- currentUPeaks[pIndex,]
# If there are any: Change parameters in the aggregated matrix
if(nrow(UPList[[j]][[k]])){
UPList[[j]][[k]]$dppm <- (currentUPeaks[pIndex,]$mzFound / isoMPatterns[[i]][k,"m/z"] - 1) * 1e6
UPList[[j]][[k]]$mzCalc <- isoMPatterns[[i]][k,"m/z"]
UPList[[j]][[k]]$formula <- isoMPatterns[[i]][k,"formula"]
UPList[[j]][[k]]$matchedReanalysis <- NA
UPList[[j]][[k]]$filterOK <- TRUE
UPList[[j]][[k]]$good <- TRUE
UPList[[j]][[k]]$dppmBest <- UPList[[j]][[k]]$dppm
UPList[[j]][[k]]$formulaCount <- 1
}
}
j <- j + 1
}
} else{
# If there are no peaks, fake an empty dataset
UPList <- list(list(data.frame()),list(data.frame()))
}
additionMatrix <- do.call(rbind, unlist(UPList,recursive=FALSE))
if(nrow(additionMatrix)){
# Add all the peaks that could be annotated as isotopic
wEnv$w@aggregated[additionMatrix$index,] <- additionMatrix
}
# If conflict is "strict", end here
if(conflict == "strict"){
if(plotSpectrum){
plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
if(nrow(additionMatrix)){
points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
}
}
return(0)
}
}
# Now check the matched peaks for the patterns
j <- 1
MPList <- list()
for(i in peaksToCheck){
MPList[[j]] <- list()
for(k in 1:nrow(isoMPatterns[[i]])){
pIndex <- which(currentMPeaks[,"mzFound"] < isoMPatterns[[i]][k,"1"] & currentMPeaks[,"mzFound"] > isoMPatterns[[i]][k,"2"]
& currentMPeaks[,"intensity"] < isoMPatterns[[i]][k,"maxintensity"] & currentMPeaks[,"intensity"] > isoMPatterns[[i]][k,"minintensity"]
& currentMPeaks[,"filterOK"])
MPList[[j]][[k]] <- currentMPeaks[pIndex,]
if(nrow(MPList[[j]][[k]])){
MPList[[j]][[k]]$dppm <- (currentMPeaks[pIndex,]$mzFound / isoMPatterns[[i]][k,"m/z"] - 1) * 1e6
MPList[[j]][[k]]$mzCalc <- isoMPatterns[[i]][k,"m/z"]
MPList[[j]][[k]]$formula <- isoMPatterns[[i]][k,"formula"]
MPList[[j]][[k]]$matchedReanalysis <- NA
MPList[[j]][[k]]$filterOK <- TRUE
MPList[[j]][[k]]$good <- TRUE
MPList[[j]][[k]]$dppmBest <- MPList[[j]][[k]]$dppm
MPList[[j]][[k]]$formulaCount <- 1
}
}
j <- j + 1
}
# Peakindices of peaksToCheck where no isotopes have been found in Unmatched Peaks
noIsoPeaksUindex <- which(!sapply(UPList, function(x) any(sapply(x,nrow))))
# Peakindices of peaksToCheck where isotopes have been found in Unmatched Peaks
IsoPeaksUindex <- setdiff(1:length(peaksToCheck),noIsoPeaksUindex)
# Peakindices of peaksToCheck where no isotopes have been found in Matched Peaks
noIsoPeaksMindex <- which(!sapply(MPList, function(x) any(sapply(x,nrow))))
# Peakindices of peaksToCheck where isotopes have been found in Matched Peaks
IsoPeaksMindex <- setdiff(1:length(peaksToCheck),noIsoPeaksMindex)
if("add" %in% evalMode && conflict=="isotopic"){
correctionMatrix <- do.call(rbind, unlist(MPList,recursive=FALSE))
correctionMatrix <- correctionMatrix[order(abs(correctionMatrix$dppm)),]
for(ind in unique(correctionMatrix$index)){
if(length(which(correctionMatrix$index==ind))>1){
correctionMatrix <- correctionMatrix[-which(correctionMatrix$index==ind)[-1],]
}
}
if(nrow(correctionMatrix)){
# Peaks that are changed but also seem to have isotopes themselves
confPeaksIndex <- which(correctionMatrix$index %in% currentMPeaks[peaksToCheck[IsoPeaksMindex],"index"])
conflictedMatrix <- correctionMatrix[confPeaksIndex,,drop=FALSE]
if(length(confPeaksIndex)){
correctionMatrix <- correctionMatrix[-confPeaksIndex,]
}
if(nrow(correctionMatrix)){
wEnv$w@aggregated[correctionMatrix$index,] <- correctionMatrix
}
}
if(!("check" %in% evalMode)){
if(plotSpectrum){
plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
if(nrow(additionMatrix)){
points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
}
if(nrow(correctionMatrix)){
points(correctionMatrix$mzFound, correctionMatrix$intensity,type="h", col="yellow", lwd=3)
}
if(nrow(conflictedMatrix)){
points(conflictedMatrix$mzFound, conflictedMatrix$intensity,type="h", col="red", lwd=3)
}
}
return(0)
}
}
if("check" %in% evalMode){
# Matched Peaks where no isotopes have been found
noIsoPeaksMatrix <- currentMPeaks[peaksToCheck[noIsoPeaksMindex[which(noIsoPeaksMindex %in% noIsoPeaksUindex)]],]
# Matched Peaks where no isotopes have been found and which haven't been marked as isotopic themselves
noIsoPeaksMatrix <- noIsoPeaksMatrix[which(!grepl("[",wEnv$w@aggregated$formula[noIsoPeaksMatrix$index],fixed=TRUE)),]
if(plotSpectrum){
plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
}
if(nrow(noIsoPeaksMatrix)){
wEnv$w@aggregated[noIsoPeaksMatrix$index,]$good <- FALSE
wEnv$w@aggregated[noIsoPeaksMatrix$index,]$filterOK <- FALSE
wEnv$w@aggregated[noIsoPeaksMatrix$index,]$matchedReanalysis <- FALSE
if(plotSpectrum){
points(noIsoPeaksMatrix$mzFound, noIsoPeaksMatrix$intensity, type="h", col="orange", lwd=3)
}
}
if("add" %in% evalMode && conflict=="isotopic" && plotSpectrum){
if(nrow(additionMatrix)){
points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
}
if(nrow(correctionMatrix)){
points(correctionMatrix$mzFound, correctionMatrix$intensity,type="h", col="yellow", lwd=3)
}
if(nrow(conflictedMatrix)){
points(conflictedMatrix$mzFound, conflictedMatrix$intensity,type="h", col="red", lwd=3)
}
}
}
return(0)
})
})
return(w)
}
fragDataIndexing <- function(fragData){
index <- vector()
fragDataWholeMass <- floor(fragData$mass*100)
k <- 0
for(i in 1:length(fragDataWholeMass)){
while(k <= fragDataWholeMass[i]){
index[k] <- i
k <- k + 1
}
}
return(index)
}
is.sub.formula <- function(formula, targetAtomList){
atomList <- formulastring.to.list(formula)
if(!all(names(atomList) %in% names(targetAtomList))){
return(FALSE)
}
for(atom in names(atomList)){
if(atomList[[atom]] > targetAtomList[[atom]]){
return(FALSE)
}
}
return(TRUE)
}
newStep2WorkFlow <- function(w, mode="pH",
confirmMode=FALSE, progressbar = "progressBarHook",
settings = getOption("RMassBank"), analyzeMethod="formula", fragdataFile = NA){
##Load the fragment data (locally or from RMassBank package)
if(is.na(fragdataFile)){
fragData <- nfragData
} else{
fragData <- read.csv(fragdataFile,colClasses = c("character","numeric"))
}
##Progress bar
nLen <- length(w@files)
nProg <- 0
message("msmsWorkflow: Step 2. First analysis pre recalibration")
pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
##Index the fragment data (for time reasons, "which" is very slow for large matrices)
fragDataIndex <- fragDataIndexing(fragData)
w@analyzedSpecs <- lapply(w@specs, function(spec) {
#print(spec$id)
s <- analyzeMsMs.optimized(spec, mode=mode, detail=TRUE, run="preliminary",
filterSettings = settings$filterSettings,
spectraList = settings$spectraList, method = analyzeMethod, fragData = fragData,
fragDataIndex = fragDataIndex)
# Progress:
nProg <<- nProg + 1
pb <- do.call(progressbar, list(object=pb, value= nProg))
return(s)
})
for(f in w@files)
w@analyzedSpecs[[basename(as.character(f))]]$name <- basename(as.character(f))
suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
return(w)
}
analyzeMsMs.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
filterSettings = getOption("RMassBank")$filterSettings,
spectraList = getOption("RMassBank")$spectraList, method="formula", fragData,
fragDataIndex)
{
.checkMbSettings()
if(msmsPeaks$foundOK == FALSE)
return(list(foundOK = FALSE, id=msmsPeaks$id))
if(method=="formula")
{
return(analyzeMsMs.formula.optimized(msmsPeaks, mode, detail, run, filterSettings,
spectraList, fragData, fragDataIndex
))
}
else if(method == "intensity")
{
return(analyzeMsMs.intensity(msmsPeaks, mode, detail, run, filterSettings,
spectraList
))
}
}
#' @export
analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, run="preliminary",
filterSettings = getOption("RMassBank")$filterSettings,
spectraList = getOption("RMassBank")$spectraList, fragData, fragDataIndex)
{
cut <- 0
cut_ratio <- 0
if(run=="preliminary")
{
mzColname <- "mz"
filterMode <- "coarse"
cut <- filterSettings$prelimCut
if(is.na(cut))
{
if(mode %in% c("pH", "pM", "pNa", "pNH4"))
cut <- 1e4
else if(mode %in% c("mH", "mFA","mM"))
cut <- 0
else stop(paste("The ionization mode", mode, "is unknown."))
}
cutRatio <- filterSettings$prelimCutRatio
} else{
mzColname <- "mzRecal"
filterMode <- "fine"
cut <- filterSettings$fineCut
cut_ratio <- filterSettings$fineCutRatio
if(is.na(cut)) cut <- 0
}
# find whole spectrum of parent peak, so we have reasonable data to feed into
# MolgenMsMs
parentSpectrum <- msmsPeaks$parentPeak
# Check whether the spectra can be fitted to the spectra list correctly!
if(nrow(msmsPeaks$childHeaders) != length(spectraList))
{
warning(paste0(
"The spectra count of the substance ", msmsPeaks$id, " (", nrow(msmsPeaks$childHeaders), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)."
))
return(list(specOK=FALSE))
}
# here follows the Rcdk analysis
#------------------------------------
parentPeaks <- data.frame(mzFound=msmsPeaks$mz$mzCenter,
formula=msmsPeaks$formula,
dppm=0,
x1=0,x2=0,x3=0)
# define the adduct additions
if(mode == "pH") {
allowed_additions <- "H"
mode.charge <- 1
} else if(mode == "pNa") {
allowed_additions <- "Na"
mode.charge <- 1
} else if(mode == "pM") {
allowed_additions <- ""
mode.charge <- 1
} else if(mode == "mM") {
allowed_additions <- ""
mode.charge <- -1
} else if(mode == "mH") {
allowed_additions <- "H-1"
mode.charge <- -1
} else if(mode == "mFA") {
allowed_additions <- "C2H3O2"
mode.charge <- -1
} else if(mode == "pNH4") {
allowed_additions <- "NH4"
mode.charge <- 1
} else{
stop("mode = \"", mode, "\" not defined")
}
# the ppm range is two-sided here.
# The range is slightly expanded because dppm calculation of
# generate.formula starts from empirical mass, but dppm cal-
# culation of the evaluation starts from theoretical mass.
# So we don't miss the points on 'the border'.
if(run=="preliminary")
ppmlimit <- 2 * max(filterSettings$ppmLowMass, filterSettings$ppmHighMass)
else
ppmlimit <- 2.25 * filterSettings$ppmFine
parent_formula <- add.formula(msmsPeaks$formula, allowed_additions)
dbe_parent <- dbe(parent_formula)
# check whether the formula is valid, i.e. has no negative or zero element numbers.
#print(parent_formula)
if(!is.valid.formula(parent_formula))
return(list(specOK=FALSE))
limits <- to.limits.rcdk(parent_formula)
parentAtomList <- formulastring.to.list(parent_formula)
rAtoms <- which(c(!grepl("Br",parent_formula),!grepl("N",parent_formula), !grepl("S",parent_formula),!grepl("Cl",parent_formula)))
if(length(rAtoms)){
newfragData <- fragData[-which(apply(occurrenceMatrix[,rAtoms,drop=FALSE], 1, any)),]
newfragDataIndex <- fragDataIndexing(newfragData)
} else{
newfragData <- fragData
newfragDataIndex <- fragDataIndex
}
# On each spectrum the following function analyzeTandemShot will be applied.
# It takes the raw peaks matrix as argument (mz, int) and processes the spectrum by
# filtering out low-intensity (<1e4) and shoulder peaks (deltam/z < 0.5, intensity
# < 5%) and subsequently matching the peaks to formulas using Rcdk, discarding peaks
# with insufficient match accuracy or no match.
analyzeTandemShot <- function(shot_mat)
{
shot <- as.data.frame(shot_mat)
shot_orig <- shot
# Filter out low intensity peaks:
shot_lo <- shot[(shot$int < cut) | (shot$int < max(shot$int) * cut_ratio),]
shot <- shot[(shot$int >= cut) & (shot$int > max(shot$int) * cut_ratio),]
shot_full <- shot
# Is there still anything left?
if(nrow(shot)==0)
return(list(specOK=FALSE))
# Filter out satellite peaks:
shot <- filterPeakSatellites(shot, filterSettings)
shot_satellite_n <- setdiff(row.names(shot_full), row.names(shot))
shot_satellite <- shot_full[shot_satellite_n,]
# Is there still anything left?
if(nrow(shot)==0)
return(list(specOK=FALSE))
if(max(shot$int) < as.numeric(filterSettings$specOkLimit))
return(list(specOK=FALSE))
# Crop to 4 digits (necessary because of the recalibrated values)
shot[,mzColname] <- round(shot[,mzColname], 5)
# check whether formula can be fitted into precalculated fragment data
usePrecalcData <- is.sub.formula(parent_formula,list("C"=50,"H"=70,"N"=50,"O"=50,"S"=10,"Cl"=10,"Br"=10))
if(usePrecalcData){
# if yes, split the peaks into 2 sets: Those which can be identified using the
# fragment data, and those which can (by maximum fragment Data m/z - 0.5)
precalcIndex <- which(shot[,mzColname] < (max(newfragData$mass)-0.5))
smallShots <- shot[precalcIndex,]
bigShots <- shot[-precalcIndex,]
# for smaller peaks use the precalculated fragment data
if(nrow(smallShots)){
system.time(peakmatrixSmall <- lapply(smallShots[,mzColname],function(mass){
mass.calc <- mass + mode.charge * .emass
maxminMass <- ppm(mass.calc, ppmlimit, l=TRUE)
# retrieve only possibly relevant rows of the fragment Data (use the index)
indices <- newfragDataIndex[floor(maxminMass[2]*100)]:newfragDataIndex[ceiling(maxminMass[1]*100)]
fragmentedFragmentData <- newfragData[indices,]
# narrow it down further using the ppm
fragmentedFragmentData <- fragmentedFragmentData[which(fragmentedFragmentData$mass > maxminMass[2] & fragmentedFragmentData $mass < maxminMass[1]),]
# return nothing if the narrowed down data is empty at this point
if(!nrow(fragmentedFragmentData)){
return(t(c(mzFound=as.numeric(as.character(mass)),
formula=NA, mzCalc=NA)))
}
# find out if the narrowed down fragments have a sub-formula of the parentformula
# return the indexes of relevant formulas
fragmentedFragmentData <- fragmentedFragmentData[which(sapply(fragmentedFragmentData$formula, function(currentFormula) is.sub.formula(currentFormula,parentAtomList))),]
# return nothing if the narrowed down data is empty at this point
if(!nrow(fragmentedFragmentData)){
return(t(c(mzFound=as.numeric(as.character(mass)),
formula=NA, mzCalc=NA)))
}
return(t(sapply(1:nrow(fragmentedFragmentData), function(f)
{
mzCalc <- fragmentedFragmentData$mass[f] - mode.charge * .emass
c(mzFound=as.numeric(as.character(mass)),
formula=fragmentedFragmentData$formula[f],
mzCalc=mzCalc)
})))
}))
} else{
peakmatrixSmall <- list()
}
# for bigger peaks use generate.formula from rcdk
if(nrow(bigShots)){
system.time(peakmatrixBig <- lapply(bigShots[,mzColname], function(mass){
# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
# finally back-correct calculated masses for the charge
mass.calc <- mass + mode.charge * .emass
peakformula <- tryCatch(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
limits, charge=0), error=function(e) NA)
#peakformula <- tryCatch(
# generate.formula(mass,
# ppm(mass, ppmlimit, p=TRUE),
# limits, charge=1),
#error= function(e) list())
if(!is.list(peakformula)){
return(t(c(mzFound=as.numeric(as.character(mass)),
formula=NA, mzCalc=NA)))
}else{
return(t(sapply(peakformula, function(f)
{
mzCalc <- f@mass - mode.charge * .emass
c(mzFound=as.numeric(as.character(mass)),
formula=f@string,
mzCalc=mzCalc)
})))
}
}))
} else{
peakmatrixBig <- list()
}
peakmatrix <- c(peakmatrixSmall,peakmatrixBig)
} else{
system.time(peakmatrix <- lapply(shot[,mzColname], function(mass){
# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
# finally back-correct calculated masses for the charge
mass.calc <- mass + mode.charge * .emass
peakformula <- tryCatch(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
limits, charge=0), error=function(e) NA)
#peakformula <- tryCatch(
# generate.formula(mass,
# ppm(mass, ppmlimit, p=TRUE),
# limits, charge=1),
#error= function(e) list())
if(!is.list(peakformula)){
return(t(c(mzFound=as.numeric(as.character(mass)),
formula=NA, mzCalc=NA)))
}else
{
return(t(sapply(peakformula, function(f)
{
mzCalc <- f@mass - mode.charge * .emass
c(mzFound=mass,
formula=f@string,
mzCalc=mzCalc)
})))
}
}))
}
childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
childPeaks$mzFound <- as.numeric(as.character(childPeaks$mzFound))
childPeaks$formula <- as.character(childPeaks$formula)
childPeaks$mzCalc <- as.numeric(as.character(childPeaks$mzCalc))
childPeaks$dppm <- (childPeaks$mzFound / childPeaks$mzCalc - 1) * 1e6
# delete the NA data out again, because MolgenMsMs doesn't have them
# here and they will be re-added later
# (this is just left like this for "historical" reasons)
childPeaks <- childPeaks[!is.na(childPeaks$formula),]
# check if a peak was recognized (here for the first time,
# otherwise the next command would fail)
if(nrow(childPeaks)==0)
return(list(specOK=FALSE))
# now apply the rule-based filters to get rid of total junk:
# dbe >= -0.5, dbe excess over mother cpd < 3
childPeaks$dbe <- unlist(lapply(childPeaks$formula, dbe))
#iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
childPeaks <- childPeaks[childPeaks$dbe >= filterSettings$dbeMinLimit,]
# & dbe < dbe_parent + 3)
# check if a peak was recognized
if(nrow(childPeaks)==0)
return(list(specOK=FALSE))
# trim mz to 5 digits
shot[,mzColname] <- round(shot[,mzColname], 5)
childPeaksInt <- merge(childPeaks, shot, by.x = "mzFound", by.y = mzColname, all.x = TRUE, all.y = FALSE )
# find the best ppm value
bestPpm <- aggregate(childPeaksInt$dppm, list(childPeaksInt$mzFound),
function(dppm) dppm[[which.min(abs(dppm))]])
colnames(bestPpm) <- c("mzFound", "dppmBest")
childPeaksInt <- merge(childPeaksInt, bestPpm, by="mzFound", all.x=TRUE)
# count formulas found per mass
countFormulasTab <- xtabs( ~formula + mzFound, data=childPeaksInt)
countFormulas <- colSums(countFormulasTab)
childPeaksInt$formulaCount <- countFormulas[as.character(childPeaksInt$mzFound)]
# filter results
childPeaksFilt <- filterLowaccResults(childPeaksInt, filterMode, filterSettings)
childPeaksGood <- childPeaksFilt[["TRUE"]]
childPeaksBad <- childPeaksFilt[["FALSE"]]
# count formulas within new limits
# (the results of the "old" count stay in childPeaksInt and are returned
# in $childPeaks)
if(!is.null(childPeaksGood))
{
countFormulasTab <- xtabs( ~formula + mzFound, data=childPeaksGood)
countFormulas <- colSums(countFormulasTab)
childPeaksGood$formulaCount <- countFormulas[as.character(childPeaksGood$mzFound)]
}
childPeaksUnmatched <- merge(childPeaks, shot, by.x = "mzFound", by.y = mzColname,
all.x = TRUE, all.y = TRUE )
childPeaksUnmatched$dppmBest <- NA
childPeaksUnmatched$formulaCount <- 0
childPeaksUnmatched$good <- FALSE
childPeaksUnmatched <- childPeaksUnmatched[is.na(childPeaksUnmatched$mzCalc),]
# return list:
rl <- list(
specOK = !is.null(childPeaksGood),
parent = parentPeaks,
childFilt = childPeaksGood,
childRaw=shot_orig
)
# if "detail" is set to TRUE, return more detailed results including
# all the deleted peaks and the stages when they were culled
if(detail)
{
rl$childRawLow = shot_lo
rl$childRawSatellite = shot_satellite
rl$childRawOK = shot
rl$child =childPeaksInt
rl$childBad = childPeaksBad
rl$childUnmatched = childPeaksUnmatched
}
return(rl)
}
shots <- lapply(msmsPeaks$peaks, analyzeTandemShot)
#browser()
shots <- mapply(function(shot, scan, info)
{
shot$scan <- scan
shot$info <- info
shot$header <- msmsPeaks$childHeaders[as.character(scan),]
return(shot)
}, shots, msmsPeaks$childScans, spectraList, SIMPLIFY=FALSE)
mzranges <- t(sapply(shots, function(p) {
if(!is.null(p$childRaw)){
return(range(p$childRaw[,mzColname]))
} else {
return(c(NA,NA))
}
}))
mzmin <- min(mzranges[,1], na.rm=TRUE)
mzmax <- max(mzranges[,2], na.rm=TRUE)
return(list(
msmsdata=shots,
mzrange=c(mzmin, mzmax),
id=msmsPeaks$id,
mode=mode,
parentHeader = msmsPeaks$parentHeader,
parentMs = msmsPeaks$parentPeak,
formula = msmsPeaks$formula,
foundOK = TRUE))
}
findPeaksInFracData <- function(mass, fracData, fracDataIndex){
}
\ No newline at end of file
File added
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/Isotopic_Annotation.R
\name{checkIsotopes}
\alias{checkIsotopes}
\title{Checks for isotopes in a \code{msmsWorkspace}}
\usage{
checkIsotopes(w, mode = "pH", intensity_cutoff = 5000,
intensity_precision = "low", conflict = "dppm", isolationWindow = 4,
evalMode = "complete", plotSpectrum = TRUE,
settings = getOption("RMassBank"))
}
\arguments{
\item{w}{A \code{msmsWorkspace} to work with.}
\item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions
([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).}
\item{intensity_cutoff}{The cutoff (as an absolute intensity value) under which isotopic peaks shouldn't be checked for or accepted as valid.}
\item{intensity_precision}{The difference that is accepted between the calculated and observed intensity of a possible isotopic peak. Further details down below.}
\item{conflict}{Either "isotopic"(Peak formulas are always chosen if they fit the requirements for an isotopic peak)
or "strict"(Peaks are only marked as isotopic when there hasn't been a formula assigned before.)}
\item{isolationWindow}{The isolation window in Da}
\item{evalMode}{Currently no function yet, but planned}
\item{plotSpectrum}{A boolean specifiying whether the spectrumshould be plotted}
\item{settings}{Options to be used for processing. Defaults to the options loaded via
\code{\link{loadRmbSettings}} et al. Refer to there for specific settings.}
}
\value{
The \code{msmsWorkspace} with .
}
\description{
Checks for isotopes in a \code{msmsWorkspace}
}
\details{
text describing parameter inputs in more detail.
\itemize{
\item{\code{intensity_precision}}{This parameter determines how strict the intensity values should adhere to the calculated intensity in relation to the parent peak.
Options for this parameter are \code{"none"}, where the intensity is irrelevant, \code{"low"}, which has an error margin of 70\% and \code{"high"}, where the error margin
is set to 35\%. The recommended setting is \code{"low"}, but can be changed to adjust to the intensity precision of the mass spectrometer.}
}
}
\author{
Michael Stravs, Eawag <michael.stravs@eawag.ch>
Erik Mueller, UFZ
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment