diff --git a/DESCRIPTION b/DESCRIPTION index 11f273698fc94253f1925577ff4fad25235171c3..342da0989857ca8fc8f3ba413192e6f1572ef308 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,9 +49,10 @@ Collate: 'SpectrumMethods.R' 'RmbWorkspace.R' 'RmbWorkspaceUpdate.R' + 'SpectraSetMethods.R' + 'AggregateMethods.R' 'validateMassBank.R' - 'zzz.R' 'tools.R' 'msmsRead.R' - + 'zzz.R' \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index 45a2b984e5fe6ec2214e2559d0fc12757189e154..08d541d588b58b4b7877847932df2baad6183d5e 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,8 @@ -# Generated by roxygen2 (4.1.1): do not edit by hand +# Generated by roxygen2 (4.1.0): do not edit by hand +S3method(c,msmsWSspecs) export(CTS.externalIdSubset) export(CTS.externalIdTypes) -export(gatherPubChem) export(RmbDefaultSettings) export(RmbSettingsTemplate) export(add.formula) @@ -15,6 +15,7 @@ export(analyzeMsMs.formula) export(analyzeMsMs.intensity) export(annotator.default) export(archiveResults) +export(checkSpectra) export(cleanElnoise) export(combineMultiplicities) export(compileRecord) @@ -49,6 +50,8 @@ export(flatten) export(formulastring.to.list) export(gatherCompound) export(gatherData) +export(gatherDataBabel) +export(gatherPubChem) export(gatherSpectrum) export(getCactus) export(getCtsKey) @@ -93,7 +96,9 @@ export(recalibrateSingleSpec) export(recalibrateSpectra) export(resetInfolists) export(resetList) +export(selectSpectra) export(smiles2mass) +export(spectraCount) export(to.limits.rcdk) export(toMassbank) export(toRMB) @@ -101,10 +106,14 @@ export(updateSettings) export(validate) exportClasses(mbWorkspace) exportClasses(msmsWorkspace) +exportMethods(checkSpectra) +exportMethods(selectSpectra) exportMethods(show) +exportMethods(spectraCount) import(Biobase) import(MSnbase) import(RCurl) +import(Rcpp) import(S4Vectors) import(XML) import(methods) diff --git a/R/AggregateMethods.R b/R/AggregateMethods.R new file mode 100644 index 
0000000000000000000000000000000000000000..f6a845d9ddbb7a59b67268cc05e0da048f64ce11 --- /dev/null +++ b/R/AggregateMethods.R @@ -0,0 +1,24 @@ + +#' @export +setGeneric("peaksMatched", function(o) standardGeneric("peaksMatched")) # S4 generic; the methods below dispatch on the class of o + +#' @export +setMethod("peaksMatched", c("data.frame"), function(o) # data.frame method: row subset driven by the column `good` + { + o[o$good,,drop=FALSE] # keeps rows where `good` is TRUE (assumes a logical column; NA entries would yield NA rows); drop=FALSE keeps a data.frame + }) + +#' @export +setMethod("peaksMatched", c("msmsWorkspace"), function(o) peaksMatched(o@aggregated)) # workspace method: delegates to the `aggregated` slot + +#' @export +setGeneric("peaksUnmatched", function(o) standardGeneric("peaksUnmatched")) # S4 generic; counterpart selecting on the negated `good` column + +#' @export +setMethod("peaksUnmatched", c("data.frame"), function(o) # data.frame method: row subset driven by the negation of `good` + { + o[!o$good,,drop=FALSE] # keeps rows where `good` is FALSE (same logical-column assumption); drop=FALSE keeps a data.frame + }) + +#' @export +setMethod("peaksUnmatched", c("msmsWorkspace"), function(o) peaksUnmatched(o@aggregated)) \ No newline at end of file diff --git a/R/RmbWorkspace.R b/R/RmbWorkspace.R index 13b504bd56334dec89dc8e6d51db179935f292e3..3cdcdfccff9976fba355607be13c2eec171ccebb 100755 --- a/R/RmbWorkspace.R +++ b/R/RmbWorkspace.R @@ -62,9 +62,11 @@ setClassUnion("msmsWorkspaceOrNULL", "NULL") representation = representation( files = "character", spectra = "RmbSpectraSetList", + aggregated = "data.frame", parent = "msmsWorkspaceOrNULL", analyzedSpecs = "list", aggregatedSpecs = "list", + rc = "ANY", rc.ms1 = "ANY", recalibratedSpecs = "list", diff --git a/R/RmbWorkspaceUpdate.R b/R/RmbWorkspaceUpdate.R index df02a646b96bef27bd89b5385ef552f2d6a93bf6..0a700f77a7b8c2067e4669beffa6408a85f3e326 100644 --- a/R/RmbWorkspaceUpdate.R +++ b/R/RmbWorkspaceUpdate.R @@ -42,6 +42,7 @@ w.new@parent <- w.parent.new } w.new@spectra <- .updateObject.spectra(w.old@specs, w.old@analyzedSpecs) + w.new@aggregatedSpecs <- w.old@aggregatedSpecs } return(w.new) @@ -63,58 +64,60 @@ set@found <- as.logical(spec$foundOK) # now parent and child MS # check for parent recalibration column - if("mzRecal" %in% colnames(spec$parentPeak)) - mzcol <- "mzRecal" - else - mzcol <- "mz" - set@parent <- new("Spectrum1", - mz = spec$parentPeak[,mzcol], - intensity = spec$parentPeak[,2], - polarity
= as.integer(spec$parentHeader$polarity), - peaksCount = as.integer(spec$parentHeader$peaksCount), - rt = spec$parentHeader$retentionTime, - acquisitionNum = as.integer(spec$parentHeader$acquisitionNum), - tic = spec$parentHeader$totIonCurrent, - centroided = TRUE - ) - # get MSMS data from spec$peaks into RmbSpectrum2 objects - children.p1 <- lapply(spec$peaks, function(peaks) - { - if("mzRecal" %in% colnames(peaks)) - mzcol <- "mzRecal" - else - mzcol <- "mz" - new("RmbSpectrum2", - mz=peaks[,mzcol], - intensity=peaks[,2], - peaksCount=nrow(peaks)) - }) - # get header data from spec$childHeaders into separate RmbSpectrum2 objects - children.p2 <- apply(spec$childHeaders, 1, function(line) - { - new("RmbSpectrum2", - precScanNum = as.integer(line["precursorScanNum"]), - precursorMz = line["precursorMZ"], - precursorIntensity = line["precursorIntensity"], - precursorCharge = as.integer(line["precursorCharge"]), - collisionEnergy = line["collisionEnergy"], - tic = line["totIonCurrent"], - rt = line["retentionTime"], - acquisitionNum = as.integer(line["acquisitionNum"]), - centroided = TRUE - ) - }) - # merge MSMS RmbSpectrum2 with header RmbSpectrum2 - children <- mapply(function(c1,c2) - { - c2slots <- c("precScanNum","precursorMz", "precursorIntensity", "precursorCharge", "collisionEnergy", - "tic", "rt", "acquisitionNum", "centroided") - for(c2slot in c2slots) - slot(c1, c2slot) <- slot(c2, c2slot) - return(c1) - }, children.p1, children.p2) - set@children <- as(children, "SimpleList") - + if(set@found) + { + if("mzRecal" %in% colnames(spec$parentPeak)) + mzcol <- "mzRecal" + else + mzcol <- "mz" + set@parent <- new("Spectrum1", + mz = spec$parentPeak[,mzcol], + intensity = spec$parentPeak[,2], + polarity = as.integer(spec$parentHeader$polarity), + peaksCount = as.integer(spec$parentHeader$peaksCount), + rt = spec$parentHeader$retentionTime, + acquisitionNum = as.integer(spec$parentHeader$acquisitionNum), + tic = spec$parentHeader$totIonCurrent, + centroided = 
TRUE + ) + # get MSMS data from spec$peaks into RmbSpectrum2 objects + children.p1 <- lapply(spec$peaks, function(peaks) + { + if("mzRecal" %in% colnames(peaks)) + mzcol <- "mzRecal" + else + mzcol <- "mz" + new("RmbSpectrum2", + mz=peaks[,mzcol], + intensity=peaks[,2], + peaksCount=nrow(peaks)) + }) + # get header data from spec$childHeaders into separate RmbSpectrum2 objects + children.p2 <- apply(spec$childHeaders, 1, function(line) + { + new("RmbSpectrum2", + precScanNum = as.integer(line["precursorScanNum"]), + precursorMz = line["precursorMZ"], + precursorIntensity = line["precursorIntensity"], + precursorCharge = as.integer(line["precursorCharge"]), + collisionEnergy = line["collisionEnergy"], + tic = line["totIonCurrent"], + rt = line["retentionTime"], + acquisitionNum = as.integer(line["acquisitionNum"]), + centroided = TRUE + ) + }) + # merge MSMS RmbSpectrum2 with header RmbSpectrum2 + children <- mapply(function(c1,c2) + { + c2slots <- c("precScanNum","precursorMz", "precursorIntensity", "precursorCharge", "collisionEnergy", + "tic", "rt", "acquisitionNum", "centroided") + for(c2slot in c2slots) + slot(c1, c2slot) <- slot(c2, c2slot) + return(c1) + }, children.p1, children.p2) + set@children <- as(children, "SimpleList") + } return(set) }) spectra <- mapply(function(set, name) @@ -132,107 +135,176 @@ { if(length(analyzedSpec$msmsdata) != length(set@children)) stop("updateObject: Could not update object because data is inconsistent. 
length(analyzedSpec$msmsdata) != length(set@children)") + + set@complete <- FALSE + set@empty <- FALSE + + + if(length(analyzedSpec$msmsdata) == 0) + { + set@empty <- TRUE # set the slot; a bare local `empty` would be lost when set is returned + return(set) + } children <- mapply(function(spectrum, msmsrecord) { - if(!is.data.frame(msmsrecord$childBad)) - msmsrecord$childBad <- data.frame() - # note: mz/intensity are replaced with the values from the analyzed spectrum, - # such as to have a mass multiple times for multiple matched formulas + if(msmsrecord$specOK) + + spectrum@ok <- TRUE + else + spectrum@ok <- FALSE # check if the spectrum has recalibrated masses; if yes, use those if("mzRecal" %in% colnames(msmsrecord$childRaw)) mzcol <- "mzRecal" else mzcol <- "mz" - spectrum@mz <- c(msmsrecord$childFilt$mzFound, + + # create potentially missing data frames as zero-row frames carrying the columns read further down + if(!is.data.frame(msmsrecord$childFilt)) + { + msmsrecord$childFilt <- data.frame() + msmsrecord$childFilt[,"mzFound"] <- numeric() # columns belong on the fresh childFilt placeholder, not on childRawLow + msmsrecord$childFilt[,"int"] <- numeric() + } + if(!is.data.frame(msmsrecord$childBad)) + { + msmsrecord$childBad <- data.frame() + msmsrecord$childBad[,"mzFound"] <- numeric() + msmsrecord$childBad[,"int"] <- numeric() + } + if(!is.data.frame(msmsrecord$childUnmatched)) + { + msmsrecord$childUnmatched <- data.frame() + msmsrecord$childUnmatched[,"mzFound"] <- numeric() + msmsrecord$childUnmatched[,"int"] <- numeric() + } + if(!is.data.frame(msmsrecord$childRawLow)) + { + msmsrecord$childRawLow <- data.frame() + msmsrecord$childRawLow[,mzcol] <- numeric() + msmsrecord$childRawLow[,"int"] <- numeric() + } + if(!is.data.frame(msmsrecord$childRawSatellite)) + { + msmsrecord$childRawSatellite <- data.frame() + msmsrecord$childRawSatellite[,mzcol] <- numeric() + msmsrecord$childRawSatellite[,"int"] <- numeric() + + } + # note: mz/intensity are replaced with the values from the analyzed spectrum, + # such as to have a mass multiple times for multiple matched formulas + + mz <- c(msmsrecord$childFilt$mzFound, msmsrecord$childBad$mzFound,
msmsrecord$childUnmatched$mzFound, msmsrecord$childRawLow[,mzcol], msmsrecord$childRawSatellite[,mzcol]) - spectrum@intensity <- c(msmsrecord$childFilt$int, - msmsrecord$childBad$int, - msmsrecord$childUnmatched$int, - msmsrecord$childRawLow$int, - msmsrecord$childRawSatellite$int) - spectrum@peaksCount <- length(spectrum@mz) - - spectrum@satellite <- as.logical(c( - rep(FALSE,nrow(msmsrecord$childFilt)), - rep(FALSE,nrow(msmsrecord$childBad)), - rep(FALSE,nrow(msmsrecord$childUnmatched)), - rep(FALSE,nrow(msmsrecord$childRawLow)), - rep(TRUE,nrow(msmsrecord$childRawSatellite))) - ) - - - spectrum@low <- as.logical(c( - rep(FALSE,nrow(msmsrecord$childFilt)), - rep(FALSE,nrow(msmsrecord$childBad)), - rep(FALSE,nrow(msmsrecord$childUnmatched)), - rep(TRUE,nrow(msmsrecord$childRawLow)), - rep(FALSE,nrow(msmsrecord$childRawSatellite))) - ) - - spectrum@rawOK <- as.logical(c( - rep(TRUE,nrow(msmsrecord$childFilt)), - rep(TRUE,nrow(msmsrecord$childBad)), - rep(TRUE,nrow(msmsrecord$childUnmatched)), - rep(FALSE,nrow(msmsrecord$childRawLow)), - rep(FALSE,nrow(msmsrecord$childRawSatellite))) - ) - - spectrum@good <- as.logical(c( - msmsrecord$childFilt$good, - msmsrecord$childBad$good, - msmsrecord$childUnmatched$good, - rep(NA,nrow(msmsrecord$childRawLow)), - rep(NA,nrow(msmsrecord$childRawSatellite))) - ) - spectrum@mzCalc <- as.numeric(c( - msmsrecord$childFilt$mzCalc, - msmsrecord$childBad$mzCalc, - msmsrecord$childUnmatched$mzCalc, - rep(NA,nrow(msmsrecord$childRawLow)), - rep(NA,nrow(msmsrecord$childRawSatellite))) - ) - spectrum@formula <- as.character(c( - msmsrecord$childFilt$formula, - msmsrecord$childBad$formula, - msmsrecord$childUnmatched$formula, - rep(NA,nrow(msmsrecord$childRawLow)), - rep(NA,nrow(msmsrecord$childRawSatellite))) - ) - - spectrum@dbe <- as.numeric(c( - msmsrecord$childFilt$dbe, - msmsrecord$childBad$dbe, - msmsrecord$childUnmatched$dbe, - rep(NA,nrow(msmsrecord$childRawLow)), - rep(NA,nrow(msmsrecord$childRawSatellite))) - ) - - 
spectrum@formulaCount <- as.integer(c( - msmsrecord$childFilt$formulaCount, - msmsrecord$childBad$formulaCount, - msmsrecord$childUnmatched$formulaCount, - rep(NA,nrow(msmsrecord$childRawLow)), - rep(NA,nrow(msmsrecord$childRawSatellite))) - ) + if(length(mz) > 0) + { + spectrum@mz <- mz + spectrum@intensity <- c(msmsrecord$childFilt$int, + msmsrecord$childBad$int, + msmsrecord$childUnmatched$int, + msmsrecord$childRawLow$int, + msmsrecord$childRawSatellite$int) + spectrum@peaksCount <- length(spectrum@mz) + + spectrum@satellite <- as.logical(c( + rep(FALSE,nrow(msmsrecord$childFilt)), + rep(FALSE,nrow(msmsrecord$childBad)), + rep(FALSE,nrow(msmsrecord$childUnmatched)), + rep(FALSE,nrow(msmsrecord$childRawLow)), + rep(TRUE,nrow(msmsrecord$childRawSatellite))) + ) + + + spectrum@low <- as.logical(c( + rep(FALSE,nrow(msmsrecord$childFilt)), + rep(FALSE,nrow(msmsrecord$childBad)), + rep(FALSE,nrow(msmsrecord$childUnmatched)), + rep(TRUE,nrow(msmsrecord$childRawLow)), + rep(FALSE,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@rawOK <- as.logical(c( + rep(TRUE,nrow(msmsrecord$childFilt)), + rep(TRUE,nrow(msmsrecord$childBad)), + rep(TRUE,nrow(msmsrecord$childUnmatched)), + rep(FALSE,nrow(msmsrecord$childRawLow)), + rep(FALSE,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@good <- as.logical(c( + msmsrecord$childFilt$good, + msmsrecord$childBad$good, + msmsrecord$childUnmatched$good, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + spectrum@mzCalc <- as.numeric(c( + msmsrecord$childFilt$mzCalc, + msmsrecord$childBad$mzCalc, + msmsrecord$childUnmatched$mzCalc, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + spectrum@formula <- as.character(c( + msmsrecord$childFilt$formula, + msmsrecord$childBad$formula, + msmsrecord$childUnmatched$formula, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@dbe <- as.numeric(c( + 
msmsrecord$childFilt$dbe, + msmsrecord$childBad$dbe, + msmsrecord$childUnmatched$dbe, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@formulaCount <- as.integer(c( + msmsrecord$childFilt$formulaCount, + msmsrecord$childBad$formulaCount, + msmsrecord$childUnmatched$formulaCount, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@dppm <- as.numeric(c( + msmsrecord$childFilt$dppm, + msmsrecord$childBad$dppm, + msmsrecord$childUnmatched$dppm, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + spectrum@dppmBest <- as.numeric(c( + msmsrecord$childFilt$dppmBest, + msmsrecord$childBad$dppmBest, + msmsrecord$childUnmatched$dppmBest, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + } + else + { + # mz and intensity are already there + spectrum@satellite <- as.logical(rep(NA, spectrum@peaksCount)) + spectrum@low <- as.logical(rep(NA, spectrum@peaksCount)) + spectrum@rawOK <- as.logical(rep(FALSE, spectrum@peaksCount)) + spectrum@good <- as.logical(rep(FALSE, spectrum@peaksCount)) + spectrum@mzCalc <- as.numeric(rep(NA, spectrum@peaksCount)) + spectrum@formula <- as.character(rep(NA, spectrum@peaksCount)) + spectrum@dbe <- as.numeric(rep(NA, spectrum@peaksCount)) + spectrum@formulaCount <- as.integer(rep(NA, spectrum@peaksCount)) + spectrum@dppm <- as.numeric(rep(NA, spectrum@peaksCount)) + spectrum@dppmBest <- as.numeric(rep(NA, spectrum@peaksCount)) + + + + } - spectrum@dppm <- as.numeric(c( - msmsrecord$childFilt$dppm, - msmsrecord$childBad$dppm, - msmsrecord$childUnmatched$dppm, - rep(NA,nrow(msmsrecord$childRawLow)), - rep(NA,nrow(msmsrecord$childRawSatellite))) - ) - spectrum@dppmBest <- as.numeric(c( - msmsrecord$childFilt$dppmBest, - msmsrecord$childBad$dppmBest, - msmsrecord$childUnmatched$dppmBest, - rep(NA,nrow(msmsrecord$childRawLow)), - 
rep(NA,nrow(msmsrecord$childRawSatellite))) - ) # .RmbSpectrum2 <- setClass("RmbSpectrum2", # representation = representation( @@ -252,6 +324,14 @@ }, set@children, analyzedSpec$msmsdata) set@children <- as(children, "SimpleList") + + ok <- unlist(lapply(set@children, function(c) c@ok)) + if(all(ok)) + set@complete <- TRUE + if(all(!ok)) + set@empty <- TRUE + + set@mode <- analyzedSpec$mode return(set) }, @@ -263,6 +343,11 @@ } +## .updateObject.aggregated <- function(aggregatedSpecs) +## { +## +## } + # Finds progress in the "old workspace version" to determine whether to take the old spectra or the recalibrated ones (and # make a parent workspace) .findProgress.v1 <- function(workspace) diff --git a/R/SpectraSetMethods.R b/R/SpectraSetMethods.R new file mode 100644 index 0000000000000000000000000000000000000000..ec740a1adc44bfc20f3b8123ff3e96ce20640b85 --- /dev/null +++ b/R/SpectraSetMethods.R @@ -0,0 +1,67 @@ + + +# Accessor methods for found, complete, empty +# checkSpectra: S4 generic taking a spectra object s and a property name +#' @export +setGeneric("checkSpectra", function(s, property) standardGeneric("checkSpectra")) + +# checkSpectra for RmbSpectraSet: returns the slot named by property; only found, complete and empty are accepted +#' @export +setMethod("checkSpectra", c("RmbSpectraSet", "character"), function(s, property) + { + #stopifnot(value=="logical", "For single spectraSet, only TRUE/FALSE output is supported.") + fields <- c("found", "complete", "empty") + if(!(property %in% fields)) stop("Only found, complete, empty properties are allowed") + slot(s, property) # last expression, so the slot content is the return value + }) + + +# selectSpectra: S4 generic; dispatch uses s and property only, value is an ordinary argument defaulting to "logical" +#' @export +setGeneric("selectSpectra", def = function(s, property, value="logical") standardGeneric("selectSpectra"), + signature = c("s", "property")) + +# selectSpectra for RmbSpectraSetList: evaluates checkSpectra on every element, then value chooses the form of the result +#' @export +setMethod("selectSpectra", c("RmbSpectraSetList", "character"), function(s, property, value="logical") + { + matches <- unlist(lapply(s, function(s) checkSpectra(s, property))) + if(value == "logical") + return(matches) + else if(value == "index") + return(which(matches)) + else if(value
== "object") + return(s[matches]) + else if(value == "mismatch") return(s[!matches]) + else stop("value must be one of: logical, index, object, mismatch") # fail loudly; the chain used to fall through and return NULL + }) + +# selectSpectra for msmsWorkspace: forwards to the method for the spectra slot of the workspace +#' @export +setMethod("selectSpectra", c("msmsWorkspace", "character"), function(s, property, value="logical") + selectSpectra(s@spectra, property, value)) + +# spectraCount: S4 generic on a single argument s +#' @export +setGeneric("spectraCount", function(s) standardGeneric("spectraCount")) + +# spectraCount for RmbSpectraSet: length of the children slot +#' @export +setMethod("spectraCount", c("RmbSpectraSet"), function(s) + { + length(s@children) + }) + +# spectraCount for RmbSpectraSetList: one spectraCount result per element, flattened by unlist +#' @export +setMethod("spectraCount", c("RmbSpectraSetList"), function(s) + { + unlist(lapply(s, spectraCount)) + }) + + +# spectraCount for msmsWorkspace: forwards to the spectra slot of the workspace +#' @export +setMethod("spectraCount", c("msmsWorkspace"), function(s) + spectraCount(s@spectra)) + diff --git a/R/SpectrumMethods.R b/R/SpectrumMethods.R index 5e05ca859e55a15fa448e125c2d0281b5564a729..097e0af6b3708e65c64f920326ba4804f7e462a5 100644 --- a/R/SpectrumMethods.R +++ b/R/SpectrumMethods.R @@ -3,6 +3,7 @@ # Author: stravsmi ############################################################################### +#' @export setMethod("getData", c("RmbSpectrum2"), function(s) { peaks <- s@peaksCount @@ -14,7 +15,7 @@ setMethod("getData", c("RmbSpectrum2"), function(s) df }) - +#' @export setMethod("setData", c("RmbSpectrum2", "data.frame"), function(s, df, clean = TRUE) { cols <- c("mz", "intensity", "satellite", "low", "rawOK", "good", "mzCalc", "formula", "dppm")