From 2f74bd5443fa08e4dd039d720d8759a4b41609ee Mon Sep 17 00:00:00 2001 From: Michele Stravs <stravsmi@eawag.ch> Date: Thu, 4 Dec 2014 11:43:59 +0100 Subject: [PATCH] msmsWorkspace update: specs and analyzedSpecs to spectra --- R/RmbWorkspace.R | 61 +++++++-- R/RmbWorkspaceUpdate.R | 279 +++++++++++++++++++++++++++++++++++++++++ R/SpectrumClasses.R | 76 ++++++----- R/SpectrumMethods.R | 2 +- 4 files changed, 378 insertions(+), 40 deletions(-) create mode 100644 R/RmbWorkspaceUpdate.R diff --git a/R/RmbWorkspace.R b/R/RmbWorkspace.R index 48b08dd..402aa12 100755 --- a/R/RmbWorkspace.R +++ b/R/RmbWorkspace.R @@ -1,6 +1,9 @@ #' @import methods NULL + +setClassUnion("msmsWorkspaceOrNULL", "NULL") + #' Workspace for \code{msmsWorkflow} data #' #' A workspace which stores input and output data for \code{\link{msmsWorkflow}}. @@ -46,7 +49,7 @@ NULL ## ' peaks (after step 7, see \code{\link{reanalyzeFailpeaks}}). ## ' @slot refilteredRcSpecs Final data to use for MassBank record creation after ## ' multiplicity filtering (step 8). -#' +##' ## ' @method show,msmsWorkflow Shows a brief summary of the object. Currently only the included files. #' #' @seealso \code{\link{msmsWorkflow}} @@ -55,10 +58,11 @@ NULL #' @docType class #' @exportClass msmsWorkspace #' @export -setClass("msmsWorkspace", - representation( +.msmsWorkspace <- setClass("msmsWorkspace", + representation = representation( files = "character", - specs = "list", + spectra = "RmbSpectraSetList", + parent = "msmsWorkspaceOrNULL", analyzedSpecs = "list", aggregatedSpecs = "list", rc = "ANY", @@ -70,8 +74,39 @@ setClass("msmsWorkspace", refilteredRcSpecs = "list", archivename = "character", settings = "list" - ), + ), + contains=c("Versioned"), + prototype = prototype( + new("Versioned", versions=c(msmsWorkspace = "2.0.1")), + parent = NULL ) +) + +setIs("msmsWorkspace", "msmsWorkspaceOrNULL") + +#.msmsWorkspace <- setClass("msmsWorkspace", +# representation = representation( +# files = "character", +# specs = "list", +# analyzedSpecs = "list", +# aggregatedSpecs = "list", +# rc = "ANY", +# rc.ms1 = "ANY", +# recalibratedSpecs = "list", +# analyzedRcSpecs = "list", +# aggregatedRcSpecs = "list", +# reanalyzedRcSpecs = "list", +# refilteredRcSpecs = "list", +# archivename = "character", +# settings = "list" +# ), +# contains=c("Versioned"), +# prototype = prototype( +# new("Versioned", versions=c(msmsWorkspace = "1.0.1")) +# ) +# ) + + #' Workspace for \code{mbWorkflow} data #' @@ -159,7 +194,7 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE) load(fileName, envir=tempEnv) # Look if there is a msmsWorkspace in the file objs <- ls(tempEnv) - isWs <- unlist(lapply(objs, function(obj) "msmsWorkspace" %in% class(tempEnv[[obj]]))) + isWs <- unlist(lapply(objs, function(obj) is(tempEnv[[obj]], "msmsWorkspace"))) whichWs <- match(TRUE, isWs) # Found? Then just return it. if(!is.na(whichWs)) @@ -169,7 +204,7 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE) if(loadSettings == FALSE) w@settings <- list() } - # Otherwise hope to load the dataset into a new workspace + # If there is no msmsWorkspace object in the workspace, this means that the workspace is version 1! else { w <- new("msmsWorkspace") @@ -188,12 +223,22 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE) for(var in dataset) { if(exists(var, envir=tempEnv)) - slot(w, var) <- tempEnv[[var]] + slot(w, var, check=FALSE) <- tempEnv[[var]] + classVersion(w) <- "1.0.0" } # Check if settings exist... if((loadSettings == TRUE) && exists("RmbSettings", envir=tempEnv)) w@settings <- tempEnv$RmbSettings } + # process version updates + updateClass <- FALSE + if(!isVersioned(w)) updateClass <- TRUE + else if(!all(isCurrent(w))) updateClass <- TRUE + if(updateClass) + { + w <- updateObject(w) + } + # If loadSettings is set: load the settings into RMassBank if((loadSettings == TRUE) && (length(w@settings) > 0)) loadRmbSettings(w@settings) diff --git a/R/RmbWorkspaceUpdate.R b/R/RmbWorkspaceUpdate.R new file mode 100644 index 0000000..441904f --- /dev/null +++ b/R/RmbWorkspaceUpdate.R @@ -0,0 +1,279 @@ +# TODO: Add comment +# +# Author: stravsmi +############################################################################### + + +.updateObject.RmbWorkspace <- setMethod("updateObject", signature(object="msmsWorkspace"), function(object, ..., verbose = FALSE) + { + w <- object + if(isVersioned(w)) + if(all(isCurrent(w))) + return(w) + # get msmsWorkspace version + if(!isVersioned(w)) + v <- "1.0.0" + else + v <- classVersion(w)["msmsWorkspace"] + w.new <- w + # gradually step up versions + # 2.0.1: + # * spectra go from specs, analyzedSpecs or their rc analogs to Spectra + # * data pre recalibration get shifted to "parent workspace" + if(v < "2.0.1") + { + w.old <- w.new + w.new <- new("msmsWorkspace") + w.new@files <- w.old@files + # Do we have recalibration done? If so: all data in the WS will be the recalibrated data, the unrecalibrated data will be + # moved into a new parent workspace which is referenced + progress <- .findProgress.v1(w.old) + if(4 %in% progress) + { + w.parent <- w.old + w.parent@recalibratedSpecs <- list() + w.parent@analyzedRcSpecs <- list() + w.parent@aggregatedRcSpecs <- list() + w.parent@reanalyzedRcSpecs <- list() + w.parent@refilteredRcSpecs <- list() + w.old@specs <- w.old@recalibratedSpecs + w.old@analyzedSpecs <- w.old@analyzedRcSpecs + w.parent.new <- updateObject(w.parent) + w.new@parent <- w.parent.new + } + w.new@spectra <- .updateObject.spectra(w.old@specs, w.old@analyzedSpecs) + } + + return(w.new) + }) + + + +.updateObject.spectra <- function(specs, analyzedSpecs) +{ + if((length(specs) != length(analyzedSpecs)) && (0 != length(analyzedSpecs) )) + stop("updateObject: Could not update object because data is inconsistent. length(analyzedSpecs) != length(specs) or 0") + # process info ex specs + spectra <- lapply(specs, function(spec){ + set <- new("RmbSpectraSet") + # identifiers and properties + set@mz <- spec$mz$mzCenter + set@id <- as.integer(spec$id) + set@formula <- spec$formula + set@found <- as.logical(spec$foundOK) + # now parent and child MS + # check for parent recalibration column + if("mzRecal" %in% colnames(spec$parentPeak)) + mzcol <- "mzRecal" + else + mzcol <- "mz" + set@parent <- new("Spectrum1", + mz = spec$parentPeak[,mzcol], + intensity = spec$parentPeak[,2], + polarity = as.integer(spec$parentHeader$polarity), + peaksCount = as.integer(spec$parentHeader$peaksCount), + rt = spec$parentHeader$retentionTime, + acquisitionNum = as.integer(spec$parentHeader$acquisitionNum), + tic = spec$parentHeader$totIonCurrent, + centroided = TRUE + ) + # get MSMS data from spec$peaks into RmbSpectrum2 objects + children.p1 <- lapply(spec$peaks, function(peaks) + { + if("mzRecal" %in% colnames(peaks)) + mzcol <- "mzRecal" + else + mzcol <- "mz" + new("RmbSpectrum2", + mz=peaks[,mzcol], + intensity=peaks[,2], + peaksCount=nrow(peaks)) + }) + # get header data from spec$childHeaders into separate RmbSpectrum2 objects + children.p2 <- apply(spec$childHeaders, 1, function(line) + { + new("RmbSpectrum2", + precScanNum = as.integer(line["precursorScanNum"]), + precursorMz = line["precursorMZ"], + precursorIntensity = line["precursorIntensity"], + precursorCharge = as.integer(line["precursorCharge"]), + collisionEnergy = line["collisionEnergy"], + tic = line["totIonCurrent"], + rt = line["retentionTime"], + acquisitionNum = as.integer(line["acquisitionNum"]), + centroided = TRUE + ) + }) + # merge MSMS RmbSpectrum2 with header RmbSpectrum2 + children <- mapply(function(c1,c2) + { + c2slots <- c("precScanNum","precursorMz", "precursorIntensity", "precursorCharge", "collisionEnergy", + "tic", "rt", "acquisitionNum", "centroided") + for(c2slot in c2slots) + slot(c1, c2slot) <- slot(c2, c2slot) + return(c1) + }, children.p1, children.p2) + set@children <- as(children, "SimpleList") + + return(set) + }) + spectra <- mapply(function(set, name) + { + set@name <- name + return(set) + }, + spectra, names(specs)) + + + # add info ex analyzedSpecs if present + if(length(analyzedSpecs) > 0) + { + spectra <- mapply(function(set, analyzedSpec) + { + if(length(analyzedSpec$msmsdata) != length(set@children)) + stop("updateObject: Could not update object because data is inconsistent. length(analyzedSpec$msmsdata) != length(set@children)") + children <- mapply(function(spectrum, msmsrecord) + { + if(!is.data.frame(msmsrecord$childBad)) + msmsrecord$childBad <- data.frame() + # note: mz/intensity are replaced with the values from the analyzed spectrum, + # such as to have a mass multiple times for multiple matched formulas + + # check if the spectrum has recalibrated masses; if yes, use those + if("mzRecal" %in% colnames(msmsrecord$childRaw)) + mzcol <- "mzRecal" + else + mzcol <- "mz" + spectrum@mz <- c(msmsrecord$childFilt$mzFound, + msmsrecord$childBad$mzFound, + msmsrecord$childUnmatched$mzFound, + msmsrecord$childRawLow[,mzcol], + msmsrecord$childRawSatellite[,mzcol]) + spectrum@intensity <- c(msmsrecord$childFilt$int, + msmsrecord$childBad$int, + msmsrecord$childUnmatched$int, + msmsrecord$childRawLow$int, + msmsrecord$childRawSatellite$int) + spectrum@peaksCount <- length(spectrum@mz) + + spectrum@satellite <- as.logical(c( + rep(FALSE,nrow(msmsrecord$childFilt)), + rep(FALSE,nrow(msmsrecord$childBad)), + rep(FALSE,nrow(msmsrecord$childUnmatched)), + rep(FALSE,nrow(msmsrecord$childRawLow)), + rep(TRUE,nrow(msmsrecord$childRawSatellite))) + ) + + + spectrum@low <- as.logical(c( + rep(FALSE,nrow(msmsrecord$childFilt)), + rep(FALSE,nrow(msmsrecord$childBad)), + rep(FALSE,nrow(msmsrecord$childUnmatched)), + rep(TRUE,nrow(msmsrecord$childRawLow)), + rep(FALSE,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@rawOK <- as.logical(c( + rep(TRUE,nrow(msmsrecord$childFilt)), + rep(TRUE,nrow(msmsrecord$childBad)), + rep(TRUE,nrow(msmsrecord$childUnmatched)), + rep(FALSE,nrow(msmsrecord$childRawLow)), + rep(FALSE,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@good <- as.logical(c( + msmsrecord$childFilt$good, + msmsrecord$childBad$good, + msmsrecord$childUnmatched$good, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + spectrum@mzCalc <- as.numeric(c( + msmsrecord$childFilt$mzCalc, + msmsrecord$childBad$mzCalc, + msmsrecord$childUnmatched$mzCalc, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + spectrum@formula <- as.character(c( + msmsrecord$childFilt$formula, + msmsrecord$childBad$formula, + msmsrecord$childUnmatched$formula, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@dbe <- as.numeric(c( + msmsrecord$childFilt$dbe, + msmsrecord$childBad$dbe, + msmsrecord$childUnmatched$dbe, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@formulaCount <- as.integer(c( + msmsrecord$childFilt$formulaCount, + msmsrecord$childBad$formulaCount, + msmsrecord$childUnmatched$formulaCount, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + + spectrum@dppm <- as.numeric(c( + msmsrecord$childFilt$dppm, + msmsrecord$childBad$dppm, + msmsrecord$childUnmatched$dppm, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + spectrum@dppmBest <- as.numeric(c( + msmsrecord$childFilt$dppmBest, + msmsrecord$childBad$dppmBest, + msmsrecord$childUnmatched$dppmBest, + rep(NA,nrow(msmsrecord$childRawLow)), + rep(NA,nrow(msmsrecord$childRawSatellite))) + ) + +# .RmbSpectrum2 <- setClass("RmbSpectrum2", +# representation = representation( + ## satellite="logical", + ## low="logical", + ## rawOK ="logical", + ## good = "logical", + ## mzCalc = "numeric", + ## formula = "character", + ## formulaCount = "integer", + ## dppm = "numeric", + ## dppmBest = "numeric", +# ), + + return(spectrum) + + }, + set@children, analyzedSpec$msmsdata) + set@children <- as(children, "SimpleList") + return(set) + }, + spectra, analyzedSpecs) + } + + + return(as(spectra, "SimpleList")) + +} + +# Finds progress in the "old workspace version" to determine whether to take the old spectra or the recalibrated ones (and +# make a parent workspace) +.findProgress.v1 <- function(workspace) +{ + step1 <- (length(workspace@specs) > 0) + step2 <- (length(workspace@analyzedSpecs) > 0) + step3 <- (length(workspace@aggregatedSpecs) > 0) + step4 <- (length(workspace@recalibratedSpecs) > 0) + step5 <- (length(workspace@analyzedRcSpecs) > 0) + step6 <- (length(workspace@aggregatedRcSpecs) > 0) + step7 <- (length(workspace@reanalyzedRcSpecs) > 0) + step8 <- (length(workspace@refilteredRcSpecs) > 0) + steps <- which(c(step1, step2, step3, step4, step5, step6, step7, step8)) + return(steps) +} \ No newline at end of file diff --git a/R/SpectrumClasses.R b/R/SpectrumClasses.R index c1b88e4..2205543 100644 --- a/R/SpectrumClasses.R +++ b/R/SpectrumClasses.R @@ -1,7 +1,44 @@ -setClass("RmbSpectraSet", +.RmbSpectrum2 <- setClass("RmbSpectrum2", + representation = representation( + satellite="logical", + low="logical", + rawOK ="logical", + good = "logical", + mzCalc = "numeric", + formula = "character", + dbe = "numeric", + formulaCount = "integer", + dppm = "numeric", + dppmBest = "numeric" + ), + contains=c("Spectrum2"), + prototype = prototype( + satellite = logical(), + low = logical(), + rawOK = logical(), + good = logical(), + mzCalc = numeric(), + formula = character(), + dbe = numeric(), + formulaCount = integer(), + dppm = numeric(), + dppmBest = numeric(), + new("Versioned", versions=c(classVersion("Spectrum2"), RmbSpectrum2 = "0.1.0")) + ), +) + +.RmbSpectrum2List <- setClass("RmbSpectrum2List", contains="SimpleList", + prototype=prototype(elementType="RmbSpectrum2")) +# +#setAs("ANY", "RmbSpectrum2List", function(from) { +# coerceToSimpleList(from) +# }) + + +.RmbSpectraSet <- setClass("RmbSpectraSet", representation = representation( parent = "Spectrum1", - children = "list", + children = "RmbSpectrum2List", # These are done as slots and not as S4 functions, because they are set during the workflow # in "checking" steps. It's easier. found = "logical", @@ -15,7 +52,7 @@ setClass("RmbSpectraSet", ), prototype = prototype( parent = new("Spectrum1"), - children = list(), + children = new("RmbSpectrum2List"), found = FALSE, complete = NA, empty = NA, @@ -25,36 +62,13 @@ setClass("RmbSpectraSet", name = character(), annotations = list() ) - , - validity = function(object) - { - childrenSpectraOK <- all(unlist(lapply(object@children, function(s) inherits(s, "RmbSpectrum2")))) - if(!childrenSpectraOK) return("MS2 spectra are not of class RmbSpectrum2") - return(TRUE) - } ); -setClass("RmbSpectrum2", - representation = representation( - satellite="logical", - low="logical", - rawOK ="logical", - good = "logical", - mzCalc = "numeric", - formula = "numeric", - dppm = "numeric" - ), - contains=c("Spectrum2"), - prototype = prototype( - satellite = logical(), - low = logical(), - rawOK = logical(), - good = logical(), - mzCalc = numeric(), - formula = numeric(), - dppm = numeric() - ), -) +.RmbSpectraSetList <- setClass("RmbSpectraSetList", contains="SimpleList", + prototype=prototype(elementType="RmbSpectraSet")) + + + setGeneric("getData", function(s) standardGeneric("getData")) setGeneric("setData", function(s, df) standardGeneric("setData")) diff --git a/R/SpectrumMethods.R b/R/SpectrumMethods.R index b139f38..ba480b3 100644 --- a/R/SpectrumMethods.R +++ b/R/SpectrumMethods.R @@ -6,7 +6,7 @@ setMethod("getData", c("RmbSpectrum2"), function(s) { peaks <- s@peaksCount - cols <- c("mz", "intensity", "satellite", "low", "rawOK", "good", "mzCalc", "formula", "dppm") + cols <- c("mz", "intensity", "satellite", "low", "rawOK", "good", "mzCalc", "formula", "dbe", "formulaCount", "dppm", "dppmBest") cols.isFilled <- unlist(lapply(cols, function(col) length(slot(s, col)) == peaks)) cols.filled <- cols[cols.isFilled] df <- do.call(data.frame, lapply(cols.filled, function(col) slot(s, col))) -- GitLab