Skip to content
Snippets Groups Projects
Commit 2f74bd54 authored by Michele Stravs's avatar Michele Stravs
Browse files

msmsWorkspace update: specs and analyzedSpecs to spectra

parent 017f6132
No related branches found
No related tags found
No related merge requests found
#' @import methods
NULL
setClassUnion("msmsWorkspaceOrNULL", "NULL")
#' Workspace for \code{msmsWorkflow} data
#'
#' A workspace which stores input and output data for \code{\link{msmsWorkflow}}.
......@@ -46,7 +49,7 @@ NULL
## ' peaks (after step 7, see \code{\link{reanalyzeFailpeaks}}).
## ' @slot refilteredRcSpecs Final data to use for MassBank record creation after
## ' multiplicity filtering (step 8).
#'
##'
## ' @method show,msmsWorkflow Shows a brief summary of the object. Currently only the included files.
#'
#' @seealso \code{\link{msmsWorkflow}}
......@@ -55,10 +58,11 @@ NULL
#' @docType class
#' @exportClass msmsWorkspace
#' @export
setClass("msmsWorkspace",
representation(
.msmsWorkspace <- setClass("msmsWorkspace",
representation = representation(
files = "character",
specs = "list",
spectra = "RmbSpectraSetList",
parent = "msmsWorkspaceOrNULL",
analyzedSpecs = "list",
aggregatedSpecs = "list",
rc = "ANY",
......@@ -70,8 +74,39 @@ setClass("msmsWorkspace",
refilteredRcSpecs = "list",
archivename = "character",
settings = "list"
),
),
contains=c("Versioned"),
prototype = prototype(
new("Versioned", versions=c(msmsWorkspace = "2.0.1")),
parent = NULL
)
)
setIs("msmsWorkspace", "msmsWorkspaceOrNULL")
#.msmsWorkspace <- setClass("msmsWorkspace",
# representation = representation(
# files = "character",
# specs = "list",
# analyzedSpecs = "list",
# aggregatedSpecs = "list",
# rc = "ANY",
# rc.ms1 = "ANY",
# recalibratedSpecs = "list",
# analyzedRcSpecs = "list",
# aggregatedRcSpecs = "list",
# reanalyzedRcSpecs = "list",
# refilteredRcSpecs = "list",
# archivename = "character",
# settings = "list"
# ),
# contains=c("Versioned"),
# prototype = prototype(
# new("Versioned", versions=c(msmsWorkspace = "1.0.1"))
# )
# )
#' Workspace for \code{mbWorkflow} data
#'
......@@ -159,7 +194,7 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE)
load(fileName, envir=tempEnv)
# Look if there is a msmsWorkspace in the file
objs <- ls(tempEnv)
isWs <- unlist(lapply(objs, function(obj) "msmsWorkspace" %in% class(tempEnv[[obj]])))
isWs <- unlist(lapply(objs, function(obj) is(tempEnv[[obj]], "msmsWorkspace")))
whichWs <- match(TRUE, isWs)
# Found? Then just return it.
if(!is.na(whichWs))
......@@ -169,7 +204,7 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE)
if(loadSettings == FALSE)
w@settings <- list()
}
# Otherwise hope to load the dataset into a new workspace
# If there is no msmsWorkspace object in the workspace, this means that the workspace is version 1!
else
{
w <- new("msmsWorkspace")
......@@ -188,12 +223,22 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE)
for(var in dataset)
{
if(exists(var, envir=tempEnv))
slot(w, var) <- tempEnv[[var]]
slot(w, var, check=FALSE) <- tempEnv[[var]]
classVersion(w) <- "1.0.0"
}
# Check if settings exist...
if((loadSettings == TRUE) && exists("RmbSettings", envir=tempEnv))
w@settings <- tempEnv$RmbSettings
}
# process version updates
updateClass <- FALSE
if(!isVersioned(w)) updateClass <- TRUE
else if(!all(isCurrent(w))) updateClass <- TRUE
if(updateClass)
{
w <- updateObject(w)
}
# If loadSettings is set: load the settings into RMassBank
if((loadSettings == TRUE) && (length(w@settings) > 0))
loadRmbSettings(w@settings)
......
# TODO: Add comment
#
# Author: stravsmi
###############################################################################
.updateObject.RmbWorkspace <- setMethod("updateObject", signature(object="msmsWorkspace"), function(object, ..., verbose = FALSE)
{
w <- object
if(isVersioned(w))
if(all(isCurrent(w)))
return(w)
# get msmsWorkspace version
if(!isVersioned(w))
v <- "1.0.0"
else
v <- classVersion(w)["msmsWorkspace"]
w.new <- w
# gradually step up versions
# 2.0.1:
# * spectra go from specs, analyzedSpecs or their rc analogs to Spectra
# * data pre recalibration get shifted to "parent workspace"
if(v < "2.0.1")
{
w.old <- w.new
w.new <- new("msmsWorkspace")
w.new@files <- w.old@files
# Do we have recalibration done? If so: all data in the WS will be the recalibrated data, the unrecalibrated data will be
# moved into a new parent workspace which is referenced
progress <- .findProgress.v1(w.old)
if(4 %in% progress)
{
w.parent <- w.old
w.parent@recalibratedSpecs <- list()
w.parent@analyzedRcSpecs <- list()
w.parent@aggregatedRcSpecs <- list()
w.parent@reanalyzedRcSpecs <- list()
w.parent@refilteredRcSpecs <- list()
w.old@specs <- w.old@recalibratedSpecs
w.old@analyzedSpecs <- w.old@analyzedRcSpecs
w.parent.new <- updateObject(w.parent)
w.new@parent <- w.parent.new
}
w.new@spectra <- .updateObject.spectra(w.old@specs, w.old@analyzedSpecs)
}
return(w.new)
})
.updateObject.spectra <- function(specs, analyzedSpecs)
{
if((length(specs) != length(analyzedSpecs)) && (0 != length(analyzedSpecs) ))
stop("updateObject: Could not update object because data is inconsistent. length(analyzedSpecs) != length(specs) or 0")
# process info ex specs
spectra <- lapply(specs, function(spec){
set <- new("RmbSpectraSet")
# identifiers and properties
set@mz <- spec$mz$mzCenter
set@id <- as.integer(spec$id)
set@formula <- spec$formula
set@found <- as.logical(spec$foundOK)
# now parent and child MS
# check for parent recalibration column
if("mzRecal" %in% colnames(spec$parentPeak))
mzcol <- "mzRecal"
else
mzcol <- "mz"
set@parent <- new("Spectrum1",
mz = spec$parentPeak[,mzcol],
intensity = spec$parentPeak[,2],
polarity = as.integer(spec$parentHeader$polarity),
peaksCount = as.integer(spec$parentHeader$peaksCount),
rt = spec$parentHeader$retentionTime,
acquisitionNum = as.integer(spec$parentHeader$acquisitionNum),
tic = spec$parentHeader$totIonCurrent,
centroided = TRUE
)
# get MSMS data from spec$peaks into RmbSpectrum2 objects
children.p1 <- lapply(spec$peaks, function(peaks)
{
if("mzRecal" %in% colnames(peaks))
mzcol <- "mzRecal"
else
mzcol <- "mz"
new("RmbSpectrum2",
mz=peaks[,mzcol],
intensity=peaks[,2],
peaksCount=nrow(peaks))
})
# get header data from spec$childHeaders into separate RmbSpectrum2 objects
children.p2 <- apply(spec$childHeaders, 1, function(line)
{
new("RmbSpectrum2",
precScanNum = as.integer(line["precursorScanNum"]),
precursorMz = line["precursorMZ"],
precursorIntensity = line["precursorIntensity"],
precursorCharge = as.integer(line["precursorCharge"]),
collisionEnergy = line["collisionEnergy"],
tic = line["totIonCurrent"],
rt = line["retentionTime"],
acquisitionNum = as.integer(line["acquisitionNum"]),
centroided = TRUE
)
})
# merge MSMS RmbSpectrum2 with header RmbSpectrum2
children <- mapply(function(c1,c2)
{
c2slots <- c("precScanNum","precursorMz", "precursorIntensity", "precursorCharge", "collisionEnergy",
"tic", "rt", "acquisitionNum", "centroided")
for(c2slot in c2slots)
slot(c1, c2slot) <- slot(c2, c2slot)
return(c1)
}, children.p1, children.p2)
set@children <- as(children, "SimpleList")
return(set)
})
spectra <- mapply(function(set, name)
{
set@name <- name
return(set)
},
spectra, names(specs))
# add info ex analyzedSpecs if present
if(length(analyzedSpecs) > 0)
{
spectra <- mapply(function(set, analyzedSpec)
{
if(length(analyzedSpec$msmsdata) != length(set@children))
stop("updateObject: Could not update object because data is inconsistent. length(analyzedSpec$msmsdata) != length(set@children)")
children <- mapply(function(spectrum, msmsrecord)
{
if(!is.data.frame(msmsrecord$childBad))
msmsrecord$childBad <- data.frame()
# note: mz/intensity are replaced with the values from the analyzed spectrum,
# such as to have a mass multiple times for multiple matched formulas
# check if the spectrum has recalibrated masses; if yes, use those
if("mzRecal" %in% colnames(msmsrecord$childRaw))
mzcol <- "mzRecal"
else
mzcol <- "mz"
spectrum@mz <- c(msmsrecord$childFilt$mzFound,
msmsrecord$childBad$mzFound,
msmsrecord$childUnmatched$mzFound,
msmsrecord$childRawLow[,mzcol],
msmsrecord$childRawSatellite[,mzcol])
spectrum@intensity <- c(msmsrecord$childFilt$int,
msmsrecord$childBad$int,
msmsrecord$childUnmatched$int,
msmsrecord$childRawLow$int,
msmsrecord$childRawSatellite$int)
spectrum@peaksCount <- length(spectrum@mz)
spectrum@satellite <- as.logical(c(
rep(FALSE,nrow(msmsrecord$childFilt)),
rep(FALSE,nrow(msmsrecord$childBad)),
rep(FALSE,nrow(msmsrecord$childUnmatched)),
rep(FALSE,nrow(msmsrecord$childRawLow)),
rep(TRUE,nrow(msmsrecord$childRawSatellite)))
)
spectrum@low <- as.logical(c(
rep(FALSE,nrow(msmsrecord$childFilt)),
rep(FALSE,nrow(msmsrecord$childBad)),
rep(FALSE,nrow(msmsrecord$childUnmatched)),
rep(TRUE,nrow(msmsrecord$childRawLow)),
rep(FALSE,nrow(msmsrecord$childRawSatellite)))
)
spectrum@rawOK <- as.logical(c(
rep(TRUE,nrow(msmsrecord$childFilt)),
rep(TRUE,nrow(msmsrecord$childBad)),
rep(TRUE,nrow(msmsrecord$childUnmatched)),
rep(FALSE,nrow(msmsrecord$childRawLow)),
rep(FALSE,nrow(msmsrecord$childRawSatellite)))
)
spectrum@good <- as.logical(c(
msmsrecord$childFilt$good,
msmsrecord$childBad$good,
msmsrecord$childUnmatched$good,
rep(NA,nrow(msmsrecord$childRawLow)),
rep(NA,nrow(msmsrecord$childRawSatellite)))
)
spectrum@mzCalc <- as.numeric(c(
msmsrecord$childFilt$mzCalc,
msmsrecord$childBad$mzCalc,
msmsrecord$childUnmatched$mzCalc,
rep(NA,nrow(msmsrecord$childRawLow)),
rep(NA,nrow(msmsrecord$childRawSatellite)))
)
spectrum@formula <- as.character(c(
msmsrecord$childFilt$formula,
msmsrecord$childBad$formula,
msmsrecord$childUnmatched$formula,
rep(NA,nrow(msmsrecord$childRawLow)),
rep(NA,nrow(msmsrecord$childRawSatellite)))
)
spectrum@dbe <- as.numeric(c(
msmsrecord$childFilt$dbe,
msmsrecord$childBad$dbe,
msmsrecord$childUnmatched$dbe,
rep(NA,nrow(msmsrecord$childRawLow)),
rep(NA,nrow(msmsrecord$childRawSatellite)))
)
spectrum@formulaCount <- as.integer(c(
msmsrecord$childFilt$formulaCount,
msmsrecord$childBad$formulaCount,
msmsrecord$childUnmatched$formulaCount,
rep(NA,nrow(msmsrecord$childRawLow)),
rep(NA,nrow(msmsrecord$childRawSatellite)))
)
spectrum@dppm <- as.numeric(c(
msmsrecord$childFilt$dppm,
msmsrecord$childBad$dppm,
msmsrecord$childUnmatched$dppm,
rep(NA,nrow(msmsrecord$childRawLow)),
rep(NA,nrow(msmsrecord$childRawSatellite)))
)
spectrum@dppmBest <- as.numeric(c(
msmsrecord$childFilt$dppmBest,
msmsrecord$childBad$dppmBest,
msmsrecord$childUnmatched$dppmBest,
rep(NA,nrow(msmsrecord$childRawLow)),
rep(NA,nrow(msmsrecord$childRawSatellite)))
)
# .RmbSpectrum2 <- setClass("RmbSpectrum2",
# representation = representation(
## satellite="logical",
## low="logical",
## rawOK ="logical",
## good = "logical",
## mzCalc = "numeric",
## formula = "character",
## formulaCount = "integer",
## dppm = "numeric",
## dppmBest = "numeric",
# ),
return(spectrum)
},
set@children, analyzedSpec$msmsdata)
set@children <- as(children, "SimpleList")
return(set)
},
spectra, analyzedSpecs)
}
return(as(spectra, "SimpleList"))
}
# Finds progress in the "old workspace version" to determine whether to take the old spectra or the recalibrated ones (and
# make a parent workspace)
.findProgress.v1 <- function(workspace)
{
step1 <- (length(workspace@specs) > 0)
step2 <- (length(workspace@analyzedSpecs) > 0)
step3 <- (length(workspace@aggregatedSpecs) > 0)
step4 <- (length(workspace@recalibratedSpecs) > 0)
step5 <- (length(workspace@analyzedRcSpecs) > 0)
step6 <- (length(workspace@aggregatedRcSpecs) > 0)
step7 <- (length(workspace@reanalyzedRcSpecs) > 0)
step8 <- (length(workspace@refilteredRcSpecs) > 0)
steps <- which(c(step1, step2, step3, step4, step5, step6, step7, step8))
return(steps)
}
\ No newline at end of file
setClass("RmbSpectraSet",
.RmbSpectrum2 <- setClass("RmbSpectrum2",
representation = representation(
satellite="logical",
low="logical",
rawOK ="logical",
good = "logical",
mzCalc = "numeric",
formula = "character",
dbe = "numeric",
formulaCount = "integer",
dppm = "numeric",
dppmBest = "numeric"
),
contains=c("Spectrum2"),
prototype = prototype(
satellite = logical(),
low = logical(),
rawOK = logical(),
good = logical(),
mzCalc = numeric(),
formula = character(),
dbe = numeric(),
formulaCount = integer(),
dppm = numeric(),
dppmBest = numeric(),
new("Versioned", versions=c(classVersion("Spectrum2"), RmbSpectrum2 = "0.1.0"))
),
)
.RmbSpectrum2List <- setClass("RmbSpectrum2List", contains="SimpleList",
prototype=prototype(elementType="RmbSpectrum2"))
#
#setAs("ANY", "RmbSpectrum2List", function(from) {
# coerceToSimpleList(from)
# })
.RmbSpectraSet <- setClass("RmbSpectraSet",
representation = representation(
parent = "Spectrum1",
children = "list",
children = "RmbSpectrum2List",
# These are done as slots and not as S4 functions, because they are set during the workflow
# in "checking" steps. It's easier.
found = "logical",
......@@ -15,7 +52,7 @@ setClass("RmbSpectraSet",
),
prototype = prototype(
parent = new("Spectrum1"),
children = list(),
children = new("RmbSpectrum2List"),
found = FALSE,
complete = NA,
empty = NA,
......@@ -25,36 +62,13 @@ setClass("RmbSpectraSet",
name = character(),
annotations = list()
)
,
validity = function(object)
{
childrenSpectraOK <- all(unlist(lapply(object@children, function(s) inherits(s, "RmbSpectrum2"))))
if(!childrenSpectraOK) return("MS2 spectra are not of class RmbSpectrum2")
return(TRUE)
}
);
setClass("RmbSpectrum2",
representation = representation(
satellite="logical",
low="logical",
rawOK ="logical",
good = "logical",
mzCalc = "numeric",
formula = "numeric",
dppm = "numeric"
),
contains=c("Spectrum2"),
prototype = prototype(
satellite = logical(),
low = logical(),
rawOK = logical(),
good = logical(),
mzCalc = numeric(),
formula = numeric(),
dppm = numeric()
),
)
.RmbSpectraSetList <- setClass("RmbSpectraSetList", contains="SimpleList",
prototype=prototype(elementType="RmbSpectraSet"))
setGeneric("getData", function(s) standardGeneric("getData"))
setGeneric("setData", function(s, df) standardGeneric("setData"))
......
......@@ -6,7 +6,7 @@
setMethod("getData", c("RmbSpectrum2"), function(s)
{
peaks <- s@peaksCount
cols <- c("mz", "intensity", "satellite", "low", "rawOK", "good", "mzCalc", "formula", "dppm")
cols <- c("mz", "intensity", "satellite", "low", "rawOK", "good", "mzCalc", "formula", "dbe", "formulaCount", "dppm", "dppmBest")
cols.isFilled <- unlist(lapply(cols, function(col) length(slot(s, col)) == peaks))
cols.filled <- cols[cols.isFilled]
df <- do.call(data.frame, lapply(cols.filled, function(col) slot(s, col)))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment