From 2f74bd5443fa08e4dd039d720d8759a4b41609ee Mon Sep 17 00:00:00 2001
From: Michele Stravs <stravsmi@eawag.ch>
Date: Thu, 4 Dec 2014 11:43:59 +0100
Subject: [PATCH] msmsWorkspace update: specs and analyzedSpecs to spectra

---
 R/RmbWorkspace.R       |  61 +++++++--
 R/RmbWorkspaceUpdate.R | 279 +++++++++++++++++++++++++++++++++++++++++
 R/SpectrumClasses.R    |  76 ++++++-----
 R/SpectrumMethods.R    |   2 +-
 4 files changed, 378 insertions(+), 40 deletions(-)
 create mode 100644 R/RmbWorkspaceUpdate.R

diff --git a/R/RmbWorkspace.R b/R/RmbWorkspace.R
index 48b08dd..402aa12 100755
--- a/R/RmbWorkspace.R
+++ b/R/RmbWorkspace.R
@@ -1,6 +1,9 @@
 #' @import methods
 NULL
 
+
+setClassUnion("msmsWorkspaceOrNULL", "NULL")
+
 #' Workspace for \code{msmsWorkflow} data
 #' 
 #' A workspace which stores input and output data for \code{\link{msmsWorkflow}}.
@@ -46,7 +49,7 @@ NULL
 ## ' 		peaks (after step 7, see \code{\link{reanalyzeFailpeaks}}).
 ## ' @slot refilteredRcSpecs Final data to use for MassBank record creation after 
 ## ' 		multiplicity filtering (step 8).
-#' 
+##' 
 ## ' @method show,msmsWorkflow Shows a brief summary of the object. Currently only the included files.
 #' 
 #' @seealso \code{\link{msmsWorkflow}}
@@ -55,10 +58,11 @@ NULL
 #' @docType class
 #' @exportClass msmsWorkspace
 #' @export
-setClass("msmsWorkspace",
-		representation(
+.msmsWorkspace <- setClass("msmsWorkspace",
+		representation = representation(
 				files = "character",
-				specs = "list",
+				spectra = "RmbSpectraSetList",
+				parent = "msmsWorkspaceOrNULL",
 				analyzedSpecs = "list",
 				aggregatedSpecs = "list",
 				rc = "ANY",
@@ -70,8 +74,39 @@ setClass("msmsWorkspace",
 				refilteredRcSpecs = "list",
 				archivename = "character",
 				settings = "list"
-				),
+		),
+		contains=c("Versioned"),
+		prototype = prototype(
+				new("Versioned", versions=c(msmsWorkspace = "2.0.1")),
+				parent = NULL
 		)
+)
+
+setIs("msmsWorkspace", "msmsWorkspaceOrNULL")
+
+#.msmsWorkspace <- setClass("msmsWorkspace",
+#		representation = representation(
+#				files = "character",
+#				specs = "list",
+#				analyzedSpecs = "list",
+#				aggregatedSpecs = "list",
+#				rc = "ANY",
+#				rc.ms1 = "ANY",
+#				recalibratedSpecs = "list",
+#				analyzedRcSpecs = "list",
+#				aggregatedRcSpecs = "list",
+#				reanalyzedRcSpecs = "list",
+#				refilteredRcSpecs = "list",
+#				archivename = "character",
+#				settings = "list"
+#				),
+#		contains=c("Versioned"),
+#		prototype = prototype(
+#				new("Versioned", versions=c(msmsWorkspace = "1.0.1"))
+#				)
+#		)
+		
+
 
 #' Workspace for \code{mbWorkflow} data
 #' 
@@ -159,7 +194,7 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE)
 	load(fileName, envir=tempEnv)
 	# Look if there is a msmsWorkspace in the file
 	objs <- ls(tempEnv)
-	isWs <- unlist(lapply(objs, function(obj) "msmsWorkspace" %in% class(tempEnv[[obj]])))
+	isWs <- unlist(lapply(objs, function(obj) is(tempEnv[[obj]], "msmsWorkspace")))
 	whichWs <- match(TRUE, isWs)
 	# Found? Then just return it.
 	if(!is.na(whichWs))
@@ -169,7 +204,7 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE)
 		if(loadSettings == FALSE)
 			w@settings <- list()
 	}
-	# Otherwise hope to load the dataset into a new workspace
+	# If there is no msmsWorkspace object in the workspace, this means that the workspace is version 1!
 	else
 	{
 		w <- new("msmsWorkspace")
@@ -188,12 +223,22 @@ loadMsmsWorkspace <- function(fileName, loadSettings = FALSE)
 		for(var in dataset)
 		{
 			if(exists(var, envir=tempEnv))
-				slot(w, var) <- tempEnv[[var]]
+				slot(w, var, check=FALSE) <- tempEnv[[var]]
+			classVersion(w) <- "1.0.0"
 		}
 		# Check if settings exist...
 		if((loadSettings == TRUE) && exists("RmbSettings", envir=tempEnv))
 			w@settings <- tempEnv$RmbSettings
 	}
+	# process version updates
+	updateClass <- FALSE
+	if(!isVersioned(w)) updateClass <- TRUE
+	else if(!all(isCurrent(w))) updateClass <- TRUE
+	if(updateClass)
+	{
+		w <- updateObject(w)
+	}
+	
 	# If loadSettings is set: load the settings into RMassBank
 	if((loadSettings == TRUE) && (length(w@settings) > 0))
 		loadRmbSettings(w@settings)
diff --git a/R/RmbWorkspaceUpdate.R b/R/RmbWorkspaceUpdate.R
new file mode 100644
index 0000000..441904f
--- /dev/null
+++ b/R/RmbWorkspaceUpdate.R
@@ -0,0 +1,279 @@
+# TODO: Add comment
+# 
+# Author: stravsmi
+###############################################################################
+
+
+.updateObject.RmbWorkspace <- setMethod("updateObject", signature(object="msmsWorkspace"), function(object, ..., verbose = FALSE) 
+		{
+			w <- object
+			if(isVersioned(w))
+				if(all(isCurrent(w)))
+					return(w)
+			# get msmsWorkspace version
+			if(!isVersioned(w))
+				v <- "1.0.0"
+			else
+				v <- classVersion(w)["msmsWorkspace"]
+			w.new <- w
+			# gradually step up versions
+			# 2.0.1: 
+			# * spectra go from specs, analyzedSpecs or their rc analogs to Spectra
+			# * data pre recalibration get shifted to "parent workspace"
+			if(v < "2.0.1")
+			{
+				w.old <- w.new
+				w.new <- new("msmsWorkspace")
+				w.new@files <- w.old@files
+				# Do we have recalibration done? If so: all data in the WS will be the recalibrated data, the unrecalibrated data will be
+				# moved into a new parent workspace which is referenced
+				progress <- .findProgress.v1(w.old)
+				if(4 %in% progress)
+				{
+					w.parent <- w.old
+					w.parent@recalibratedSpecs <- list()
+					w.parent@analyzedRcSpecs <- list()
+					w.parent@aggregatedRcSpecs <- list()
+					w.parent@reanalyzedRcSpecs <- list()
+					w.parent@refilteredRcSpecs <- list()
+					w.old@specs <- w.old@recalibratedSpecs
+					w.old@analyzedSpecs <- w.old@analyzedRcSpecs
+					w.parent.new <- updateObject(w.parent)
+					w.new@parent <- w.parent.new
+				}
+				w.new@spectra <- .updateObject.spectra(w.old@specs, w.old@analyzedSpecs)
+			}
+			
+			return(w.new)
+		})
+
+
+
+.updateObject.spectra <- function(specs, analyzedSpecs)
+{
+	if((length(specs) != length(analyzedSpecs)) && (0 != length(analyzedSpecs) ))
+		stop("updateObject: Could not update object because data is inconsistent. length(analyzedSpecs) != length(specs) or 0")
+	# process info ex specs
+	spectra <- lapply(specs, function(spec){
+				set <- new("RmbSpectraSet")
+				# identifiers and properties
+				set@mz <- spec$mz$mzCenter
+				set@id <- as.integer(spec$id)
+				set@formula <- spec$formula
+				set@found <- as.logical(spec$foundOK)
+				# now parent and child MS
+				# check for parent recalibration column
+				if("mzRecal" %in% colnames(spec$parentPeak))
+					mzcol <- "mzRecal"
+				else
+					mzcol <- "mz"
+				set@parent <- new("Spectrum1", 
+						mz = spec$parentPeak[,mzcol],
+						intensity = spec$parentPeak[,2],
+						polarity = as.integer(spec$parentHeader$polarity),
+						peaksCount = as.integer(spec$parentHeader$peaksCount),
+						rt = spec$parentHeader$retentionTime,
+						acquisitionNum = as.integer(spec$parentHeader$acquisitionNum),
+						tic = spec$parentHeader$totIonCurrent,
+						centroided = TRUE
+						)
+				# get MSMS data from spec$peaks into RmbSpectrum2 objects
+				children.p1 <- lapply(spec$peaks, function(peaks)
+						{
+							if("mzRecal" %in% colnames(peaks))
+								mzcol <- "mzRecal"
+							else
+								mzcol <- "mz"
+							new("RmbSpectrum2",
+									mz=peaks[,mzcol],
+									intensity=peaks[,2],
+									peaksCount=nrow(peaks))
+						})
+				# get header data from spec$childHeaders into separate RmbSpectrum2 objects
+				children.p2 <- apply(spec$childHeaders, 1, function(line)
+						{
+							new("RmbSpectrum2",
+									precScanNum = as.integer(line["precursorScanNum"]),
+									precursorMz = line["precursorMZ"],
+									precursorIntensity = line["precursorIntensity"],
+									precursorCharge = as.integer(line["precursorCharge"]),
+									collisionEnergy = line["collisionEnergy"],
+									tic = line["totIonCurrent"],
+									rt = line["retentionTime"],
+									acquisitionNum = as.integer(line["acquisitionNum"]),
+									centroided = TRUE
+							)
+						}) 
+				# merge MSMS RmbSpectrum2 with header RmbSpectrum2
+				children <- mapply(function(c1,c2)
+						{
+							c2slots <- c("precScanNum","precursorMz", "precursorIntensity", "precursorCharge", "collisionEnergy",
+									"tic", "rt", "acquisitionNum", "centroided")
+							for(c2slot in c2slots)
+								slot(c1, c2slot) <- slot(c2, c2slot)
+							return(c1)
+						}, children.p1, children.p2)
+				set@children <- as(children, "SimpleList")
+				
+				return(set)
+			})
+	spectra <- mapply(function(set, name)
+			{
+				set@name <- name
+				return(set)
+			},
+			spectra, names(specs))
+	
+				
+	# add info ex analyzedSpecs if present
+	if(length(analyzedSpecs) > 0)
+	{
+		spectra <- mapply(function(set, analyzedSpec)
+				{
+					if(length(analyzedSpec$msmsdata) != length(set@children))
+						stop("updateObject: Could not update object because data is inconsistent. length(analyzedSpec$msmsdata) != length(set@children)")
+					children <- mapply(function(spectrum, msmsrecord)
+							{
+								if(!is.data.frame(msmsrecord$childBad))
+									msmsrecord$childBad <- data.frame()
+								# note: mz/intensity are replaced with the values from the analyzed spectrum,
+								# such as to have a mass multiple times for multiple matched formulas
+								
+								# check if the spectrum has recalibrated masses; if yes, use those
+								if("mzRecal" %in% colnames(msmsrecord$childRaw))
+									mzcol <- "mzRecal"
+								else
+									mzcol <- "mz"
+								spectrum@mz <- c(msmsrecord$childFilt$mzFound,
+										msmsrecord$childBad$mzFound,
+										msmsrecord$childUnmatched$mzFound,
+										msmsrecord$childRawLow[,mzcol], 
+										msmsrecord$childRawSatellite[,mzcol])
+								spectrum@intensity <- c(msmsrecord$childFilt$int,
+										msmsrecord$childBad$int,
+										msmsrecord$childUnmatched$int,
+										msmsrecord$childRawLow$int, 
+										msmsrecord$childRawSatellite$int)
+								spectrum@peaksCount <- length(spectrum@mz)
+
+								spectrum@satellite <- as.logical(c(
+												rep(FALSE,nrow(msmsrecord$childFilt)),
+												rep(FALSE,nrow(msmsrecord$childBad)),
+												rep(FALSE,nrow(msmsrecord$childUnmatched)),
+												rep(FALSE,nrow(msmsrecord$childRawLow)), 
+												rep(TRUE,nrow(msmsrecord$childRawSatellite)))
+								)
+								
+								
+								spectrum@low <- as.logical(c(
+												rep(FALSE,nrow(msmsrecord$childFilt)),
+												rep(FALSE,nrow(msmsrecord$childBad)),
+												rep(FALSE,nrow(msmsrecord$childUnmatched)),
+												rep(TRUE,nrow(msmsrecord$childRawLow)), 
+												rep(FALSE,nrow(msmsrecord$childRawSatellite)))
+								)
+								
+								spectrum@rawOK <- as.logical(c(
+												rep(TRUE,nrow(msmsrecord$childFilt)),
+												rep(TRUE,nrow(msmsrecord$childBad)),
+												rep(TRUE,nrow(msmsrecord$childUnmatched)),
+												rep(FALSE,nrow(msmsrecord$childRawLow)), 
+												rep(FALSE,nrow(msmsrecord$childRawSatellite)))
+								)
+								
+								spectrum@good <- as.logical(c(
+										msmsrecord$childFilt$good,
+										msmsrecord$childBad$good,
+										msmsrecord$childUnmatched$good,
+										rep(NA,nrow(msmsrecord$childRawLow)), 
+										rep(NA,nrow(msmsrecord$childRawSatellite)))
+								)
+								spectrum@mzCalc <- as.numeric(c(
+										msmsrecord$childFilt$mzCalc,
+										msmsrecord$childBad$mzCalc,
+										msmsrecord$childUnmatched$mzCalc,
+										rep(NA,nrow(msmsrecord$childRawLow)), 
+										rep(NA,nrow(msmsrecord$childRawSatellite)))
+								)
+								spectrum@formula <- as.character(c(
+										msmsrecord$childFilt$formula,
+										msmsrecord$childBad$formula,
+										msmsrecord$childUnmatched$formula,
+										rep(NA,nrow(msmsrecord$childRawLow)), 
+										rep(NA,nrow(msmsrecord$childRawSatellite)))
+								)
+								
+								spectrum@dbe <- as.numeric(c(
+												msmsrecord$childFilt$dbe,
+												msmsrecord$childBad$dbe,
+												msmsrecord$childUnmatched$dbe,
+												rep(NA,nrow(msmsrecord$childRawLow)), 
+												rep(NA,nrow(msmsrecord$childRawSatellite)))
+								)
+								
+								spectrum@formulaCount <- as.integer(c(
+												msmsrecord$childFilt$formulaCount,
+												msmsrecord$childBad$formulaCount,
+												msmsrecord$childUnmatched$formulaCount,
+												rep(NA,nrow(msmsrecord$childRawLow)), 
+												rep(NA,nrow(msmsrecord$childRawSatellite)))
+								)
+								
+								spectrum@dppm <- as.numeric(c(
+												msmsrecord$childFilt$dppm,
+												msmsrecord$childBad$dppm,
+												msmsrecord$childUnmatched$dppm,
+												rep(NA,nrow(msmsrecord$childRawLow)), 
+												rep(NA,nrow(msmsrecord$childRawSatellite)))
+								)
+								spectrum@dppmBest <- as.numeric(c(
+												msmsrecord$childFilt$dppmBest,
+												msmsrecord$childBad$dppmBest,
+												msmsrecord$childUnmatched$dppmBest,
+												rep(NA,nrow(msmsrecord$childRawLow)), 
+												rep(NA,nrow(msmsrecord$childRawSatellite)))
+								)
+								
+#								.RmbSpectrum2 <- setClass("RmbSpectrum2",
+#										representation = representation(
+								## satellite="logical",
+								## low="logical",
+								## rawOK ="logical",
+								## good = "logical",
+								## mzCalc = "numeric",
+								## formula = "character",
+								## formulaCount = "integer",
+								## dppm = "numeric",
+								## dppmBest = "numeric",
+#										),
+								
+								return(spectrum)
+								
+							},
+							set@children, analyzedSpec$msmsdata)
+							set@children <- as(children, "SimpleList")
+							return(set)
+				},
+				spectra, analyzedSpecs)
+	}
+
+
+	return(as(spectra, "SimpleList"))
+	
+}
+
+# Finds progress in the "old workspace version" to determine whether to take the old spectra or the recalibrated ones (and
+# make a parent workspace)
+.findProgress.v1 <- function(workspace)
+{
+	step1 <- (length(workspace@specs) > 0)
+	step2 <- (length(workspace@analyzedSpecs) > 0)
+	step3 <- (length(workspace@aggregatedSpecs) > 0)
+	step4 <- (length(workspace@recalibratedSpecs) > 0)
+	step5 <- (length(workspace@analyzedRcSpecs) > 0)
+	step6 <- (length(workspace@aggregatedRcSpecs) > 0)
+	step7 <- (length(workspace@reanalyzedRcSpecs) > 0)
+	step8 <- (length(workspace@refilteredRcSpecs) > 0)
+	steps <- which(c(step1, step2, step3, step4, step5, step6, step7, step8))
+	return(steps)
+}
\ No newline at end of file
diff --git a/R/SpectrumClasses.R b/R/SpectrumClasses.R
index c1b88e4..2205543 100644
--- a/R/SpectrumClasses.R
+++ b/R/SpectrumClasses.R
@@ -1,7 +1,44 @@
-setClass("RmbSpectraSet",
+.RmbSpectrum2 <- setClass("RmbSpectrum2",
+		representation = representation(
+				satellite="logical",
+				low="logical",
+				rawOK ="logical",
+				good = "logical",
+				mzCalc = "numeric",
+				formula = "character",
+				dbe = "numeric",
+				formulaCount = "integer",
+				dppm = "numeric",
+				dppmBest = "numeric"
+		),
+		contains=c("Spectrum2"),
+		prototype = prototype(
+				satellite = logical(),
+				low = logical(),
+				rawOK = logical(),
+				good = logical(),
+				mzCalc = numeric(),
+				formula = character(),
+				dbe = numeric(),
+				formulaCount = integer(),
+				dppm = numeric(),
+				dppmBest = numeric(),
+				new("Versioned", versions=c(classVersion("Spectrum2"), RmbSpectrum2 = "0.1.0"))
+		),
+)
+
+.RmbSpectrum2List <- setClass("RmbSpectrum2List", contains="SimpleList",
+		prototype=prototype(elementType="RmbSpectrum2"))
+#
+#setAs("ANY", "RmbSpectrum2List", function(from) {
+#			coerceToSimpleList(from)
+#		})
+
+
+.RmbSpectraSet <- setClass("RmbSpectraSet",
 		representation = representation(
 				parent = "Spectrum1",
-				children = "list",
+				children = "RmbSpectrum2List",
 				# These are done as slots and not as S4 functions, because they are set during the workflow
 				# in "checking" steps. It's easier.
 				found = "logical",
@@ -15,7 +52,7 @@ setClass("RmbSpectraSet",
 				),
 		prototype = prototype(
 				parent = new("Spectrum1"),
-				children = list(),
+				children = new("RmbSpectrum2List"),
 				found = FALSE,
 				complete = NA,
 				empty = NA,
@@ -25,36 +62,13 @@ setClass("RmbSpectraSet",
 				name = character(),
 				annotations = list()
 		)
-		,
-		validity = function(object)
-		{
-			childrenSpectraOK <- all(unlist(lapply(object@children, function(s) inherits(s, "RmbSpectrum2"))))
-			if(!childrenSpectraOK) return("MS2 spectra are not of class RmbSpectrum2")
-			return(TRUE)
-		}
 );
 
-setClass("RmbSpectrum2",
-		representation = representation(
-				satellite="logical",
-				low="logical",
-				rawOK ="logical",
-				good = "logical",
-				mzCalc = "numeric",
-				formula = "numeric",
-				dppm = "numeric"
-				),
-		contains=c("Spectrum2"),
-		prototype = prototype(
-				satellite = logical(),
-				low = logical(),
-				rawOK = logical(),
-				good = logical(),
-				mzCalc = numeric(),
-				formula = numeric(),
-				dppm = numeric()
-				),
-)
+.RmbSpectraSetList <- setClass("RmbSpectraSetList", contains="SimpleList",
+		prototype=prototype(elementType="RmbSpectraSet"))
+
+
+
 
 setGeneric("getData",	function(s) standardGeneric("getData"))
 setGeneric("setData",	function(s, df) standardGeneric("setData"))
diff --git a/R/SpectrumMethods.R b/R/SpectrumMethods.R
index b139f38..ba480b3 100644
--- a/R/SpectrumMethods.R
+++ b/R/SpectrumMethods.R
@@ -6,7 +6,7 @@
 setMethod("getData", c("RmbSpectrum2"), function(s)
 		{
 			peaks <- s@peaksCount
-			cols <- c("mz", "intensity", "satellite", "low", "rawOK", "good", "mzCalc", "formula", "dppm")
+			cols <- c("mz", "intensity", "satellite", "low", "rawOK", "good", "mzCalc", "formula", "dbe", "formulaCount", "dppm", "dppmBest")
 			cols.isFilled <- unlist(lapply(cols, function(col) length(slot(s, col)) == peaks))
 			cols.filled <- cols[cols.isFilled]
 			df <- do.call(data.frame, lapply(cols.filled, function(col) slot(s, col)))
-- 
GitLab