Skip to content
Snippets Groups Projects
Commit 3327b6be authored by Emma Schymanski's avatar Emma Schymanski
Browse files

RMassBank: updates from git repo

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/RMassBank@81148 bc3139a8-67e5-0310-9ffc-ced21a209358
parent fd3f8d6b
No related branches found
No related tags found
No related merge requests found
Showing with 15673 additions and 339 deletions
Package: RMassBank
Type: Package
Title: Workflow to process tandem MS files and build MassBank records
Version: 1.3.1
Version: 1.3.2
Authors@R: c(
person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
role=c("cre")),
......@@ -9,11 +9,12 @@ Authors@R: c(
"michael.stravs@eawag.ch", role=c("aut")), person(given = "Emma L.",
family = "Schymanski", email = "emma.schymanski@eawag.ch", role=c("aut")),
person(given = "Steffen", family = "Neumann", role = "aut", email =
"sneumann@ipb-halle.de"), person(given = "Erik", family = "Mueller", role =
"sneumann@ipb-halle.de"), person(given = "Erik", family = "Muller", role =
"aut", email = "erik.mueller@student.uni-halle.de"), person(given =
"Tobias", family = "Schulze", role = "ctb", email =
"tobias.schulze@ufz.de") )
Author: Michael Stravs, Emma Schymanski, Steffen Neumann, Erik Mueller, with contributions from Tobias Schulze
Author: Michael Stravs, Emma Schymanski, Steffen Neumann, Erik Mueller, with
contributions from Tobias Schulze
Maintainer: RMassBank at Eawag <massbank@eawag.ch>
Description: Workflow to process tandem MS files and build MassBank records.
Functions include automated extraction of tandem MS spectra, formula
......@@ -24,11 +25,15 @@ License: Artistic-2.0
SystemRequirements: OpenBabel
biocViews: Bioinformatics, MassSpectrometry, Metabolomics, Software
Depends:
rcdk,yaml,mzR,methods,rjson
mzR,rcdk,yaml,methods
Imports:
XML,RCurl
XML,RCurl,rjson
Suggests:
gplots,RMassBankData,xcms
gplots,RMassBankData,
xcms (>= 1.37.1),
CAMERA,
ontoCAT,
RUnit
Collate:
'createMassBank.R'
'formulaCalculator.R'
......@@ -38,5 +43,8 @@ Collate:
'settings_example.R'
'webAccess.R'
'deprofile.R'
'parseMassBank.R'
'RmbWorkspace.R'
'validateMassBank.R'
'zzz.R'
'tools.R'
......@@ -3,9 +3,14 @@ export(CTS.externalIdTypes)
export(RmbDefaultSettings)
export(RmbSettingsTemplate)
export(add.formula)
export(addMB)
export(addPeaks)
export(addPeaksManually)
export(aggregateSpectra)
export(analyzeMsMs)
export(analyzeMsMs.formula)
export(analyzeMsMs.intensity)
export(annotator.default)
export(archiveResults)
export(cleanElnoise)
export(combineMultiplicities)
......@@ -31,6 +36,7 @@ export(findMsMsHR.mass)
export(findMz)
export(findMz.formula)
export(findName)
export(findProgress)
export(findRt)
export(findSmiles)
export(flatten)
......@@ -59,26 +65,38 @@ export(multiply.formula)
export(newMbWorkspace)
export(newMsmsWorkspace)
export(order.formula)
export(parseMassBank)
export(plotRecalibration)
export(plotRecalibration.direct)
export(ppm)
export(problematicPeaks)
export(progressBarHook)
export(readMbdata)
export(reanalyzeFailpeak)
export(reanalyzeFailpeaks)
export(recalibrate)
export(recalibrate.addMS1data)
export(recalibrate.identity)
export(recalibrate.linear)
export(recalibrate.loess)
export(recalibrate.mean)
export(recalibrateSingleSpec)
export(recalibrateSpectra)
export(resetInfolists)
export(resetList)
export(smiles2mass)
export(to.limits.rcdk)
export(toMassbank)
export(validate)
exportClasses(mbWorkspace)
exportClasses(msmsWorkspace)
exportMethods(show)
import(RCurl)
import(XML)
import(methods)
import(mzR)
import(rcdk)
import(rjson)
import(yaml)
importClassesFrom(mzR)
importMethodsFrom(mzR)
#' @import methods
NULL
#' Workspace for \code{msmsWorkflow} data
#'
......
......@@ -453,7 +453,7 @@ gatherData <- function(id)
}
else
{
iupacName <-infos$synonyms[[ipreferred]][["name"]]
iupacName <-infos$synonyms[[ipreferred[[1]]]][["name"]]
}
# Eliminate duplicate names from our list of 3
names <- as.list(unique(c(dbname, iupacName)))
......@@ -916,9 +916,6 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, refiltered, additionalP
peaks$dppm <- round(peaks$dppm, 2)
peaks$mzCalc <- round(peaks$mzCalc, 4)
peaks$int <- round(peaks$int, 1)
# I actually have no idea what the "num" entry in the annotation data
# is supposed to mean.
peaks$num <- 1
# copy the peak table to the annotation table. (The peak table will then be extended
# with peaks from the global "additional_peaks" table, which can be used to add peaks
# to the spectra by hand.
......@@ -971,7 +968,12 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, refiltered, additionalP
"pM" = "+",
"mM" = "-")
type <- formula_tag[[spec$mode]]
annotation$formula <- paste(annotation$formula, type, sep='')
annotator <- getOption("RMassBank")$annotator
if(is.null(annotator))
annotator <- "annotator.default"
# Here, the relative intensity is recalculated using the newly added additional
# peaks from the peak list. Therefore, we throw superfluous peaks out again.
......@@ -980,9 +982,7 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, refiltered, additionalP
annotation$intrel <- floor(annotation$int / max(peaks$int) * 999)
annotation <- annotation[annotation$intrel >= 1,]
# Select the right columns and name them correctly for output.
annotation <- annotation[,c("mzSpec", "num", "formula", "mzCalc", "dppm")]
colnames(annotation) <- c("m/z", "num", "{formula", "mass", "error(ppm)}")
annotation <- do.call(annotator, list(annotation= annotation, type=type))
# Create the "lower part" of the record.
mbdata <- list()
......@@ -1002,9 +1002,17 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, refiltered, additionalP
mbdata[["MS$FOCUSED_ION"]] <- ms_fi
# the data processing tag :)
# Change by Tobias:
# I suggest to add here the current version number of the clone due to better distinction between different makes of MB records
# Could be automatised from DESCRIPTION file?
if(getOption("RMassBank")$use_rean_peaks)
processingComment <- list("REANALYZE" = "Peaks with additional N2/O included")
else
processingComment <- list()
mbdata[["MS$DATA_PROCESSING"]] <- c(
getOption("RMassBank")$annotations$ms_dataprocessing,
list("WHOLE" = "RMassBank")
processingComment,
list("WHOLE" = paste("RMassBank", packageVersion("RMassBank")))
)
# Annotation:
......@@ -1016,7 +1024,20 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, refiltered, additionalP
# These two entries will be thrown out later, but they are necessary to build the
# record title and the accession number.
mbdata[["RECORD_TITLE_CE"]] <- msmsdata$info$ces #formatted collision energy
mbdata[["SUBSCAN"]] <- msmsdata$scan - spec$parentHeader$acquisitionNum #relative scan
# Mode of relative scan calculation: by default it is calculated relative to the
# parent scan. If a corresponding option is set, it will be calculated from the first
# present child scan in the list.
relativeScan <- "fromParent"
if(!is.null(getOption("RMassBank")$recomputeRelativeScan))
if(getOption("RMassBank")$recomputeRelativeScan == "fromFirstChild")
relativeScan <- "fromFirstChild"
if(relativeScan == "fromParent")
mbdata[["SUBSCAN"]] <- msmsdata$scan - spec$parentHeader$acquisitionNum #relative scan
else if(relativeScan == "fromFirstChild")
{
firstChild <- min(unlist(lapply(spec,function(d) d$header$acquisitionNum)))
mbdata[["SUBSCAN"]] <- msmsdata$scan - firstChild + 1
}
return(mbdata)
}
......@@ -1092,32 +1113,16 @@ compileRecord <- function(spec, mbdata, refiltered, additionalPeaks = NULL)
# Here is the right place to fix the name of the INTERNAL ID field.
names(mbrecord[["COMMENT"]])[[which(names(mbrecord[["COMMENT"]]) == "ID")]] <-
getOption("RMassBank")$annotations$internal_id_fieldname
# The fields are named differently in MB record definitions v.1 and 2.
# Therefore, the title is composed slightly differently (with the same result.)
# get mode parameter (for accession number generation) depending on version
# of record definition
# Change by Tobias:
# I suggest to include fragmentation mode here for information
if(getOption("RMassBank")$use_version == 2)
{
mbrecord[["RECORD_TITLE"]] <- paste(
mbrecord[["CH$NAME"]][[1]],
mbrecord[["AC$INSTRUMENT_TYPE"]],
mbrecord[["AC$MASS_SPECTROMETRY"]][["MS_TYPE"]],
mbrecord[["RECORD_TITLE_CE"]],
paste("R=",mbrecord[["AC$MASS_SPECTROMETRY"]][["RESOLUTION"]], sep='' ),
mbrecord[["MS$FOCUSED_ION"]][["PRECURSOR_TYPE"]],
sep="; ")
mode <- mbrecord[["AC$MASS_SPECTROMETRY"]][["ION_MODE"]]
}
else
{
mbrecord[["RECORD_TITLE"]] <- paste(
mbrecord[["CH$NAME"]][[1]],
mbrecord[["AC$INSTRUMENT_TYPE"]],
mbrecord[["AC$ANALYTICAL_CONDITION"]][["MS_TYPE"]],
paste("CE: ", mbrecord[["RECORD_TITLE_CE"]], sep=''),
paste("R=",mbrecord[["AC$ANALYTICAL_CONDITION"]][["RESOLUTION"]], sep='' ),
mbrecord[["MS$FOCUSED_ION"]][["PRECURSOR_TYPE"]],
sep="; ")
mode <- mbrecord[["AC$ANALYTICAL_CONDITION"]][["MODE"]]
}
# Generate the title and then delete the temporary RECORD_TITLE_CE field used before
mbrecord[["RECORD_TITLE"]] <- .parseTitleString(mbrecord)
mbrecord[["RECORD_TITLE_CE"]] <- NULL
# Calculate the accession number from the options.
shift <- getOption("RMassBank")$accessionNumberShifts[[spec$mode]]
......@@ -1129,6 +1134,137 @@ compileRecord <- function(spec, mbdata, refiltered, additionalPeaks = NULL)
})
}
#' Generate peak annotation from peaklist
#'
#' Builds the table written to the PK$ANNOTATION entry of a record from the
#' peaklist. A different function can be substituted for this one through the
#' "annotator" option in the settings file.
#'
#' @param annotation A peak list to be annotated. Contains columns:
#' \code{"cpdID","formula","mzFound" ,"scan","mzCalc","dppm",
#' "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest",
#' "formulaMultiplicity","intrel","mzSpec"}
#'
#' @param type The ion type appended to every annotated formula
#' (usually "+" or "-").
#'
#' @return The annotated peak table with the columns \code{"m/z"},
#' \code{"tentative_formula"}, \code{"formula_count"}, \code{"mass"} and
#' \code{"error(ppm)"}. The \code{colnames()} of the table are used as the
#' titles (preferably without spaces, although the MassBank data format does
#' not strictly enforce any format).
#'
#' @examples
#' \dontrun{
#' annotation <- annotator.default(annotation, type)
#' }
#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
#' @export
annotator.default <- function(annotation, type)
{
  # Input column -> title of that column in the output table, in output order.
  outputTitles <- c(
    mzSpec = "m/z",
    formula = "tentative_formula",
    formulaCount = "formula_count",
    mzCalc = "mass",
    dppm = "error(ppm)"
  )
  # Tag every formula with the ion type.
  annotation$formula <- paste0(annotation$formula, type)
  # Keep only the output columns and give them their output titles.
  annotated <- annotation[, names(outputTitles)]
  colnames(annotated) <- unname(outputTitles)
  annotated
}
#' Parse record title
#'
#' Parses a title for a single MassBank record using the title format
#' specified in the option titleFormat. Internally used, not exported.
#'
#' If the option is not set, a standard title format is used (for record definition
#' version 1 or 2).
#'
#' Every section of the title format must contain one \code{{FIELD}} or
#' \code{{FIELD: SUBFIELD}} placeholder. The placeholder is replaced by the
#' corresponding value from the record; the value is inserted literally (i.e.
#' backslashes in it are not interpreted). Missing or empty values are
#' replaced by an empty string, and for values with more than one entry (e.g.
#' the list of names) the first entry is used. The sections are joined with
#' \code{"; "}.
#'
#' @usage .parseTitleString(mbrecord)
#' @param mbrecord A MassBank record in list format, as returned from
#' 	\code{\link{gatherSpectrum}}.
#' @return A string with the title.
#' @author Michael Stravs, Eawag
#' @seealso \code{\link{compileRecord}}
#' @references MassBank record format:
#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
#' @examples
#' \dontrun{
#' 		# used in compileRecord()
#' 		title <- .parseTitleString(mbrecord)
#' }
#'
.parseTitleString <- function(mbrecord)
{
  varlist <- getOption("RMassBank")$titleFormat

  # Set the standard title format.
  if(is.null(varlist))
  {
    if(getOption("RMassBank")$use_version == 2)
    {
      varlist <- c(
        "{CH$NAME}",
        "{AC$INSTRUMENT_TYPE}",
        "{AC$MASS_SPECTROMETRY: MS_TYPE}",
        "CE: {RECORD_TITLE_CE}",
        "R={AC$MASS_SPECTROMETRY: RESOLUTION}",
        "{MS$FOCUSED_ION: PRECURSOR_TYPE}"
      )
    }
    else
    {
      varlist <- c(
        "{CH$NAME}",
        "{AC$INSTRUMENT_TYPE}",
        "{AC$ANALYTICAL_CONDITION: MS_TYPE}",
        "CE: {RECORD_TITLE_CE}",
        "R={AC$ANALYTICAL_CONDITION: RESOLUTION}",
        "{MS$FOCUSED_ION: PRECURSOR_TYPE}"
      )
    }
  }

  # Locate the {XXX} placeholder of every title section (once, for both the
  # validation and the substitution below). The match is greedy: it runs from
  # the first "{" to the last "}" of a section.
  hits <- regexpr("\\{(.*)\\}", varlist)
  hitLengths <- attr(hits, "match.length")
  if(any(is.na(hits) | hits == -1))
    stop("Title format is incorrectly specified: a section with not exactly 1 parameters")

  parsedVars <- vapply(seq_along(varlist), function(i)
  {
    var <- as.character(varlist[[i]])
    from <- hits[[i]]
    len <- hitLengths[[i]]
    # The parameter inside the {}, i.e. "BLA: BLUB" for "R={BLA: BLUB}"
    arg <- substr(var, from + 1, from + len - 2)
    # Split the parameter by colon if necessary
    splitVar <- strsplit(arg, ": ")[[1]]
    # Read the parameter value from the record
    if(length(splitVar) == 2)
      replaceVar <- mbrecord[[splitVar[[1]]]][[splitVar[[2]]]]
    else if(length(splitVar) == 1)
      replaceVar <- mbrecord[[splitVar]]
    else
      stop(paste(
        "Title format is incorrectly specified:", var)
      )
    # Missing (NULL) and empty values become "". Names etc. can have more
    # than one entry: take the first.
    replaceVar <- as.character(unlist(replaceVar))
    replaceVar <- if(length(replaceVar) == 0) "" else replaceVar[[1]]
    # Splice the value in by position instead of using it as the replacement
    # of sub(): there, backslashes in the value (e.g. "\\1") would be
    # interpreted as backreferences and corrupt the title.
    paste0(
      substr(var, 1, from - 1),
      replaceVar,
      substr(var, from + len, nchar(var))
    )
  }, character(1))

  paste(parsedVars, collapse="; ")
}
# This converts the tree-like list (as obtained e.g. from compileRecord())
# into a plain text array, which can then be dumped to a file suitable for
# MassBank upload.
......
#' @import rcdk
NULL
......
......@@ -265,5 +265,4 @@ findMass <- function(cpdID_or_smiles)
s <- findSmiles(cpdID_or_smiles)
mol <- getMolecule(s)
return(get.exact.mass(mol))
}
}
\ No newline at end of file
This diff is collapsed.
......@@ -9,15 +9,30 @@ NULL
#' Extracts MS/MS spectra from LC-MS raw data for a specified precursor, specified
#' either via the RMassBank compound list (see \code{\link{loadList}}) or via a mass.
#'
#' Different versions of the function get the data from different sources.
#'
#' @usage findMsMsHR(fileName, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE, dppm=10)
#' Different versions of the function get the data from different sources. Note that
#' findMsMsHR and findMsMsHR.direct differ mainly in that findMsMsHR opens a file
#' whereas findMsMsHR.direct uses an open file handle - both are intended to be used
#' in a full process which involves compound lists etc. In contrast, findMsMsHR.mass
#' is a low-level function which uses the mass directly for lookup and is intended for
#' use as a standalone function in unrelated applications.
#'
#' @usage findMsMsHR(fileName, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
#' ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
#' mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
#' fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
#' rtMargin = getOption("RMassBank")$rtMargin,
#' deprofile = getOption("RMassBank")$deprofile)
#'
#' findMsMsHR.mass(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
#' headerCache = NA)
#'
#' findMsMsHR.direct(msRaw, cpdID, mode = "pH", confirmMode = 0,
#' useRtLimit = TRUE, dppm=10, limit.coarse=0.5)
#' headerCache = NA, fillPrecursorScan = FALSE,
#' deprofile = getOption("RMassBank")$deprofile)
#'
#' findMsMsHR.direct(msRaw, cpdID, mode = "pH", confirmMode = 0, useRtLimit = TRUE,
#' ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
#' mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
#' fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
#' rtMargin = getOption("RMassBank")$rtMargin,
#' deprofile = getOption("RMassBank")$deprofile, headerCache = NA)
#'
#' @aliases findMsMsHR.mass findMsMsHR.direct findMsMsHR
#' @param fileName The file to open and search the MS2 spectrum in.
......@@ -25,13 +40,16 @@ NULL
#' @param cpdID The compound ID in the compound list (see \code{\link{loadList}})
#' to use for formula lookup.
#' @param mz The mass to use for spectrum search.
#' @param dppm The limit in ppm to use for fine limit (see below) calculation.
#' @param limit.coarse The coarse limit to use for locating potential MS2 scans:
#' @param ppmFine The limit in ppm to use for fine limit (see below) calculation.
#' @param mzCoarse The coarse limit to use for locating potential MS2 scans:
#' this tolerance is used when finding scans with a suitable precursor
#' ion value.
#' @param limit.fine The fine limit to use for locating MS2 scans: this tolerance
#' is used when locating an appropriate analyte peak in the MS1 precursor
#' spectrum.
#' spectrum.
#' @param limit.coarse Parameter in \code{findMsMsHR.mass} corresponding to \code{mzCoarse}.
#' (The parameters are distinct to clearly conceptually distinguish findMsMsHR.mass
#' (a standalone useful function) from the cpdID based functions (workflow functions).)
#' @param mode The processing mode (determines which ion/adduct is searched):
#' \code{"pH", "pNa", "pM", "mH", "mM", "mFA"} for different ions
#' ([M+H]+, [M+Na]+, [M]+, [M-H]-, [M]-, [M+FA]-).
......@@ -47,6 +65,11 @@ NULL
#' @param maxCount The maximal number of spectra groups to return. One spectra group
#' consists of all data-dependent scans from the same precursor whose precursor
#' mass matches the specified search mass.
#' @param fillPrecursorScan If \code{TRUE}, the precursor scan will be filled from MS1 data.
#' To be used for data where the precursor scan is not stored in the raw data.
#' @param rtMargin The retention time tolerance to use.
#' @param deprofile Whether deprofiling should take place, and what method should be
#' used (cf. \code{\link{deprofile}})
#' @return For \code{findMsMsHR} and \code{findMsMsHR.direct}: A "spectrum set", a list with items:
#' \item{foundOK}{\code{TRUE} if a spectrum was found, \code{FALSE} otherwise.
#' Note: if \code{FALSE}, all other values can be missing!}
......@@ -76,36 +99,55 @@ NULL
#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
#' @seealso findEIC
#' @export
# Opens the raw file, extracts the MS/MS spectrum set of one compound via
# findMsMsHR.direct() and closes the file again.
#
# fileName: the file to open and search the MS2 spectrum in.
# All other arguments are handed on unchanged to findMsMsHR.direct(); their
# defaults are read from the RMassBank options.
# Returns whatever findMsMsHR.direct() returns for the opened file.
findMsMsHR <- function(fileName, cpdID, mode="pH",confirmMode =0, useRtLimit = TRUE,
    ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
    mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
    fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
    rtMargin = getOption("RMassBank")$rtMargin,
    deprofile = getOption("RMassBank")$deprofile)
{
  # access data directly for finding the MS/MS data. This is done using
  # mzR.
  msRaw <- openMSfile(fileName)
  # Close the file handle on every exit path, including an error raised
  # while the spectra are extracted.
  on.exit(mzR::close(msRaw), add = TRUE)
  findMsMsHR.direct(msRaw, cpdID, mode, confirmMode, useRtLimit,
      ppmFine = ppmFine, mzCoarse = mzCoarse,
      fillPrecursorScan = fillPrecursorScan,
      rtMargin = rtMargin, deprofile = deprofile)
}
#' @export
findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, maxCount = NA,
headerCache = NA)
headerCache = NA, fillPrecursorScan = FALSE,
deprofile = getOption("RMassBank")$deprofile)
{
eic <- findEIC(msRaw, mz, limit.fine, rtLimits)
# if(!is.na(rtLimits))
# {
# eic <- subset(eic, rt >= rtLimits[[1]] & rt <= rtLimits[[2]])
# }
if(!is.na(headerCache))
if(!all(is.na(headerCache)))
headerData <- headerCache
else
headerData <- as.data.frame(header(msRaw))
if(fillPrecursorScan == TRUE)
{
# reset the precursor scan number. first set to NA, then
# carry forward the precursor scan number from the last parent scan
headerData$precursorScanNum <- NA
headerData[which(headerData$msLevel == 1),"precursorScanNum"] <-
headerData[which(headerData$msLevel == 1),"acquisitionNum"]
headerData[,"precursorScanNum"] <- .locf(headerData[,"precursorScanNum"])
# Clear the actual MS1 precursor scan number again
headerData[which(headerData$msLevel == 1),"precursorScanNum"] <- 0
}
# Find MS2 spectra with precursors which are in the allowed
# scan filter (coarse limit) range
findValidPrecursors <- headerData[
(headerData$precursorMZ > mz - limit.coarse) &
(headerData$precursorMZ < mz + limit.coarse),]
(headerData$precursorMZ < mz + limit.coarse),]
# Find the precursors for the found spectra
validPrecursors <- unique(findValidPrecursors$precursorScanNum)
# check whether the precursors are real: must be within fine limits!
......@@ -138,17 +180,17 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
{
masterHeader <- headerData[headerData$acquisitionNum == masterScan,]
childHeaders <- headerData[(headerData$precursorScanNum == masterScan)
& (headerData$precursorMZ > mz - limit.coarse)
& (headerData$precursorMZ < mz + limit.coarse) ,]
& (headerData$precursorMZ > mz - limit.coarse)
& (headerData$precursorMZ < mz + limit.coarse) ,]
childScans <- childHeaders$acquisitionNum
msPeaks <- mzR::peaks(msRaw, masterHeader$seqNum)
# if deprofile option is set: run deprofiling
deprofile.setting <- getOption("RMassBank")$deprofile
deprofile.setting <- deprofile
if(!is.na(deprofile.setting))
msPeaks <- deprofile.scan(
msPeaks, method = deprofile.setting, noise = NA, colnames = FALSE
)
)
colnames(msPeaks) <- c("mz","int")
msmsPeaks <- lapply(childHeaders$seqNum, function(scan)
{
......@@ -180,7 +222,13 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
}
#' @export
findMsMsHR.direct <- function(msRaw, cpdID, mode = "pH", confirmMode = 0, useRtLimit = TRUE, dppm=10, limit.coarse=0.5)
findMsMsHR.direct <- function(msRaw, cpdID, mode = "pH", confirmMode = 0, useRtLimit = TRUE,
ppmFine = getOption("RMassBank")$findMsMsRawSettings$ppmFine,
mzCoarse = getOption("RMassBank")$findMsMsRawSettings$mzCoarse,
fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
rtMargin = getOption("RMassBank")$rtMargin,
deprofile = getOption("RMassBank")$deprofile,
headerCache = NA)
{
# for finding the peak RT: use the gauss-fitted centwave peak
# (centroid data converted with TOPP is necessary. save as
......@@ -190,21 +238,28 @@ findMsMsHR.direct <- function(msRaw, cpdID, mode = "pH", confirmMode = 0, useRtL
# find cpd m/z
mzLimits <- findMz(cpdID, mode)
mz <- mzLimits$mzCenter
limit.fine <- ppm(mz, dppm, p=TRUE)
limit.fine <- ppm(mz, ppmFine, p=TRUE)
if(!useRtLimit)
rtLimits <- NA
else
{
rtMargin <- getOption("RMassBank")$rtMargin
dbRt <- findRt(cpdID)
rtLimits <- c(dbRt$RT - rtMargin, dbRt$RT + rtMargin) * 60
}
spectra <- findMsMsHR.mass(msRaw, mz, limit.coarse, limit.fine, rtLimits, confirmMode + 1)
spectra[[confirmMode + 1]]$mz <- mzLimits
return(spectra[[confirmMode + 1]])
spectra <- findMsMsHR.mass(msRaw, mz, mzCoarse, limit.fine, rtLimits, confirmMode + 1,headerCache
,fillPrecursorScan, deprofile)
# check whether a) spectrum was found and b) enough spectra were found
if(length(spectra) < (confirmMode + 1))
sp <- list(foundOK = FALSE)
else
sp <- spectra[[confirmMode + 1]]
sp$mz <- mzLimits
sp$id <- cpdID
sp$formula <- findFormula(cpdID)
return(sp)
}
# Finds the EIC for a mass trace with a window of x ppm.
# (For ppm = 10, this is +5 / -5 ppm from the non-recalibrated mz.)
#' Extract EICs
......@@ -248,3 +303,115 @@ findEIC <- function(msRaw, mz, limit = NULL, rtLimit = NA)
scan <- headerMS1$acquisitionNum
return(data.frame(rt = rt, intensity=pks_t, scan=scan))
}
#' Addition of manual peaklists
#'
#' Adds one or several manual peaklists in matrix format to the spectrum of a
#' compound in an msmsWorkspace. If the workspace has no spectrum for the
#' compound yet, a new spectrum with a placeholder parent scan is created.
#'
#' @usage addPeaksManually(w, cpdID, handSpec, mode = "pH")
#' @param w The msmsWorkspace that the peaklist should be added to.
#' @param cpdID The compoundID of the compound that has been used for the peaklist
#' @param handSpec A peaklist with 2 columns (m/z in the first, intensity in
#' 	the second column), or a list of such peaklists.
#' @param mode The ionization mode that has been used for the spectrum represented by the peaklist
#' @return The \code{msmsWorkspace} with the additional peaklist added to the right spectrum
#' @seealso \code{\link{msmsWorkflow}}
#' @author Erik Mueller
#' @examples \dontrun{
#' 		handSpec <- matrix(0,4,2)
#' 		handSpec[,1] <- c(274.986685367956, 259.012401087427, 95.9493025990907, 96.9573002472772)
#' 		handSpec[,2] <- c(357,761, 2821, 3446)
#' 		w <- addPeaksManually(w, cpdID, handSpec)
#' }
#' @export
addPeaksManually <- function(w, cpdID, handSpec, mode = "pH"){
  # A single peaklist is wrapped, so that the code below always works on a
  # list of peaklists.
  if(is.matrix(handSpec) || is.data.frame(handSpec))
    handSpec <- list(handSpec)
  if(length(handSpec) == 0)
    stop("handSpec contains no peaklists")

  # Column names of the scan header tables
  headerNames <- c("seqNum", "acquisitionNum", "msLevel", "peaksCount", "totIonCurrent", "retentionTime", "basepeakMZ",
      "basePeakIntensity", "collisionEnergy", "ionisationEnergy", "lowMZ", "highMZ", "precursorScanNum",
      "precursorMZ", "precursorCharge", "precursorIntensity", "mergedScan", "mergedResultScanNum",
      "mergedResultStartScanNum", "mergedResultEndScanNum")

  # Compound data: looked up once, for the requested ion mode
  mzLimits <- findMz(cpdID, mode = mode)
  retentionTime <- findRt(cpdID)$RT * 60

  # One header row per peaklist
  childHeaderAddition <- do.call(rbind, lapply(handSpec, function(spec){
    header <- numeric(20)
    header[1:3] <- 2
    header[4] <- nrow(spec)
    header[5] <- 0 ##Does this matter?
    header[6] <- retentionTime
    header[7] <- spec[which.max(spec[,2]),1]
    header[8] <- max(spec[,2])
    header[9] <- 0 ##Does this matter?
    header[10] <- 0 ##Does this matter?
    header[11] <- min(spec[,1])
    header[12] <- max(spec[,1])
    header[13] <- 1
    header[14] <- mzLimits[[3]]
    header[15] <- -1 ##Will be changed for different charges
    header[16] <- 0 ##There sadly isnt any precursor intensity to find in the msms-scans. Workaround? msmsXCMS@files[1]
    header[17:20] <- 0 ##Will be changed if merge is wanted
    header
  }))
  colnames(childHeaderAddition) <- headerNames

  ##Convert the manual peaklists: name the two columns like the other peak tables
  peaksHand <- lapply(handSpec, function(spec){
    if(ncol(spec) == 2)
      colnames(spec) <- c("mz","int")
    spec
  })

  ##Where do the peaks and the header need to be added?
  hasCpd <- vapply(w@specs, function(spec) any(cpdID %in% spec$id), logical(1))

  if(!any(hasCpd)){
    ##If the compound for the cpdID isn't in specs yet, add a new spectrum
    ##(this also covers a workspace which has spectra of other compounds only)
    childHeaderAddition[,1:2] <- 1

    parentHeader <- matrix(0, ncol = 20, nrow = 1)
    rownames(parentHeader) <- 1
    colnames(parentHeader) <- headerNames
    parentHeader[1,1:3] <- 1

    parentPeak <- matrix(c(mzLimits$mzCenter, 100), nrow = 1, ncol = 2)
    colnames(parentPeak) <- c("mz","int")

    w@specs[[length(w@specs) + 1]] <- list(
      foundOK = 1,
      parentscan = 1,
      parentHeader = as.data.frame(parentHeader),
      childScans = 1,
      childHeader = as.data.frame(childHeaderAddition),
      parentPeak = parentPeak,
      peaks = peaksHand,
      mz = mzLimits,
      id = cpdID,
      formula = findFormula(cpdID)
    )
  } else {
    ##Extend the (first) spectrum of this compound
    pos <- which(hasCpd)[[1]]
    w@specs[[pos]]$childHeader <- rbind(w@specs[[pos]]$childHeader, childHeaderAddition)
    w@specs[[pos]]$peaks <- c(w@specs[[pos]]$peaks, peaksHand)
  }

  return(w)
}
#' MassBank-record Addition
#'
#' Adds the peaklist of a MassBank-Record to the specs of an msmsWorkspace
#'
#' The record is read with \code{\link{parseMassBank}}; the first two columns
#' of its PK$PEAK table are added with \code{\link{addPeaksManually}}.
#'
#' @aliases addMB
#' @usage addMB(w, cpdID, fileName, mode)
#' @param w The msmsWorkspace that the peaklist should be added to.
#' @param cpdID The compoundID of the compound that has been used for the record
#' @param fileName The path to the record
#' @param mode The ionization mode that has been used to create the record
#' @return The \code{msmsWorkspace} with the additional peaklist from the record
#' @seealso \code{\link{addPeaksManually}}
#' @author Erik Mueller
#' @examples \dontrun{
#' 		w <- addMB(w, cpdID, "filepath_to_records/RC00001.txt", mode)
#' }
#' @export
addMB <- function(w, cpdID, fileName, mode){
  mb <- parseMassBank(fileName)
  # addPeaksManually() iterates over a list of peaklists, so the peak table is
  # handed over wrapped in a list. drop = FALSE keeps a record with a single
  # peak as a one-row table instead of collapsing it to a plain vector.
  peaklist <- list(mb@compiled_ok[[1]][["PK$PEAK"]][, 1:2, drop = FALSE])
  w <- addPeaksManually(w, cpdID, peaklist, mode)
  return(w)
}
#' MassBank-record Parser
#'
#' Can parse MassBank-records(only V2)
#'
#' @aliases parseMassBank
#' @usage parseMassBank(Files)
#' @param Files A path to the plaintext-record that should be read
#' @return The \code{mbWorkspace} that the plaintext-record creates.
#' @seealso \code{\link{validate}}
#' @author Erik Mueller
#' @examples \dontrun{
#' parseMassBank("filepath_to_records/RC00001.txt")
#' }
#' @export
parseMassBank <- function(Files){
mb <- new("mbWorkspace")
mb@compiled_ok <- list()
i <- 1
fileConnection <- file(normalizePath(Files[i]))
record <- readLines(fileConnection)
close(fileConnection)
mb@compiled_ok[[i]] <- list()
mb@compiled_ok[[i]][['ACCESSION']] <- substring(grep('ACCESSION:',record, value = TRUE, fixed = TRUE),12)
mb@compiled_ok[[i]][['RECORD_TITLE']] <- substring(grep('RECORD_TITLE:',record, value = TRUE),12)
mb@compiled_ok[[i]][['DATE']] <- format(as.Date(substring(grep('DATE:',record, value = TRUE, fixed = TRUE),7), format = "%Y.%m.%d"), "%Y.%m.%d")
mb@compiled_ok[[i]][['AUTHORS']] <- substring(grep('AUTHORS:',record, value = TRUE, fixed = TRUE),10)
mb@compiled_ok[[i]][['LICENSE']] <- substring(grep('LICENSE:',record, value = TRUE, fixed = TRUE),10)
mb@compiled_ok[[i]][['COPYRIGHT']] <- substring(grep('COPYRIGHT:',record, value = TRUE, fixed = TRUE),12)
##publication <- substring(grep('PUBLICATION:',record, fixed = TRUE),14)
##if(length(publication) > 0){
#mb@compiled_ok[[i]][['PUBLICATION']] <- publication
##}
##The list of comments is handled differently
##in RMassBank, but the flattening should work anyway, if I'm correct(RMassBank uses internal values for comments)
commentlist <- list()
commentlist <- as.list(substring(grep('COMMENT:',record, value = TRUE, fixed = TRUE),10))
mb@compiled_ok[[i]][['COMMENT']] <- list()
mb@compiled_ok[[i]][['COMMENT']] <- commentlist
chnames <- list()
chnames <- as.list(substring(grep('CH$NAME:',record, value = TRUE, fixed = TRUE),10))
mb@compiled_ok[[i]][['CH$NAME']] <- chnames
mb@compiled_ok[[i]][['CH$COMPOUND_CLASS']] <- substring(grep('CH$COMPOUND_CLASS:',record, value = TRUE, fixed = TRUE),20)
mb@compiled_ok[[i]][['CH$FORMULA']] <- substring(grep('CH$FORMULA:',record, value = TRUE, fixed = TRUE),13)
mb@compiled_ok[[i]][['CH$EXACT_MASS']] <- as.numeric(substring(grep('CH$EXACT_MASS:',record, value = TRUE, fixed = TRUE),16))
mb@compiled_ok[[i]][['CH$SMILES']] <- substring(grep('CH$SMILES:',record, value = TRUE, fixed = TRUE),12)
mb@compiled_ok[[i]][['CH$IUPAC']] <- substring(grep('CH$IUPAC:',record, value = TRUE, fixed = TRUE),11)
##Again: Flattening this should be no Problem, although the structure is different -
##RMassBank names every type of link, but this isn't necessary here since we're only
##reading, not creating. If that's a problem, I'll change it.
links <- list()
links <- as.list(substring(grep('CH$LINK:',record, value = TRUE, fixed = TRUE),10))
mb@compiled_ok[[i]][['CH$LINK']] <- links
##SP$ will be included later since it's kind of rarely used
mb@compiled_ok[[i]][['AC$INSTRUMENT']] <- substring(grep('AC$INSTRUMENT:',record, value = TRUE, fixed = TRUE),16)
mb@compiled_ok[[i]][['AC$INSTRUMENT_TYPE']] <- substring(grep('AC$INSTRUMENT_TYPE:',record, value = TRUE, fixed = TRUE),21)
##Get the Subvalues just like in RMassBank
##RECORD VERSION SPECIFIC READING INCLUDED
##This could convert Version 1 -> Version 2 if used right,
##Although I have no idea how well it'd do that
##I'll have to find the old specifications to do this right, until then it should only kind of work
##well enough to do some tests
Version <- 2
ac_ms <- list()
ac_ms[['MS_TYPE']] <- substring(grep('AC$MASS_SPECTROMETRY: MS_TYPE',record, value = TRUE, fixed = TRUE),31)
if(length(ac_ms[['MS_TYPE']]) == 0){
ac_ms[['MS_TYPE']] <- substring(grep('AC$ANALYTICAL_CONDITION: MS_TYPE',record, value = TRUE, fixed = TRUE),34)
Version <- 1
}
if(Version == 1){
##This not a real tag anymore(according to the specifications) but RMassBank still writes it...?
##I'll include it for the case that I'm reading V1-records
ac_ms[['IONIZATION']] <- substring(grep('AC$MASS_SPECTROMETRY: IONIZATION',record, value = TRUE, fixed = TRUE),34)
ac_ms[['ION_MODE']] <- substring(grep('AC$ANALYTICAL_CONDITION: MODE',record, value = TRUE, fixed = TRUE),31)
} else{
ac_ms[['ION_MODE']] <- substring(grep('AC$MASS_SPECTROMETRY: ION_MODE',record, value = TRUE, fixed = TRUE),32)
##Some of the following are part of the (optional) specification, but NOT in RMassBank(!)
##This is just for the sake of completeness
ac_ms[['COLLISION_ENERGY']] <- substring(grep('AC$MASS_SPECTROMETRY: COLLISION_ENERGY',record, value = TRUE, fixed = TRUE),40)
ac_ms[['COLLISION_GAS']] <- substring(grep('AC$MASS_SPECTROMETRY: COLLISION_GAS',record, value = TRUE, fixed = TRUE),37)
ac_ms[['DATE']] <- substring(grep('AC$MASS_SPECTROMETRY: DATE',record, value = TRUE, fixed = TRUE),28)
ac_ms[['DESOLVATION_GAS_FLOW']] <- substring(grep('AC$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW',record, value = TRUE, fixed = TRUE),44)
ac_ms[['DESOLVATION_TEMPERATURE']] <- substring(grep('AC$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE',record, value = TRUE, fixed = TRUE),47)
ac_ms[['IONIZATION_ENERGY']] <- substring(grep('AC$MASS_SPECTROMETRY: IONIZATION_ENERGY',record, value = TRUE, fixed = TRUE),41)
ac_ms[['LASER']] <- substring(grep('AC$MASS_SPECTROMETRY: LASER',record, value = TRUE, fixed = TRUE),29)
ac_ms[['MATRIX']] <- substring(grep('AC$MASS_SPECTROMETRY: MATRIX',record, value = TRUE, fixed = TRUE),30)
ac_ms[['MASS_ACCURACY']] <- substring(grep('AC$MASS_SPECTROMETRY: MASS_ACCURACY',record, value = TRUE, fixed = TRUE),37)
ac_ms[['REAGENT_GAS']] <- substring(grep('AC$MASS_SPECTROMETRY: REAGENT_GAS',record, value = TRUE, fixed = TRUE),35)
ac_ms[['SCANNING']] <- substring(grep('AC$MASS_SPECTROMETRY: SCANNING',record, value = TRUE, fixed = TRUE),32)
##These are in RMassBank, but not part of the specification?
##I think I'm misreading something...
#ac_ms[['FRAGMENTATION_MODE']] <- msmsdata$info$mode
#ac_ms[['PRECURSOR_TYPE']] <- precursor_types[spec$mode]
#ac_ms[['RESOLUTION']] <- msmsdata$info$res
ac_lc <- list();
ac_lc[['CAPILLARY_VOLTAGE']] <- substring(grep('AC$CHROMATOGRAPHY: CAPILLARY_VOLTAGE',record, value = TRUE, fixed = TRUE),36)
ac_lc[['COLUMN_NAME']] <- substring(grep('AC$CHROMATOGRAPHY: COLUMN_NAME',record, value = TRUE, fixed = TRUE),32)
ac_lc[['COLUMN_TEMPERATURE']] <- substring(grep('AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE',record, value = TRUE, fixed = TRUE),39)
ac_lc[['FLOW_GRADIENT']] <- substring(grep('AC$CHROMATOGRAPHY: FLOW_GRADIENT',record, value = TRUE, fixed = TRUE),34)
ac_lc[['FLOW_RATE']] <- substring(grep('AC$CHROMATOGRAPHY: FLOW_RATE',record, value = TRUE, fixed = TRUE),30)
ac_lc[['RETENTION_TIME']] <- substring(grep('AC$CHROMATOGRAPHY: RETENTION_TIME',record, value = TRUE, fixed = TRUE),35)
ac_lc[['SOLVENT A']] <- substring(grep('AC$CHROMATOGRAPHY: SOLVENT A',record, value = TRUE, fixed = TRUE),30)
ac_lc[['SOLVENT B']] <- substring(grep('AC$CHROMATOGRAPHY: SOLVENT B',record, value = TRUE, fixed = TRUE),30)
ms_fi <- list()
ms_fi[['BASE_PEAK']] <- as.double(substring(grep('MS$FOCUSED_ION: BASE_PEAK',record, value = TRUE, fixed = TRUE),27))
ms_fi[['PRECURSOR_M/Z']] <- substring(grep('MS$FOCUSED_ION: PRECURSOR_M/Z',record, value = TRUE, fixed = TRUE),31)
ms_fi[['PRECURSOR_TYPE']] <- substring(grep('MS$FOCUSED_ION: PRECURSOR_TYPE',record, value = TRUE, fixed = TRUE),32)
if(ac_ms[['MS_TYPE']] == 'MS2'){
ms_fi[['PRECURSOR_M/Z']] <- as.double(ms_fi[['PRECURSOR_M/Z']])
}
}
namesAcms <- names(ac_ms)
namesAclc <- names(ac_lc)
namesMsfi <- names(ms_fi)
for(k in 1:length(ac_ms)){
if(length(ac_ms[[namesAcms[k]]]) == 0){
ac_ms[[namesAcms[k]]] <- NA
}
}
for(k in 1:length(ac_lc)){
if(length(ac_lc[[namesAclc[k]]]) == 0){
ac_lc[[namesAclc[k]]] <- NA
}
}
for(k in 1:length(ms_fi)){
if(length(ms_fi[[namesMsfi[k]]]) == 0){
ms_fi[[namesMsfi[k]]] <- NA
}
}
mb@compiled_ok[[i]][['AC$MASS_SPECTROMETRY']] <- list()
mb@compiled_ok[[i]][['AC$MASS_SPECTROMETRY']] <- ac_ms
mb@compiled_ok[[i]][['AC$CHROMATOGRAPHY']] <- list()
mb@compiled_ok[[i]][['AC$CHROMATOGRAPHY']] <- ac_lc
mb@compiled_ok[[i]][['MS$FOCUSED_ION']] <- list()
mb@compiled_ok[[i]][['MS$FOCUSED_ION']] <- ms_fi
##Can currently only read annotations of the type "m/z num {formula mass error(ppm)}"
##and'll only read it properly if there is only one annotation
##the strange conversion of the data.frames is there so RMassBank can actually write it again
PKannotationStart <- grep('PK$ANNOTATION:',record, fixed = TRUE) + 1
numpeak <- grep('PK$NUM_PEAK:',record, fixed = TRUE)
if(length(PKannotationStart) > 0 && ReadAnnotation == TRUE){
if(PKannotationStart < numpeak){
splitted <- strsplit(record[PKannotationStart:(numpeak-1)]," ")
PKannotation <- matrix(nrow = numpeak - PKannotationStart, ncol = 5)
for(k in 1:length(splitted)){
splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
PKannotation[k,] <- splitted[[k]]
}
PKannotation <- as.data.frame(PKannotation, stringsAsFactors = FALSE)
PKannotation[] <- lapply(PKannotation, type.convert)
colnames(PKannotation) <- c("mz", "num", "{formula", "mass", "error(ppm)}")
PKannotation$"{formula" <- as.character(PKannotation$"{formula")
}
mb@compiled_ok[[i]][['PK$ANNOTATION']] <- PKannotation
}
##Extract the peaks and write the data into a data.frame
PKStart <- grep('PK$PEAK:',record, fixed = TRUE) + 1
endslash <- grep('//',record, fixed = TRUE)
if(PKStart < endslash){
splitted <- strsplit(record[PKStart:(endslash-1)]," ")
PKPeak <- matrix(nrow = endslash - PKStart, ncol = 3)
for(k in 1:length(splitted)){
splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
PKPeak[k,] <- splitted[[k]]
}
PKPeak <- as.data.frame(PKPeak, stringsAsFactors = FALSE)
PKPeak[] <- lapply(PKPeak, type.convert)
colnames(PKPeak) <- c("mz", "int", "rel.int")
}
mb@compiled_ok[[i]][['PK$PEAK']] <- PKPeak
namesComp <- names(mb@compiled_ok[[i]])
for(k in 1:length(mb@compiled_ok[[i]])){
if(length(mb@compiled_ok[[i]][[namesComp[k]]]) == 0){
mb@compiled_ok[[i]][[namesComp[k]]] <- NA
}
}
print(paste("Read",Files[i]))
flush.console()
return(mb)
}
\ No newline at end of file
#' @import yaml
NULL
.checkMbSettings <- function()
{
......@@ -13,12 +14,14 @@
#' Describes all settings for the RMassBank settings file.
#'
#' \itemize{
#' \item{\code{deprofile}}{Whether and how to deprofile input raw files. Leave the
#' \item{\code{deprofile}}{
#' Whether and how to deprofile input raw files. Leave the
#' setting empty if your raw files are already in "centroid" mode. If your
#' input files are in profile mode, you have the choice between algorithms
#' \code{\link{deprofile}.spline, deprofile.fwhm, deprofile.localMax}; refer to
#' the individual manpages for more information.}
#' \item{\code{rtMargin, rtShift}}{The allowed retention time deviation relative to the
#' \item{\code{rtMargin, rtShift}}{
#' The allowed retention time deviation relative to the
#' values specified in your compound list (see \code{\link{loadList}}), and the systematic
#' shift (due to the use of, e.g., pre-columns or other special equipment).}
#' \item{\code{babeldir}}{
......@@ -28,16 +31,18 @@
#' have explicit hydrogen atoms.
#' The path should point to the directory where babel.exe (or the Linux "babel" equivalent) lies.
#' }
#' \item{\code{use_version}}{Which MassBank record format to use; version 2 is strongly advised,
#' \item{\code{use_version}}{
#' Which MassBank record format to use; version 2 is strongly advised,
#' version 1 is considered outdated and should be used only if for some reason you are running
#' old servers and an upgrade is not feasible.}
#' \item{\code{use_rean_peaks}}{Whether to include peaks from reanalysis (see
#' \item{\code{use_rean_peaks}}{
#' Whether to include peaks from reanalysis (see
#' \code{\link{reanalyzeFailpeaks}}) in the MassBank records. Boolean, TRUE or FALSE.
#' }
#' \item{\code{annotations}}{
#' A list of constant annotations to use in the MassBank records. The entries
#' \code{authors, copyright, license, instrument, instrument_type, compound_class}
#' correspond to the MassBank entries \code{AUTHORS, COPYRIGHT, LICENSE, AC$INSTRUMENT,
#' correspond to the MassBank entries \code{AUTHORS, COPYRIGHT, PUBLICATION, LICENSE, AC$INSTRUMENT,
#' AC$INSTRUMENT_TYPE, CH$COMPOUND_CLASS}. The entry \code{confidence_comment} is added as
#' \code{COMMENT: CONFIDENCE} entry.
#'
......@@ -58,28 +63,100 @@
#' Entries under \code{ms_dataprocessing} are added as \code{MS$DATA_PROCESSING:} entries,
#' in addition to the default \code{WHOLE: RMassBank}.
#' }
#' \item{\code{spectraList}}{This setting describes the experimental annotations for the single
#' \item{\code{annotator}}{
#' For advanced users: option to select your own custom annotator.
#' Check \code{\link{annotator.default}} and the source code for details.}
#' \item{\code{spectraList}}{
#' This setting describes the experimental annotations for the single
#' data-dependent scans. For every data-dependent scan event, a \code{spectraList} entry with
#' \code{mode, ces, ce, res} denoting collision mode, collision energy in short and verbose
#' notation, and FT resolution.}
#' \item{\code{accessionNumberShifts}}{This denotes the starting points for accession numbers
#' \item{\code{accessionNumberShifts}}{
#' This denotes the starting points for accession numbers
#' for different ion types. For example, \code{pH: 0, mH: 50} means that [M+H]+ spectra will
#' start at \code{XX123401} (\code{XX} being the \code{entry_prefix} and \code{1234} the compound
#' id) and [M-H]- will start at \code{XX123451}.}
#' \item{\code{electronicNoise, electronicNoiseWidth}}{Known electronic noise peaks and the window
#' \item{\code{electronicNoise, electronicNoiseWidth}}{
#' Known electronic noise peaks and the window
#' to be used by \code{\link{cleanElnoise}}}
#' \item{\code{recalibrateBy}}{\code{dppm} or \code{dmz} to recalibrate either by delta ppm or by
#' \item{\code{recalibrateBy}}{
#' \code{dppm} or \code{dmz} to recalibrate either by delta ppm or by
#' delta mz.}
#' \item{\code{recalibrateMS1}}{\code{common} or \code{separate} to recalibrate MS1 data points together
#' \item{\code{recalibrateMS1}}{
#' \code{common} or \code{separate} to recalibrate MS1 data points together
#' or separately from MS2 data points.}
#' \item{\code{recalibrator: MS1, MS2}}{The functions to use for recalibration of MS1 and MS2 data points.
#' \item{\code{recalibrator: MS1, MS2}}{
#' The functions to use for recalibration of MS1 and MS2 data points.
#' Note that the \code{MS1} setting is only meaningful if \code{recalibrateMS1: separate}, otherwise
#' the \code{MS2} setting is used for a common recalibration curve. See \code{\link{recalibrate.loess}}
#' for details.
#' }
#' for details.}
#' \item{\code{multiplicityFilter}}{
#' Define the multiplicity filtering level. Default is 2, a value of 1
#' is off (no filtering) and >2 is harsher filtering.}
#' \item{\code{titleFormat}}{
#' The title of MassBank records is a mini-summary
#' of the record, for example "Dinotefuran; LC-ESI-QFT; MS2; CE: 35\%; R=35000; [M+H]+".
#' By default, the first compound name \code{CH$NAME}, instrument type
#' \code{AC$INSTRUMENT_TYPE}, MS/MS type \code{AC$MASS_SPECTROMETRY: MS_TYPE},
#' collision energy \code{RECORD_TITLE_CE}, resolution \code{AC$MASS_SPECTROMETRY: RESOLUTION}
#' and precursor \code{MS$FOCUSED_ION: PRECURSOR_TYPE} are used. If alternative
#' information is relevant to differentiate acquired spectra, the title should be adjusted.
#' For example, many TOFs do not have a resolution setting.
#' See MassBank documentation for more.}
#' \item{\code{filterSettings}}{
#' A list of settings that affect the MS/MS processing. The entries
#' \code{ppmHighMass, ppmLowMass, massRangeDivision} set values for
#' pre-processing, prior to recalibration. \code{ppmHighMass} defines the
#' ppm error for the high mass range (default 10 ppm for Orbitraps),
#' \code{ppmLowMass} is the error for the low mass range (default 15 ppm
#' for Orbitraps) and \code{massRangeDivision} is the m/z value defining
#' the split between the high and low mass range (default m/z = 120).
#'
#' The entry \code{ppmFine} defines the ppm cut-off post recalibration.
#' The default value of 5 ppm is recommended for Orbitraps. For other
#' instruments this can be interpreted from the recalibration plot.
#' All ppm limits are one-sided (e.g. this includes values to +5 ppm or -5 ppm
#' deviation from the exact mass).
#'
#' The entries \code{prelimCut, prelimCutRatio} define the intensity cut-off and
#' cut-off ratio (in \% of the most intense peak) for pre-processing. This affects
#' the peak selection for the recalibration only. Careful: the default value
#' 1e4 for Orbitrap LTQ positive mode could remove all peaks for TOF data
#' and will remove too many peaks for Orbitrap LTQ negative mode spectra!
#'
#' The entry \code{specOkLimit} defines the intensity limit to include MS/MS spectra.
#' MS/MS spectra must have at least one peak above this limit to proceed through
#' the workflow.
#'
#' \code{dbeMinLimit} defines the minimum number of ring and double bond equivalents (DBE)
#' allowed for assigned formulas. This assumes maximum valences for elements with
#' multiple valence states. The default is -0.5 (accounting for fragments being ions).
#'
#' The entries \code{satelliteMzLimit, satelliteIntLimit} define the cut-off m/z and
#' intensity values for satellite peak removal (an artefact of Fourier Transform
#' processing). All peaks within the m/z limit (default 0.5) and intensity ratio
#' (default 0.05 or 5 \%) of the respective peak will be removed. Applicable to
#' Fourier Transform instruments only (e.g. Orbitrap).
#' }
#' \item{\code{findMsMsRawSettings}}{
#' Parameters for adjusting the raw data retrieval.
#' The entry \code{ppmFine} defines the ppm error to look for the precursor in
#' the MS1 (parent) spectrum. Default is 10 ppm for Orbitrap.
#'
#' \code{mzCoarse} defines the error to search for the precursor specification
#' in the MS2 spectrum. This is often only saved to 2 decimal places and thus
#' can be quite inaccurate. The accuracy also depends on the isolation window used.
#' The default setting (e.g. for Orbitrap) is 0.5 (Da, or Th for m/z).
#'
#' The entry \code{fillPrecursorScan} is largely untested. The default value
#' (FALSE) assumes all necessary precursor information is available in the mzML file.
#' A setting of TRUE tries to fill in the precursor data scan number if it is missing.
#' Only tested on one case study so far - feedback welcome!
#' }
#' }
#'
#'
#' @author Michael Stravs, Emma Schymanski
#' @seealso \code{\link{loadRmbSettings}}
#' @rdname RmbSettings
#' @name RmbSettings
......@@ -184,11 +261,55 @@ NULL
recalibrator = list(
MS1 = "recalibrate.loess",
MS2 = "recalibrate.loess"),
# Window width to look for MS1 peaks to recalibrate (in ppm)
recalibrateMS1Window= 15,
# Define the multiplicity filtering level
# Default is 2 (peak occurs at least twice)
# Set this to 1 if you want to turn this option off.
# Set this to anything > 2 if you want harder filtering
multiplicityFilter = 2
multiplicityFilter = 2,
# Define the title format.
# You can use all entries from MassBank records as tokens
# plus the additional token RECORD_TITLE_CE, which is a shortened
# version of the collision energy specifically for use in the title.
# Every line is one entry and must have one token in curly brackets
# e.g. {CH$NAME} or {AC$MASS_SPECTROMETRY: MS_TYPE} plus optionally
# additional text in front or behind e.g.
# R={AC$MASS_SPECTROMETRY: RESOLUTION}
# If this is not specified, it defaults to a title of the format
# "Dinotefuran; LC-ESI-QFT; MS2; CE: 35%; R=35000; [M+H]+"
titleFormat = c(
"{CH$NAME}",
"{AC$INSTRUMENT_TYPE}",
"{AC$MASS_SPECTROMETRY: MS_TYPE}",
"CE: {RECORD_TITLE_CE}",
"R={AC$MASS_SPECTROMETRY: RESOLUTION}",
"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
),
# Define filter settings.
# For Orbitrap, settings of 15 ppm in low mass range, 10 ppm in high
# mass range, m/z = 120 as mass range division and 5 ppm for recalibrated
# data overall are recommended.
filterSettings = list(
ppmHighMass = 10,
ppmLowMass = 15,
massRangeDivision= 120,
ppmFine= 5,
prelimCut= 1e4,
prelimCutRatio= 0,
fineCut= 0,
fineCutRatio= 0,
specOkLimit= 1e4,
dbeMinLimit= -0.5,
satelliteMzLimit= 0.5,
satelliteIntLimit= 0.05
),
findMsMsRawSettings = list(
ppmFine= 10,
mzCoarse= 0.5,
fillPrecursorScan= FALSE)
)
# Writes a file with sample settings which the user can adjust with his values.
......
......@@ -33,7 +33,7 @@
#' w1 <- msmsWorkflow(w, steps=c(1:7), mode="pH")
#' w2 <- msmsWorkflow(w, steps=c(1:7), mode="pH", confirmMode = 1)
#' wTotal <- combineMultiplicities(c(w1, w2))
#' wTotal <- msmsWorkflow(wTotal, steps=8, mode="pH", archiveName = "output")
#' wTotal <- msmsWorkflow(wTotal, steps=8, mode="pH", archivename = "output")
#' # continue here with mbWorkflow
#' }
#'
......@@ -54,3 +54,33 @@ combineMultiplicities <- function(workspaces)
return(wOut)
}
#' Determine processed steps
#'
#' Inspects the result slots of a \code{msmsWorkspace} object and reports
#' which \code{msmsWorkflow} steps have apparently been run on it already.
#' A step counts as processed when its result slot is non-empty.
#'
#' @param workspace A \code{msmsWorkspace} object.
#'
#' @return An integer vector with the numbers (1 to 8) of all
#' \code{msmsWorkflow} steps whose result slot is filled; empty if none is.
#'
#' @examples \dontrun{
#' findProgress(w)
#' }
#' @author Stravs MA, Eawag <michael.stravs@@eawag.ch>
#' @export
findProgress <- function(workspace)
{
	# Result slots in workflow order: position k holds the output of step k.
	stepSlots <- c("specs", "analyzedSpecs", "aggregatedSpecs",
			"recalibratedSpecs", "analyzedRcSpecs", "aggregatedRcSpecs",
			"reanalyzedRcSpecs", "refilteredRcSpecs")
	# USE.NAMES = FALSE keeps the result unnamed, so which() returns bare
	# step numbers rather than a vector named after the slots.
	isFilled <- vapply(stepSlots,
			function(slotName) length(slot(workspace, slotName)) > 0,
			logical(1), USE.NAMES = FALSE)
	which(isFilled)
}
#' Validate MassBank records with a set of Unit tests
#' 
#' Validates a plain text MassBank record, or recursively all
#' records within a directory. The Unit Tests to be used are
#' installed in RMassBank/inst/unitTests and currently include
#' checks for NAs, peaks versus precursor, precursor mz, 
#' precursor type, SMILES vs exact mass, total intensities and
#' title versus type. The validation report is saved as 
#' "report.html" in the working directory.
#' 
#' @aliases validate
#' @usage validate(path)
#' @param path The filepath to a single record, or a directory to search recursively
#' @return Called for its side effect of writing "report.html" to the
#' working directory.
#' @examples
#' \dontrun{
#' validate("/tmp/MassBank/OpenData/record/")
#' }
#' @export
validate <- function(path) {
	# require() (rather than requireNamespace()) is kept on purpose: the
	# ontoCAT and RUnit functions used for validation are called unqualified,
	# so both packages have to be attached.
	if (!require(ontoCAT)) {
		stop("Package ontoCAT missing. Validation requires package ontoCAT and RUnit")
	}
	if (!require(RUnit)) {
		stop("Package RUnit missing. Validation requires package ontoCAT and RUnit")
	}

	# file.info() reports NA for a path that does not exist; catch that here
	# instead of failing with "missing value where TRUE/FALSE needed" below.
	isDirectory <- file.info(path[1])$isdir
	if (is.na(isDirectory)) {
		stop("Path does not exist: ", path[1])
	}

	RMassBank.env$Instrument_List <- .getInstruments()
	# The unit tests use this counter to pick the record they are checking.
	RMassBank.env$testnumber <- 1

	# Is the argument a directory?
	# If yes, list the files
	if (isDirectory) {
		Files <- list.files(path = path,
				recursive=TRUE,
				full.names = TRUE)
	} else {
		Files <- path
	}
	if (length(Files) == 0) {
		stop("No record files found in: ", path[1])
	}

	# Parsing with the help the parseMassBank-function
	RMassBank.env$mb <- lapply(Files,parseMassBank)

	# Test RMassBank Objects with RUnit
	# This loop creates the tests and defines one test suite for every record
	testDir <- system.file(package="RMassBank", "unitTests")
	tests <- vector("list", length(RMassBank.env$mb))
	for (i in seq_along(RMassBank.env$mb)) {
		msType <- RMassBank.env$mb[[i]]@compiled_ok[[1]][['AC$MASS_SPECTROMETRY']][['MS_TYPE']]
		# %in% never yields NA, so a record without MS_TYPE no longer aborts
		# the whole run; it is checked with the MSn suite instead.
		if (isTRUE(msType %in% c("MS2", "MS"))) {
			testFilePattern <- "runit.MS2.test.R"
		} else {
			testFilePattern <- "^runit.MSn.test.[rR]$"
		}
		tests[[i]] <- defineTestSuite(Files[i], dirs = testDir, testFileRegexp = testFilePattern,
				#testFuncRegexp = "^test.+",
				rngKind = "Marsaglia-Multicarry",
				rngNormalKind = "Kinderman-Ramage")
	}
	print("Starting Tests")
	# Testing the list of Testsuites
	testData <- runTestSuite(tests)
	# Prints the HTML-record
	printHTMLProtocol(testData, fileName = file.path(getwd(), "report.html"))
	print(paste("Report for the file(s) finished"))
}
# Tells whether ontoCAT can read the given .obo-file.
# Returns TRUE if the loaded ontology reports an accession, FALSE otherwise.
.isOboReadable <- function(filename){
	# getOntology() wants an URI and has a problem with relative Windows paths,
	# so the path is made absolute first.
	# This is probably not needed under Linux.
	absolutePath <- normalizePath(filename)
	ontology <- getOntology(absolutePath)
	# A missing accession is taken as the sign of an unreadable file
	!is.null(getOntologyAccession(ontology))
}
# This function downloads the psi-ms.obo-ontology so we can get the allowed instrument-names
# and writes it to "psi-ms.obo" in the working directory.
# This is a _temporary_ fix until I find out why getOntology() doesn't work when there are "import:"-lines in the .obo-file
# Until then I will simply remove them, because we don't need the imported ontologies
.downloadPsiObo <- function(){
	connPsiObo <- url("http://psidev.cvs.sourceforge.net/viewvc/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo")
	# Release the connection even if the download fails
	oboFile <- tryCatch(readLines(connPsiObo), finally = close(connPsiObo))

	# Logical indexing instead of oboFile[-grep(...)]: with no "import:" line,
	# grep() returns integer(0) and negative indexing would drop EVERY line,
	# so an empty file would be written.
	oboFile <- oboFile[!grepl("import:", oboFile)]

	connLocal <- file("psi-ms.obo")
	tryCatch(writeLines(oboFile, connLocal), finally = close(connLocal))
	invisible(NULL)
}
# Checks if the psi-ms.obo is there
# If a readable psi-ms.obo exists in the working directory, the user is asked
# whether it should be updated; otherwise the file is downloaded right away.
# Always returns TRUE.
# Will be converted to "checkforinstruments" as soon as I can find the problem
# with getOntology()
.checkForPsiMs <- function(){
	if(file.exists("psi-ms.obo") && .isOboReadable("psi-ms.obo")){
		print("It seems that you have a working psi-ms.obo, do you want to update it? [y/n]")
		repeat{
			answer <- readLines(stdin(), n=1, warn=FALSE)
			if(length(answer) == 0){
				# End of input (e.g. a non-interactive session): readLines()
				# returns character(0), which would make the comparisons below
				# fail with "argument is of length zero". Keep the working file.
				return(TRUE)
			}
			if(answer == "y"){
				.downloadPsiObo()
				return(TRUE)
			}
			if(answer == "n"){
				return(TRUE)
			}
			print("Please type exactly y or n")
		}
	}
	# No file yet, or the existing one is unreadable: fetch a fresh copy
	.downloadPsiObo()
	return(TRUE)
}
# This is a list of the possible instrument names
# Reads the psi-ms.obo shipped with RMassBank and returns the labels of all
# child terms of "MS_1000031" as a character vector.
# NOTE(review): MS_1000031 is presumably the PSI-MS "instrument model" term - confirm.
.getInstruments <- function(){
	Onto <- getOntology(system.file(package = "RMassBank", "psi-ms.obo"))
	instrumentTerms <- getAllTermChildrenById(Onto,"MS_1000031")
	# Preallocate, and iterate with seq_along(): 1:length(x) would run over
	# c(1, 0) for an empty term list and fail on instrumentTerms[[1]].
	instruments <- character(length(instrumentTerms))
	for(i in seq_along(instrumentTerms)){
		instruments[i] <- getLabel(instrumentTerms[[i]])
	}
	return(instruments)
}
#' Calculate the mass from a SMILES-String
#' 
#' Parses a SMILES-String with rcdk and returns the exact mass of the
#' resulting molecule.
#' 
#' @aliases smiles2mass
#' @usage smiles2mass(SMILES)
#' @param SMILES A String-object representing a SMILES
#' @return The calculated mass of the given SMILES-Formula
#' @author Erik Mueller
#' @examples \dontrun{
#' smiles2mass("CC(=O)NC(C(O)1)C(O)C(OC(O2)C(O)C(OC(O3)C(O)C(O)C(O)C(CO)3)C(O)C(CO)2)C(CO)O1")
#' }
#' @export
smiles2mass <- function(SMILES){
	# Only the first molecule returned by the parser is used
	molecule <- parse.smiles(SMILES)[[1]]
	# The return values of the following calls are discarded; they are
	# presumably run for their in-place effect on the molecule object.
	do.typing(molecule)
	do.aromaticity(molecule)
	convert.implicit.to.explicit(molecule)
	do.isotopes(molecule)
	exactMass <- get.exact.mass(molecule)
	return(exactMass)
}
# Package load hook: creates the shared RMassBank.env environment, fills in its
# default entries and attaches it to the search path.
# libname and pkgname are not used in the body.
.onLoad <- function(libname, pkgname) {
# `<<-` assigns outside of this function so that RMassBank.env outlives the hook
RMassBank.env <<- new.env()
# Defaults; other code reads ReadAnnotation and testnumber from this environment
RMassBank.env$ReadAnnotation <- FALSE
RMassBank.env$testnumber <- 1
# NOTE(review): this local `mb` is never used and is not stored in RMassBank.env
mb <- list()
# Makes the entries set above visible by bare name (e.g. `ReadAnnotation`).
# NOTE(review): attach() puts a copy on the search path, so later changes to
# RMassBank.env are presumably not seen through the attached copy - confirm.
attach(RMassBank.env)
}
\ No newline at end of file
......@@ -6,19 +6,22 @@
citHeader("To cite package 'RMassBank' in publications use:")

# Publication year of the article; also used in the plain-text citation below
year <- 2013

citEntry(entry="article",
	title = "Automatic Recalibration and Processing of Tandem Mass Spectra using Formula Annotation.",
	author = personList(as.person("Michael A. Stravs"),
		as.person("Emma L. Schymanski"),
		as.person("Heinz Singer"),
		as.person("Juliane Hollender")),
	year = 2013,
	journal = "Journal of Mass Spectrometry",
	volume = 48,
	number = 1,
	# The page range must be a string: unquoted, 89--99 is evaluated as
	# arithmetic (89 - (-99) = 188).
	pages = "89--99",
	textVersion =
		paste("M. A. Stravs, E. L. Schymanski, H. Singer, J. Hollender", year,
			"Automatic Recalibration and Processing of Tandem Mass Spectra using Formula Annotation",
			"Journal of Mass Spectrometry", "48(1)",
			"89-99.", sep=", "))
......@@ -64,6 +64,13 @@ annotations:
ms_dataprocessing:
RECALIBRATE: loess on assigned fragments and MS1
# Annotator:
# by default, "annotator.default" is used.
# If you want to build your custom annotator (check ?annotator.default and the source code),
# select it here by using e.g.
# annotator: annotator.myown
# for a function annotator.myown(annotation)
# List of data-dependent scans in their order (relative to the parent scan), for annotation of the MassBank records
# For every data-dependent scan event, specify an element with:
# mode: fragmentation mode, e.g. CID
......@@ -158,6 +165,8 @@ recalibrateBy: dppm
# with common curve (common)
# do not recalibrate (none)
recalibrateMS1: common
# Window width to look for MS1 peaks to recalibrate (in ppm)
recalibrateMS1Window: 15
# Custom recalibration function: You can overwrite the recal function by
# making any function which takes rcdata$recalfield ~ rcdata$mzFound.
......@@ -174,3 +183,49 @@ recalibrator:
# Set this to 1 if you want to turn this option off.
# Set this to anything > 2 if you want harder filtering
multiplicityFilter: 2
# Define the title format.
# You can use all entries from MassBank records as tokens
# plus the additional token RECORD_TITLE_CE, which is a shortened
# version of the collision energy specifically for use in the title.
# Every line is one entry and must have one token in curly brackets
# e.g. {CH$NAME} or {AC$MASS_SPECTROMETRY: MS_TYPE} plus optionally
# additional text in front or behind e.g.
# R={AC$MASS_SPECTROMETRY: RESOLUTION}
# If this is not specified, it defaults to a title of the format
# "Dinotefuran; LC-ESI-QFT; MS2; CE: 35%; R=35000; [M+H]+"
# Note how everything must be in "" here because otherwise the : are getting mangled!
titleFormat:
- "{CH$NAME}"
- "{AC$INSTRUMENT_TYPE}"
- "{AC$MASS_SPECTROMETRY: MS_TYPE}"
- "CE: {RECORD_TITLE_CE}"
- "R={AC$MASS_SPECTROMETRY: RESOLUTION}"
- "{MS$FOCUSED_ION: PRECURSOR_TYPE}"
# Define filter settings.
# For Orbitrap, settings of 15 ppm in low mass range, 10 ppm in high
# mass range, m/z = 120 as mass range division and 5 ppm for recalibrated
# data overall are recommended.
filterSettings:
ppmHighMass: 10
ppmLowMass: 15
massRangeDivision: 120
ppmFine: 5
prelimCut: 1e4
prelimCutRatio: 0
fineCut: 0
fineCutRatio: 0
specOkLimit: 1e4
dbeMinLimit: -0.5
satelliteMzLimit: 0.5
satelliteIntLimit: 0.05
# Define raw MS retrieval settings.
findMsMsRawSettings:
ppmFine: 10
mzCoarse: 0.5
# fillPrecursorScan is FALSE for "good" mzML files which have all the info needed.
# However, for example AB Sciex files will have missing precursor scan information,
# in which case fillPrecursorScan = TRUE is needed. Try it out.
fillPrecursorScan: FALSE
This diff is collapsed.
# Checks that the AC$INSTRUMENT entry of the current record is one of the
# instrument names stored in RMassBank.env$Instrument_List.
test.instrumentname <- function(){
	record <- RMassBank.env$mb[[RMassBank.env$testnumber]]@compiled_ok[[1]]
	knownInstruments <- RMassBank.env$Instrument_List
	checkTrue(record[['AC$INSTRUMENT']] %in% knownInstruments)
}
# Checks that the peak table (PK$PEAK) of the current record contains no NA.
test.NA <- function(){
	record <- RMassBank.env$mb[[RMassBank.env$testnumber]]@compiled_ok[[1]]
	peakValues <- as.matrix(record[['PK$PEAK']])
	containsNA <- NA %in% peakValues
	checkTrue(!containsNA)
}
# Compares the first-column value of the last row of the peak table with the
# precursor m/z of the current record. Records without a precursor m/z pass.
test.peaksvsprecursor <- function(){
	record <- RMassBank.env$mb[[RMassBank.env$testnumber]]@compiled_ok[[1]]
	peaks <- record[['PK$PEAK']]
	# NOTE(review): taking the last row presumably assumes peaks sorted by m/z
	lastPeakMz <- unname(peaks[nrow(peaks), 1])
	precursorMz <- record[['MS$FOCUSED_ION']][['PRECURSOR_M/Z']]
	if(!is.na(precursorMz)){
		checkEquals(lastPeakMz, precursorMz, tolerance = precursorMz/100)
	}else{
		checkTrue(TRUE)
	}
}
# Checks that the precursor m/z equals the exact mass shifted by the mass
# offset of the precursor type. Only the four adducts listed in massShifts
# are checked; records without a precursor type pass.
test.precursormz <- function(){
	precursorlist <- c("[M+H]+","[M+Na]+","[M-H]-","[M+HCOO-]-","[M]+","[M]-")
	# Offset added to CH$EXACT_MASS for each checked precursor type
	massShifts <- c("[M+H]+" = 1.008, "[M+Na]+" = 22.989, "[M-H]-" = -1.008, "[M+HCOO-]-" = 45.017)
	record <- RMassBank.env$mb[[RMassBank.env$testnumber]]@compiled_ok[[1]]
	focusedIon <- record[['MS$FOCUSED_ION']]
	if(is.na(focusedIon[['PRECURSOR_TYPE']])){
		checkTrue(TRUE)
	} else{
		# Fixed-string (substring) lookup of the record's type in the list
		precursor <- grep(focusedIon[['PRECURSOR_TYPE']], precursorlist, value = TRUE, fixed = TRUE)
		for(adduct in names(massShifts)){
			if(precursor == adduct){
				checkEquals(focusedIon[['PRECURSOR_M/Z']], record[['CH$EXACT_MASS']] + massShifts[[adduct]], tolerance = 0.002)
			}
		}
	}
}
# Checks that the precursor type of the current record is one of the allowed
# types. Records without a precursor type pass.
test.PrecursorType <- function(){
	allowedTypes <- c("[M+H]+","[M+Na]+","[M-H]-","[M+HCOO-]-","[M]+","[M]-")
	record <- RMassBank.env$mb[[RMassBank.env$testnumber]]@compiled_ok[[1]]
	precursorType <- record[['MS$FOCUSED_ION']][['PRECURSOR_TYPE']]
	if(!is.na(precursorType)){
		checkTrue(precursorType %in% allowedTypes)
	}else{
		checkTrue(TRUE)
	}
}
# Compares the mass computed from CH$SMILES with the CH$EXACT_MASS entry of
# the current record.
test.smilesvsexactmass <- function(){
	record <- RMassBank.env$mb[[RMassBank.env$testnumber]]@compiled_ok[[1]]
	exactMass <- record[['CH$EXACT_MASS']]
	massFromSmiles <- smiles2mass(record[['CH$SMILES']])
	checkEquals(massFromSmiles, exactMass, tolerance = exactMass/100)
}
# Checks that the second column of the peak table of the current record sums
# to more than zero.
test.sumintensities <- function(){
	record <- RMassBank.env$mb[[RMassBank.env$testnumber]]@compiled_ok[[1]]
	peaks <- record[['PK$PEAK']]
	checkTrue(sum(peaks[,2]) > 0)
}
# Checks that the precursor type appears literally in RECORD_TITLE. Records
# without a precursor type pass.
# This test also advances RMassBank.env$testnumber to the next record.
# NOTE(review): it therefore presumably has to run last for each record.
test.TitleVsType <- function(){
	# Remember the record under test, then move the shared counter on before
	# any check can fail.
	currentIndex <- RMassBank.env$testnumber
	RMassBank.env$testnumber <- currentIndex + 1
	record <- RMassBank.env$mb[[currentIndex]]@compiled_ok[[1]]
	precursorType <- record[['MS$FOCUSED_ION']][['PRECURSOR_TYPE']]
	if(!is.na(precursorType)){
		checkTrue(grepl(precursorType, record[['RECORD_TITLE']], fixed = TRUE))
	}else{
		checkTrue(TRUE)
	}
}
# Checks that the precursor m/z entry contains one '/' per MS level above MS2
# (none for MS2, one for MS3, ...).
test.slashes <- function(){
	# Read the record under test from RMassBank.env, as the other unit tests do
	record <- RMassBank.env$mb[[RMassBank.env$testnumber]]@compiled_ok[[1]]
	# Numeric part of the MS_TYPE entry, e.g. "MS3" -> 3
	Type <- as.numeric(substring(record[['AC$MASS_SPECTROMETRY']][['MS_TYPE']],first = 3))
	precursorMz <- as.character(record[['MS$FOCUSED_ION']][['PRECURSOR_M/Z']])
	# gregexpr() returns a list with one element per input string, so its
	# length is always 1 here; the matches are in the first element, which
	# is -1 when there is no slash at all.
	slashPositions <- gregexpr('/', precursorMz, fixed = TRUE)[[1]]
	slashes <- sum(slashPositions > 0)
	checkEquals(Type - 2,slashes)
}
\ No newline at end of file
......@@ -5,14 +5,14 @@
Describes all settings for the RMassBank settings file.
}
\details{
\itemize{ \item{\code{deprofile}}{Whether and how to
\itemize{ \item{\code{deprofile}}{ Whether and how to
deprofile input raw files. Leave the setting empty if
your raw files are already in "centroid" mode. If your
input files are in profile mode, you have the choice
between algorithms \code{\link{deprofile}.spline,
deprofile.fwhm, deprofile.localMax}; refer to the
individual manpages for more information.}
\item{\code{rtMargin, rtShift}}{The allowed retention
\item{\code{rtMargin, rtShift}}{ The allowed retention
time deviation relative to the values specified in your
compound list (see \code{\link{loadList}}), and the
systematic shift (due to the use of, e.g., pre-columns or
......@@ -24,11 +24,11 @@
OpenBabel; the CACTUS structures have explicit hydrogen
atoms. The path should point to the directory where
babel.exe (or the Linux "babel" equivalent) lies. }
\item{\code{use_version}}{Which MassBank record format to
use; version 2 is strongly advised, version 1 is
\item{\code{use_version}}{ Which MassBank record format
to use; version 2 is strongly advised, version 1 is
considered outdated and should be used only if for some
reason you are running old servers and an upgrade is not
feasible.} \item{\code{use_rean_peaks}}{Whether to
feasible.} \item{\code{use_rean_peaks}}{ Whether to
include peaks from reanalysis (see
\code{\link{reanalyzeFailpeaks}}) in the MassBank
records. Boolean, TRUE or FALSE. }
......@@ -36,10 +36,10 @@
to use in the MassBank records. The entries
\code{authors, copyright, license, instrument,
instrument_type, compound_class} correspond to the
MassBank entries \code{AUTHORS, COPYRIGHT, LICENSE,
AC$INSTRUMENT, AC$INSTRUMENT_TYPE, CH$COMPOUND_CLASS}.
The entry \code{confidence_comment} is added as
\code{COMMENT: CONFIDENCE} entry.
MassBank entries \code{AUTHORS, COPYRIGHT, PUBLICATION,
LICENSE, AC$INSTRUMENT, AC$INSTRUMENT_TYPE,
CH$COMPOUND_CLASS}. The entry \code{confidence_comment}
is added as \code{COMMENT: CONFIDENCE} entry.
The entry \code{internal_id_fieldname} is used to name
the MassBank entry which will keep a reference to the
......@@ -63,32 +63,116 @@
Entries under \code{ms_dataprocessing} are added as
\code{MS$DATA_PROCESSING:} entries, in addition to the
default \code{WHOLE: RMassBank}. }
\item{\code{spectraList}}{This setting describes the
experimental annotations for the single data-dependent
scans. For every data-dependent scan event, a
\code{spectraList} entry with \code{mode, ces, ce, res}
denoting collision mode, collision energy in short and
verbose notation, and FT resolution.}
\item{\code{accessionNumberShifts}}{This denotes the
\item{\code{annotator}}{ For advanced users: option to
select your own custom annotator. Check
\code{\link{annotator.default}} and the source code for
details.} \item{\code{spectraList}}{ This setting
describes the experimental annotations for the single
data-dependent scans. For every data-dependent scan
event, a \code{spectraList} entry with \code{mode, ces,
ce, res} denoting collision mode, collision energy in
short and verbose notation, and FT resolution.}
\item{\code{accessionNumberShifts}}{ This denotes the
starting points for accession numbers for different ion
types. For example, \code{pH: 0, mH: 50} means that
[M+H]+ spectra will start at \code{XX123401} (\code{XX}
being the \code{entry_prefix} and \code{1234} the
compound id) and [M-H]- will start at \code{XX123451}.}
\item{\code{electronicNoise, electronicNoiseWidth}}{Known
electronic noise peaks and the window to be used by
\code{\link{cleanElnoise}}}
\item{\code{recalibrateBy}}{\code{dppm} or \code{dmz} to
recalibrate either by delta ppm or by delta mz.}
\item{\code{recalibrateMS1}}{\code{common} or
\code{separate} to recalibrate MS1 data points together
or separately from MS2 data points.}
\item{\code{recalibrator: MS1, MS2}}{The functions to use
for recalibration of MS1 and MS2 data points. Note that
the \code{MS1} setting is only meaningful if
\item{\code{electronicNoise, electronicNoiseWidth}}{
Known electronic noise peaks and the window to be used by
\code{\link{cleanElnoise}}} \item{\code{recalibrateBy}}{
\code{dppm} or \code{dmz} to recalibrate either by delta
ppm or by delta mz.} \item{\code{recalibrateMS1}}{
\code{common} or \code{separate} to recalibrate MS1 data
points together or separately from MS2 data points.}
\item{\code{recalibrator: MS1, MS2}}{ The functions to
use for recalibration of MS1 and MS2 data points. Note
that the \code{MS1} setting is only meaningful if
\code{recalibrateMS1: separate}, otherwise the \code{MS2}
setting is used for a common recalibration curve. See
\code{\link{recalibrate.loess}} for details. } }
\code{\link{recalibrate.loess}} for details.}
\item{\code{multiplicityFilter}}{ Define the multiplicity
filtering level. Default is 2, a value of 1 is off (no
filtering) and >2 is harsher filtering.}
\item{\code{titleFormat}}{ The title of MassBank records
is a mini-summary of the record, for example
"Dinotefuran; LC-ESI-QFT; MS2; CE: 35\%; R=35000; [M+H]+".
By default, the first compound name \code{CH$NAME},
instrument type \code{AC$INSTRUMENT_TYPE}, MS/MS type
\code{AC$MASS_SPECTROMETRY: MS_TYPE}, collision energy
\code{RECORD_TITLE_CE}, resolution
\code{AC$MASS_SPECTROMETRY: RESOLUTION} and precursor
\code{MS$FOCUSED_ION: PRECURSOR_TYPE} are used. If
alternative information is relevant to differentiate
acquired spectra, the title should be adjusted. For
example, many TOFs do not have a resolution setting. See
MassBank documentation for more.}
\item{\code{filterSettings}}{ A list of settings that
affect the MS/MS processing. The entries
\code{ppmHighMass, ppmLowMass, massRangeDivision} set
values for pre-processing, prior to recalibration.
\code{ppmHighMass} defines the ppm error for the high
mass range (default 10 ppm for Orbitraps),
\code{ppmLowMass} is the error for the low mass range
(default 15 ppm for Orbitraps) and
\code{massRangeDivision} is the m/z value defining the
split between the high and low mass range (default m/z =
120).
The entry \code{ppmFine} defines the ppm cut-off post
recalibration. The default value of 5 ppm is recommended
for Orbitraps. For other instruments this can be
interpreted from the recalibration plot. All ppm limits
are one-sided (e.g. this includes values to +5 ppm or -5
ppm deviation from the exact mass).
The entries \code{prelimCut, prelimCutRatio} define the
intensity cut-off and cut-off ratio (in \% of the most
intense peak) for pre-processing. This affects the peak
selection for the recalibration only. Careful: the
default value 1e4 for Orbitrap LTQ positive mode could
remove all peaks for TOF data and will remove too many
peaks for Orbitrap LTQ negative mode spectra!
The entry \code{specOKLimit} defines the intensity limit
to include MS/MS spectra. MS/MS spectra must have at
least one peak above this limit to proceed through the
workflow.
\code{dbeMinLimit} defines the minimum ring and
double bond equivalents (DBE) allowed for assigned
formulas. This assumes maximum valences for elements
with multiple valence states. The default is -0.5
(accounting for fragments being ions).
The entries \code{satelliteMzLimit, satelliteIntLimit}
define the cut-off m/z and intensity values for satellite
peak removal (an artefact of Fourier Transform
processing). All peaks within the m/z limit (default 0.5)
and intensity ratio (default 0.05 or 5 \%) of the
respective peak will be removed. Applicable to Fourier
Transform instruments only (e.g. Orbitrap). }
\item{\code{findMsMsRawSettings}}{ Parameters for adjusting
the raw data retrieval. The entry \code{ppmFine} defines
the ppm error to look for the precursor in the MS1
(parent) spectrum. Default is 10 ppm for Orbitrap.
\code{mzCoarse} defines the error to search for the
precursor specification in the MS2 spectrum. This is
often only saved to 2 decimal places and thus can be
quite inaccurate. The accuracy also depends on the
isolation window used. The default setting (e.g. for
Orbitrap) is 0.5 (Da, or Th for m/z).
The entry \code{fillPrecursorScan} is largely untested.
The default value (FALSE) assumes all necessary precursor
information is available in the mzML file. A setting of
TRUE tries to fill in the precursor data scan number if
it is missing. Only tested on one case study so far -
feedback welcome! } }
}
\author{
Michael Stravs, Emma Schymanski
}
\seealso{
\code{\link{loadRmbSettings}}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment