Skip to content
Snippets Groups Projects
Commit 3271f25f authored by Emma Schymanski's avatar Emma Schymanski
Browse files

Push current fixed version

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/RMassBank@116140 bc3139a8-67e5-0310-9ffc-ced21a209358
parent 423104df
No related branches found
No related tags found
No related merge requests found
Showing with 1828 additions and 1082 deletions
Package: RMassBank Package: RMassBank
Type: Package Type: Package
Title: Workflow to process tandem MS files and build MassBank records Title: Workflow to process tandem MS files and build MassBank records
Version: 1.99.7 Version: 1.99.11
Authors@R: c( Authors@R: c(
person(given = "RMassBank at Eawag", email = "massbank@eawag.ch", person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
role=c("cre")), role=c("cre")),
...@@ -28,8 +28,8 @@ Depends: ...@@ -28,8 +28,8 @@ Depends:
Rcpp Rcpp
Encoding: UTF-8 Encoding: UTF-8
Imports: Imports:
XML,RCurl,rjson, XML,RCurl,rjson,S4Vectors,digest,
rcdk,yaml,mzR,methods,Biobase,MSnbase,S4Vectors rcdk,yaml,mzR,methods,Biobase,MSnbase
Suggests: Suggests:
gplots,RMassBankData, gplots,RMassBankData,
xcms (>= 1.37.1), xcms (>= 1.37.1),
...@@ -41,6 +41,7 @@ Collate: ...@@ -41,6 +41,7 @@ Collate:
'alternateAnalyze.R' 'alternateAnalyze.R'
'createMassBank.R' 'createMassBank.R'
'formulaCalculator.R' 'formulaCalculator.R'
'getSplash.R'
'leCsvAccess.R' 'leCsvAccess.R'
'leMsMs.r' 'leMsMs.r'
'leMsmsRaw.R' 'leMsmsRaw.R'
......
...@@ -34,6 +34,7 @@ export(filterPeaksMultiplicity) ...@@ -34,6 +34,7 @@ export(filterPeaksMultiplicity)
export(findCAS) export(findCAS)
export(findEIC) export(findEIC)
export(findFormula) export(findFormula)
export(findLevel)
export(findMass) export(findMass)
export(findMsMsHR) export(findMsMsHR)
export(findMsMsHR.direct) export(findMsMsHR.direct)
...@@ -52,6 +53,7 @@ export(formulastring.to.list) ...@@ -52,6 +53,7 @@ export(formulastring.to.list)
export(gatherCompound) export(gatherCompound)
export(gatherData) export(gatherData)
export(gatherDataBabel) export(gatherDataBabel)
export(gatherDataUnknown)
export(gatherPubChem) export(gatherPubChem)
export(gatherSpectrum) export(gatherSpectrum)
export(getCSID) export(getCSID)
...@@ -129,6 +131,7 @@ import(RCurl) ...@@ -129,6 +131,7 @@ import(RCurl)
import(Rcpp) import(Rcpp)
import(S4Vectors) import(S4Vectors)
import(XML) import(XML)
import(digest)
import(methods) import(methods)
import(mzR) import(mzR)
import(rcdk) import(rcdk)
......
This diff is collapsed.
...@@ -32,11 +32,7 @@ newStep2WorkFlow <- function(w, mode="pH", ...@@ -32,11 +32,7 @@ newStep2WorkFlow <- function(w, mode="pH",
confirmMode=FALSE, progressbar = "progressBarHook", confirmMode=FALSE, progressbar = "progressBarHook",
settings = getOption("RMassBank"), analyzeMethod="formula", fragdataFile = NA){ settings = getOption("RMassBank"), analyzeMethod="formula", fragdataFile = NA){
##Load the fragment data (locally or from RMassBank package) ##Load the fragment data (locally or from RMassBank package)
if(is.na(fragdataFile)){ fragData <- read.csv(fragdataFile,colClasses = c("character","numeric"))
fragData <- nfragData
} else{
fragData <- read.csv(fragdataFile,colClasses = c("character","numeric"))
}
##Progress bar ##Progress bar
nLen <- length(w@files) nLen <- length(w@files)
...@@ -95,41 +91,41 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru ...@@ -95,41 +91,41 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
filterSettings = getOption("RMassBank")$filterSettings, filterSettings = getOption("RMassBank")$filterSettings,
spectraList = getOption("RMassBank")$spectraList, fragData, fragDataIndex) spectraList = getOption("RMassBank")$spectraList, fragData, fragDataIndex)
{ {
cut <- 0 cut <- 0
cut_ratio <- 0 cut_ratio <- 0
if(run=="preliminary") if(run=="preliminary")
{ {
mzColname <- "mz" mzColname <- "mz"
filterMode <- "coarse" filterMode <- "coarse"
cut <- filterSettings$prelimCut cut <- filterSettings$prelimCut
if(is.na(cut)) if(is.na(cut))
{ {
if(mode %in% c("pH", "pM", "pNa", "pNH4")) if(mode %in% c("pH", "pM", "pNa", "pNH4"))
cut <- 1e4 cut <- 1e4
else if(mode %in% c("mH", "mFA","mM")) else if(mode %in% c("mH", "mFA","mM"))
cut <- 0 cut <- 0
else stop(paste("The ionization mode", mode, "is unknown.")) else stop(paste("The ionization mode", mode, "is unknown."))
} }
cutRatio <- filterSettings$prelimCutRatio cutRatio <- filterSettings$prelimCutRatio
} else{ } else{
mzColname <- "mzRecal" mzColname <- "mzRecal"
filterMode <- "fine" filterMode <- "fine"
cut <- filterSettings$fineCut cut <- filterSettings$fineCut
cut_ratio <- filterSettings$fineCutRatio cut_ratio <- filterSettings$fineCutRatio
if(is.na(cut)) cut <- 0 if(is.na(cut)) cut <- 0
} }
# find whole spectrum of parent peak, so we have reasonable data to feed into
# MolgenMsMs
parentSpectrum <- msmsPeaks$parentPeak
# find whole spectrum of parent peak, so we have reasonable data to feed into
# Check whether the spectra can be fitted to the spectra list correctly! # MolgenMsMs
if(nrow(msmsPeaks$childHeaders) != length(spectraList)) parentSpectrum <- msmsPeaks$parentPeak
{
warning(paste0(
"The spectra count of the substance ", msmsPeaks$id, " (", nrow(msmsPeaks$childHeaders), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)." # Check whether the spectra can be fitted to the spectra list correctly!
)) if(nrow(msmsPeaks$childHeaders) != length(spectraList))
{
warning(paste0("The spectra count of the substance ", msmsPeaks$id, " (", nrow(msmsPeaks$childHeaders), " spectra) doesn't match the provided spectra list (",
length(spectraList), " spectra).")
)
return(list(specOK=FALSE)) return(list(specOK=FALSE))
} }
...@@ -240,52 +236,52 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru ...@@ -240,52 +236,52 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
if(usePrecalcData){ if(usePrecalcData){
# if yes, split the peaks into 2 sets: Those which can be identified using the # if yes, split the peaks into 2 sets: Those which can be identified using the
# fragment data, and those which can (by maximum fragment Data m/z - 0.5) # fragment data, and those which can't (by maximum fragment Data m/z - 0.5)
precalcIndex <- which(shot[,mzColname] < (max(newfragData$mass)-0.5)) precalcIndex <- which(shot[,mzColname] < (max(newfragData$mass)-0.5))
smallShots <- shot[precalcIndex,] smallShots <- shot[precalcIndex,]
bigShots <- shot[-precalcIndex,] bigShots <- shot[-precalcIndex,]
# for smaller peaks use the precalculated fragment data # for smaller peaks use the precalculated fragment data
if(nrow(smallShots)){ if(nrow(smallShots)){
system.time(peakmatrixSmall <- lapply(smallShots[,mzColname],function(mass){ peakmatrixSmall <- lapply(smallShots[,mzColname],function(mass){
mass.calc <- mass + mode.charge * .emass mass.calc <- mass + mode.charge * .emass
maxminMass <- ppm(mass.calc, ppmlimit, l=TRUE) maxminMass <- ppm(mass.calc, ppmlimit, l=TRUE)
# retrieve only possibly relevant rows of the fragment Data (use the index) # retrieve only possibly relevant rows of the fragment Data (use the index)
indices <- newfragDataIndex[floor(maxminMass[2]*100)]:newfragDataIndex[ceiling(maxminMass[1]*100)] indices <- newfragDataIndex[floor(maxminMass[2]*100)]:newfragDataIndex[ceiling(maxminMass[1]*100)]
fragmentedFragmentData <- newfragData[indices,] fragmentedFragmentData <- newfragData[indices,]
# narrow it down further using the ppm # narrow it down further using the ppm
fragmentedFragmentData <- fragmentedFragmentData[which(fragmentedFragmentData$mass > maxminMass[2] & fragmentedFragmentData $mass < maxminMass[1]),] fragmentedFragmentData <- fragmentedFragmentData[which(fragmentedFragmentData$mass > maxminMass[2] & fragmentedFragmentData $mass < maxminMass[1]),]
# return nothing if the narrowed down data is empty at this point # return nothing if the narrowed down data is empty at this point
if(!nrow(fragmentedFragmentData)){ if(!nrow(fragmentedFragmentData)){
return(t(c(mzFound=as.numeric(as.character(mass)), return(t(c(mzFound=as.numeric(as.character(mass)),
formula=NA, mzCalc=NA))) formula=NA, mzCalc=NA)))
} }
# find out if the narrowed down fragments have a sub-formula of the parentformula # find out if the narrowed down fragments have a sub-formula of the parentformula
# return the indexes of relevant formulas # return the indexes of relevant formulas
fragmentedFragmentData <- fragmentedFragmentData[which(sapply(fragmentedFragmentData$formula, function(currentFormula) is.sub.formula(currentFormula,parentAtomList))),] fragmentedFragmentData <- fragmentedFragmentData[which(sapply(fragmentedFragmentData$formula, function(currentFormula) is.sub.formula(currentFormula,parentAtomList))),]
# return nothing if the narrowed down data is empty at this point # return nothing if the narrowed down data is empty at this point
if(!nrow(fragmentedFragmentData)){ if(!nrow(fragmentedFragmentData)){
return(t(c(mzFound=as.numeric(as.character(mass)), return(t(c(mzFound=as.numeric(as.character(mass)),
formula=NA, mzCalc=NA))) formula=NA, mzCalc=NA)))
} }
return(t(sapply(1:nrow(fragmentedFragmentData), function(f) return(t(sapply(1:nrow(fragmentedFragmentData), function(f)
{ {
mzCalc <- fragmentedFragmentData$mass[f] - mode.charge * .emass mzCalc <- fragmentedFragmentData$mass[f] - mode.charge * .emass
c(mzFound=as.numeric(as.character(mass)), c(mzFound=as.numeric(as.character(mass)),
formula=fragmentedFragmentData$formula[f], formula=fragmentedFragmentData$formula[f],
mzCalc=mzCalc) mzCalc=mzCalc)
}))) })))
})) })
} else{ } else{
peakmatrixSmall <- list() peakmatrixSmall <- list()
} }
...@@ -323,31 +319,30 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru ...@@ -323,31 +319,30 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
peakmatrix <- c(peakmatrixSmall,peakmatrixBig) peakmatrix <- c(peakmatrixSmall,peakmatrixBig)
} else{ } else{
system.time(peakmatrix <- lapply(shot[,mzColname], function(mass){ peakmatrix <- lapply(shot[,mzColname], function(mass){
# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae # Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
# finally back-correct calculated masses for the charge # finally back-correct calculated masses for the charge
mass.calc <- mass + mode.charge * .emass mass.calc <- mass + mode.charge * .emass
peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE), peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE),
limits, charge=0)), error=function(e) NA) limits, charge=0)), error=function(e) NA)
#peakformula <- tryCatch( #peakformula <- tryCatch(
# generate.formula(mass, # generate.formula(mass,
# ppm(mass, ppmlimit, p=TRUE), # ppm(mass, ppmlimit, p=TRUE),
# limits, charge=1), # limits, charge=1),
#error= function(e) list()) #error= function(e) list())
if(!is.list(peakformula)){ if(!is.list(peakformula)){
return(t(c(mzFound=as.numeric(as.character(mass)), return(t(c(mzFound=as.numeric(as.character(mass)),
formula=NA, mzCalc=NA))) formula=NA, mzCalc=NA)))
}else }else{
{ return(t(sapply(peakformula, function(f)
return(t(sapply(peakformula, function(f) {
{ mzCalc <- f@mass - mode.charge * .emass
mzCalc <- f@mass - mode.charge * .emass c(mzFound=mass,
c(mzFound=mass, formula=f@string,
formula=f@string, mzCalc=mzCalc)
mzCalc=mzCalc) })))
}))) }
} })
}))
} }
childPeaks <- as.data.frame(do.call(rbind, peakmatrix)) childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
......
This diff is collapsed.
...@@ -52,12 +52,12 @@ NULL ...@@ -52,12 +52,12 @@ NULL
#' @export #' @export
to.limits.rcdk <- function(formula) to.limits.rcdk <- function(formula)
{ {
if(!is.list(formula)) if(!is.list(formula))
formula <- formulastring.to.list(formula) formula <- formulastring.to.list(formula)
elelist <- lapply(names(formula), function(element) { elelist <- lapply(names(formula), function(element) {
return(c(element, 0, formula[[element]])) return(c(element, 0, formula[[element]]))
}) })
return(elelist) return(elelist)
} }
...@@ -342,3 +342,15 @@ ppm <- function(mass, dppm, l=FALSE, p=FALSE) ...@@ -342,3 +342,15 @@ ppm <- function(mass, dppm, l=FALSE, p=FALSE)
.emass <- 0.0005485799 .emass <- 0.0005485799
## pmass <- 1.007276565 ## pmass <- 1.007276565
## hmass <- 1.007825 ## hmass <- 1.007825
split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE)
{
if(!is.list(f)) f <- formulastring.to.list(f)
pos <- f[which(f > 0)]
neg <- f[which(f < 0)]
if(as.formula & !as.list)
return(list(pos=list.to.formula(pos), neg=list.to.formula(neg)))
else
return(list(pos=pos, neg=neg))
}
\ No newline at end of file
## Some constants
BINS <- 10;
BIN_SIZE <- 100;
EPS_CORRECTION = 1.0e-7
## FINAL_SCALE_FACTOR <- 9; ## Base 10
FINAL_SCALE_FACTOR <- 35; ## Base 36
decimalavoidance <- function(x) {
format(floor (x * 1000000), scientific=FALSE)
}
getBlockHash <- function(peaks,
RELATIVE_INTENSITY_SCALE=100,
MAX_HASH_CHARATERS_ENCODED_SPECTRUM=20) {
max_intensity = max(peaks[,2])
## Scale to maximum intensity
peaks[,2] <- as.integer(peaks[,2] / max(peaks[,2]) * RELATIVE_INTENSITY_SCALE + EPS_CORRECTION)
## Sorted by ascending m/z and ties broken by descending intensity
o <- order(peaks[,1], -1*peaks[,2], decreasing=FALSE)
peakString <- paste(apply(peaks[o,,drop=FALSE], MARGIN=1,
FUN=function(x) {paste(c(decimalavoidance(x[1]+EPS_CORRECTION), x[2]),
collapse=":")}), collapse=" ")
## cat("Prehash: ", peakString, sep="", file="/tmp/prehash-R") ## Debugging of spectrum-string
block2 <- substr(digest(peakString, algo="sha256", serialize=FALSE),
1, MAX_HASH_CHARATERS_ENCODED_SPECTRUM)
}
integer2base36 <- function(i) {
integer2base36code <- sapply(c(48:57, 97:122), function(i) rawToChar(as.raw(i)))
paste(integer2base36code[i+1], collapse="")
}
getBlockHist <- function(peaks) {
## Initialise output
wrappedhist <- integer(BINS)
binindex <- as.integer(peaks[,1] / BIN_SIZE)
summedintensities <- tapply(peaks[,2], binindex, sum)
wrappedbinindex <- unique(binindex) %% BINS
wrappedintensities <- tapply(summedintensities, wrappedbinindex, sum)
normalisedintensities <- as.integer(wrappedintensities/max(wrappedintensities)*FINAL_SCALE_FACTOR)
wrappedhist[sort(unique(wrappedbinindex))+1] <- normalisedintensities
paste(integer2base36(wrappedhist), collapse="")
}
getSplash <- function(peaks) {
block2 <- getBlockHist(peaks)
block3 <- getBlockHash(peaks)
splash <- paste("splash10",
block2,
block3,
sep="-")
return(splash)
}
This diff is collapsed.
This diff is collapsed.
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#' @import rcdk #' @import rcdk
#' @import rjson #' @import rjson
#' @import yaml #' @import yaml
#' @import digest
NULL # This is required so that roxygen knows where the first manpage starts NULL # This is required so that roxygen knows where the first manpage starts
...@@ -84,6 +85,9 @@ NULL # This is required so that roxygen knows where the first manpage starts ...@@ -84,6 +85,9 @@ NULL # This is required so that roxygen knows where the first manpage starts
#' @param rtMargin The retention time tolerance to use. #' @param rtMargin The retention time tolerance to use.
#' @param deprofile Whether deprofiling should take place, and what method should be #' @param deprofile Whether deprofiling should take place, and what method should be
#' used (cf. \code{\link{deprofile}}) #' used (cf. \code{\link{deprofile}})
#' @param retrieval A value that determines whether the files should be handled either as "standard",
#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
#' if the only know thing is the m/z
#' @return An \code{RmbSpectraSet} (for \code{findMsMsHR}). Contains parent MS1 spectrum (\code{@@parent}), a block of dependent MS2 spectra ((\code{@@children}) #' @return An \code{RmbSpectraSet} (for \code{findMsMsHR}). Contains parent MS1 spectrum (\code{@@parent}), a block of dependent MS2 spectra ((\code{@@children})
#' and some metadata (\code{id},\code{mz},\code{name},\code{mode} in which the spectrum was acquired. #' and some metadata (\code{id},\code{mz},\code{name},\code{mode} in which the spectrum was acquired.
#' #'
...@@ -112,7 +116,8 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo ...@@ -112,7 +116,8 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
rtMargin = getOption("RMassBank")$rtMargin, rtMargin = getOption("RMassBank")$rtMargin,
deprofile = getOption("RMassBank")$deprofile, deprofile = getOption("RMassBank")$deprofile,
headerCache = NULL, headerCache = NULL,
peaksCache = NULL) peaksCache = NULL,
retrieval="standard")
{ {
# access data directly for finding the MS/MS data. This is done using # access data directly for finding the MS/MS data. This is done using
...@@ -122,7 +127,7 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo ...@@ -122,7 +127,7 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
if(!is.null(fileName)) if(!is.null(fileName))
msRaw <- openMSfile(fileName) msRaw <- openMSfile(fileName)
mzLimits <- findMz(cpdID, mode) mzLimits <- findMz(cpdID, mode, retrieval=retrieval)
mz <- mzLimits$mzCenter mz <- mzLimits$mzCenter
limit.fine <- ppm(mz, ppmFine, p=TRUE) limit.fine <- ppm(mz, ppmFine, p=TRUE)
if(!useRtLimit) if(!useRtLimit)
...@@ -143,7 +148,12 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo ...@@ -143,7 +148,12 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
#sp@mz <- mzLimits #sp@mz <- mzLimits
sp@id <- as.character(as.integer(cpdID)) sp@id <- as.character(as.integer(cpdID))
sp@name <- findName(cpdID) sp@name <- findName(cpdID)
sp@formula <- findFormula(cpdID) ENV <- environment()
if(retrieval == "unknown"){
sp@formula <- ""
} else{
sp@formula <- findFormula(cpdID, retrieval=retrieval)
}
sp@mode <- mode sp@mode <- mode
# If we had to open the file, we have to close it again # If we had to open the file, we have to close it again
...@@ -268,7 +278,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, ...@@ -268,7 +278,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
tic = line["totIonCurrent"], tic = line["totIonCurrent"],
peaksCount = line["peaksCount"], peaksCount = line["peaksCount"],
rt = line["retentionTime"], rt = line["retentionTime"],
acquisitionNum = as.integer(line["acquisitionNum"]), acquisitionNum = as.integer(line["seqNum"]),
centroided = TRUE centroided = TRUE
) )
}) })
...@@ -283,7 +293,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA, ...@@ -283,7 +293,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
polarity = as.integer(masterHeader$polarity), polarity = as.integer(masterHeader$polarity),
peaksCount = as.integer(masterHeader$peaksCount), peaksCount = as.integer(masterHeader$peaksCount),
rt = masterHeader$retentionTime, rt = masterHeader$retentionTime,
acquisitionNum = as.integer(masterHeader$acquisitionNum), acquisitionNum = as.integer(masterHeader$seqNum),
tic = masterHeader$totIonCurrent, tic = masterHeader$totIonCurrent,
centroided = TRUE centroided = TRUE
) )
......
...@@ -42,12 +42,13 @@ ...@@ -42,12 +42,13 @@
#' @export #' @export
msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE, readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE,
Args = NULL, settings = getOption("RMassBank"), progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){ Args = NULL, settings = getOption("RMassBank"),
progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){
.checkMbSettings() .checkMbSettings()
##Read the files and cpdids according to the definition ##Read the files and cpdids according to the definition
##All cases are silently accepted, as long as they can be handled according to one definition ##All cases are silently accepted, as long as they can be handled according to one definition
if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown.")) if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
if(is.null(filetable)){ if(is.null(filetable)){
##If no filetable is supplied, filenames must be named explicitly ##If no filetable is supplied, filenames must be named explicitly
if(is.null(files)) if(is.null(files))
...@@ -83,23 +84,24 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, ...@@ -83,23 +84,24 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
if(!all(file.exists(w@files))){ if(!all(file.exists(w@files))){
stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist") stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist")
} }
na.ids <- which(is.na(sapply(cpdids, findSmiles))) # na.ids <- which(is.na(sapply(cpdids, findSmiles)))
if(length(na.ids)){ # if(length(na.ids)){
stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list") # stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list")
} # }
##This should work ##This should work
if(readMethod == "minimal"){ if(readMethod == "minimal"){
##Edit options ##Edit options
opt <- getOption("RMassBank") opt <- getOption("RMassBank")
opt$recalibrator$MS1 <- "recalibrate.identity" opt$recalibrator$MS1 <- "recalibrate.identity"
opt$recalibrator$MS2 <- "recalibrate.identity" opt$recalibrator$MS2 <- "recalibrate.identity"
options(RMassBank=opt) opt$add_annotation==FALSE
##Edit analyzemethod options(RMassBank=opt)
analyzeMethod <- "intensity" ##Edit analyzemethod
} analyzeMethod <- "intensity"
}
if(readMethod == "mzR"){ if(readMethod == "mzR"){
##Progressbar ##Progressbar
...@@ -113,7 +115,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, ...@@ -113,7 +115,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
# Find compound ID # Find compound ID
cpdID <- cpdids[count] cpdID <- cpdids[count]
retrieval <- findLevel(cpdID,TRUE)
# Set counter up # Set counter up
envir$count <- envir$count + 1 envir$count <- envir$count + 1
...@@ -124,7 +126,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, ...@@ -124,7 +126,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
mzCoarse = settings$findMsMsRawSettings$mzCoarse, mzCoarse = settings$findMsMsRawSettings$mzCoarse,
fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan, fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan,
rtMargin = settings$rtMargin, rtMargin = settings$rtMargin,
deprofile = settings$deprofile) deprofile = settings$deprofile, retrieval=retrieval)
gc() gc()
# Progress: # Progress:
...@@ -310,7 +312,7 @@ msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NU ...@@ -310,7 +312,7 @@ msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NU
} else{ } else{
psp[[i]] <- which.min( abs(getRT(anmsms[[i]]) - RT) ) psp[[i]] <- which.min( abs(getRT(anmsms[[i]]) - RT) )
} }
## Now find the pspec for compound ## Now find the pspec for compound
## 2nd best: Spectrum closest to MS1 ## 2nd best: Spectrum closest to MS1
##psp <- which.min( abs(getRT(anmsms) - actualRT)) ##psp <- which.min( abs(getRT(anmsms) - actualRT))
...@@ -345,7 +347,7 @@ msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NU ...@@ -345,7 +347,7 @@ msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NU
spectraNum <- length(w@spectra[[which(IDindex)]]@children) spectraNum <- length(w@spectra[[which(IDindex)]]@children)
w@spectra[[which(IDindex)]]@children[[spectraNum+1]] <- sp@children[[1]] w@spectra[[which(IDindex)]]@children[[spectraNum+1]] <- sp@children[[1]]
} else { } else {
w@spectra[[length(www@spectra)+1]] <- sp w@spectra[[length(w@spectra)+1]] <- sp
} }
} else{ } else{
w@spectra[[1]] <- sp w@spectra[[1]] <- sp
......
...@@ -167,18 +167,18 @@ parseMassBank <- function(Files){ ...@@ -167,18 +167,18 @@ parseMassBank <- function(Files){
} }
##Extract the peaks and write the data into a data.frame ##Extract the peaks and write the data into a data.frame
PKStart <- grep('PK$PEAK:',record, fixed = TRUE) + 1 PKStart <- grep('PK$PEAK:',record, fixed = TRUE) + 1
endslash <- grep('//',record, fixed = TRUE) endslash <- tail(grep('//',record, fixed = TRUE),1)
if(PKStart < endslash){ if(PKStart < endslash){
splitted <- strsplit(record[PKStart:(endslash-1)]," ") splitted <- strsplit(record[PKStart:(endslash-1)]," ")
PKPeak <- matrix(nrow = endslash - PKStart, ncol = 3) PKPeak <- matrix(nrow = endslash - PKStart, ncol = 3)
for(k in 1:length(splitted)){ for(k in 1:length(splitted)){
splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")] splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
PKPeak[k,] <- splitted[[k]] PKPeak[k,] <- splitted[[k]]
} }
PKPeak <- as.data.frame(PKPeak, stringsAsFactors = FALSE) PKPeak <- as.data.frame(PKPeak, stringsAsFactors = FALSE)
PKPeak[] <- lapply(PKPeak, type.convert) PKPeak[] <- lapply(PKPeak, type.convert)
colnames(PKPeak) <- c("m/z", "int", "rel.int.") colnames(PKPeak) <- c("m/z", "int", "rel.int.")
} }
mb@compiled_ok[[i]][['PK$PEAK']] <- PKPeak mb@compiled_ok[[i]][['PK$PEAK']] <- PKPeak
......
...@@ -187,7 +187,7 @@ NULL ...@@ -187,7 +187,7 @@ NULL
authors = "Nomen Nescio, The Unseen University", authors = "Nomen Nescio, The Unseen University",
copyright = "Copyright (C) XXX", copyright = "Copyright (C) XXX",
publication = "", publication = "",
license = "CC BY-SA", license = "CC BY",
instrument = "LTQ Orbitrap XL Thermo Scientific", instrument = "LTQ Orbitrap XL Thermo Scientific",
instrument_type = "LC-ESI-ITFT", instrument_type = "LC-ESI-ITFT",
confidence_comment = "standard compound", confidence_comment = "standard compound",
......
File deleted
...@@ -116,15 +116,15 @@ findProgress <- function(workspace) ...@@ -116,15 +116,15 @@ findProgress <- function(workspace)
#' #'
updateSettings <- function(settings, warn=TRUE) updateSettings <- function(settings, warn=TRUE)
{ {
settings.new <- .settingsList settings.new <- .settingsList
settings.old <- settings settings.old <- settings
renew <- setdiff(names(settings.new), names(settings.old)) renew <- setdiff(names(settings.new), names(settings.old))
if(length(renew) > 0 && warn==TRUE){ if(length(renew) > 0 && warn==TRUE){
warning(paste0("Your settings are outdated! The following fields were taken from default values: ", warning(paste0("Your settings are outdated! The following fields were taken from default values: ",
paste(renew,collapse=", "))) paste(renew,collapse=", ")))
if("filterSettings" %in% renew) if("filterSettings" %in% renew)
warning("The default values of filterSettings could change the processing behaviour if you have negative-mode spectra. Check ?updateSettings for details.") warning("The default values of filterSettings could change the processing behaviour if you have negative-mode spectra. Check ?updateSettings for details.")
} }
settings.old[renew] <- settings.new[renew] settings.old[renew] <- settings.new[renew]
return(settings.old) return(settings.old)
} }
...@@ -175,7 +175,7 @@ smiles2mass <- function(SMILES){ ...@@ -175,7 +175,7 @@ smiles2mass <- function(SMILES){
return(mass) return(mass)
} }
.unitTestRMB <- function(WD=getwd()){ .unitTestmzR <- function(WD=getwd()){
requireNamespace("RUnit",quietly=TRUE) requireNamespace("RUnit",quietly=TRUE)
requireNamespace("RMassBankData",quietly=TRUE) requireNamespace("RMassBankData",quietly=TRUE)
oldwd <- getwd() oldwd <- getwd()
...@@ -217,19 +217,12 @@ smiles2mass <- function(SMILES){ ...@@ -217,19 +217,12 @@ smiles2mass <- function(SMILES){
#testFuncRegexp = "^test.+", #testFuncRegexp = "^test.+",
rngKind = "Marsaglia-Multicarry", rngKind = "Marsaglia-Multicarry",
rngNormalKind = "Kinderman-Ramage") rngNormalKind = "Kinderman-Ramage")
testSuite3 <- RUnit::defineTestSuite("Evaluation of correct handling if no peaks are found", dirs = system.file("unitTests",
package="RMassBank"), testFileRegexp = "runit.NOPEAKS.R",
#testFuncRegexp = "^test.+",
rngKind = "Marsaglia-Multicarry",
rngNormalKind = "Kinderman-Ramage")
testData <- suppressWarnings(RUnit::runTestSuite(testSuite)) testData <- suppressWarnings(RUnit::runTestSuite(testSuite))
testData2 <- suppressWarnings(RUnit::runTestSuite(testSuite2)) testData2 <- suppressWarnings(RUnit::runTestSuite(testSuite2))
testData3 <- suppressWarnings(RUnit::runTestSuite(testSuite3))
file.remove(c("pH_narcotics_Failpeaks.csv","pH_narcotics.RData","pH_narcotics_RA.RData","pH_narcotics_RF.RData")) file.remove(c("pH_narcotics_Failpeaks.csv","pH_narcotics.RData","pH_narcotics_RA.RData","pH_narcotics_RF.RData"))
RUnit::printTextProtocol(testData) RUnit::printTextProtocol(testData)
RUnit::printTextProtocol(testData2) RUnit::printTextProtocol(testData2)
RUnit::printTextProtocol(testData3)
setwd(oldwd) setwd(oldwd)
return(testData) return(testData)
} }
......
...@@ -41,7 +41,7 @@ getCactus <- function(identifier, representation) ...@@ -41,7 +41,7 @@ getCactus <- function(identifier, representation)
ret <- tryCatch( ret <- tryCatch(
getURLContent(paste( getURLContent(paste(
"http://cactus.nci.nih.gov/chemical/structure/", "https://cactus.nci.nih.gov/chemical/structure/",
URLencode(identifier), "/", representation, sep='')), URLencode(identifier), "/", representation, sep='')),
error = function(e) NA) error = function(e) NA)
if(is.na(ret)) if(is.na(ret))
......
...@@ -13,7 +13,7 @@ deprofile: ...@@ -13,7 +13,7 @@ deprofile:
# Deviation (in minutes) allowed the for retention time # Deviation (in minutes) allowed the for retention time
rtMargin: 0.4 rtMargin: 0.4
# Systematic retention time shift # Systematic retention time shift
rtShift: -0.3 rtShift: 0.0
# Directory to OpenBabel. Required for creating molfiles for MassBank export. # Directory to OpenBabel. Required for creating molfiles for MassBank export.
# If no OpenBabel directory is given, RMassBank will attempt to use the CACTUS webservice # If no OpenBabel directory is given, RMassBank will attempt to use the CACTUS webservice
...@@ -39,7 +39,7 @@ annotations: ...@@ -39,7 +39,7 @@ annotations:
authors: Nomen Nescio, The Unseen University authors: Nomen Nescio, The Unseen University
copyright: Copyright (C) XXX copyright: Copyright (C) XXX
publication: publication:
license: CC BY-SA license: CC BY
instrument: LTQ Orbitrap XL Thermo Scientific instrument: LTQ Orbitrap XL Thermo Scientific
instrument_type: LC-ESI-ITFT instrument_type: LC-ESI-ITFT
confidence_comment: standard compound confidence_comment: standard compound
......
w <- newMsmsWorkspace()
RmbDefaultSettings()
files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
w@files <- files
loadList(system.file("list/NarcoticsDataset.csv", package="RMassBankData"))
w <- msmsWorkflow(w, mode="pH", steps=c(1:4), archivename="pH_narcotics")
w <- msmsWorkflow(w, mode="pH", steps=c(5:8), archivename="pH_narcotics")
mb <- newMbWorkspace(w)
mb <- loadInfolists(mb, system.file("infolists", package="RMassBankData"))
mb <- mbWorkflow(mb)
### Error message code structure borrowed from rcppgls:
### https://github.com/eddelbuettel/rcppgsl/blob/master/inst/unitTests/runTests.R
testElec <- RUnit::defineTestSuite("Electronic noise and formula calculation Test", dirs = system.file("unitTests",
package="RMassBank"), testFileRegexp = "test_El.*")
testAcqu <- RUnit::defineTestSuite("Evaluation of data acquisition process", dirs = system.file("unitTests",
package="RMassBank"), testFileRegexp = "test_mzR.R")
testNope <- RUnit::defineTestSuite("Evaluation of correct handling if no peaks are found", dirs = system.file("unitTests",
package="RMassBank"), testFileRegexp = "test_NOPEAKS.R")
errEval <- function(testSuite){
Res <- RUnit::runTestSuite(testSuite)
err <- getErrors(Res)
if((err$nFail + err$nErr) > 0){
stop(sprintf('unit test problems: %d failures, %d errors in testfunction: "%s"', err$nFail, err$nErr, testSuite$name) )
} else{
success <- err$nTestFunc - err$nFail - err$nErr - err$nDeactivated
cat( sprintf( "%d / %d\n", success, err$nTestFunc ) )
}
}
errEval(testElec)
errEval(testAcqu)
errEval(testNope)
\ No newline at end of file
...@@ -5,6 +5,5 @@ test.eletronicnoise <- function(){ ...@@ -5,6 +5,5 @@ test.eletronicnoise <- function(){
test.formulacalculation <- function(){ test.formulacalculation <- function(){
failpeaks <- read.csv("pH_narcotics_Failpeaks.csv") failpeaks <- read.csv("pH_narcotics_Failpeaks.csv")
RUnit::checkTrue(!any(apply(failpeaks,1,function(x) all(x[2:5] == c(70,2758,321,56.04933))))) RUnit::checkTrue(!any(apply(failpeaks,1,function(x) all(x[2:5] == c(70,2758,321,56.04933)))))
} }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment