Push current fixed version

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/RMassBank@116140 bc3139a8-67e5-0310-9ffc-ced21a209358

Push current fixed version
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/RMassBank@116140 bc3139a8-67e5-0310-9ffc-ced21a209358
3271f25f · Emma Schymanski · 423104df · 3271f25f · 3271f25f · 3271f25f
Commit 3271f25f authored 8 years ago by Emma Schymanski
--- a/DESCRIPTION
+++ b/DESCRIPTION
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 1.99.7
+Version: 1.99.11
 Authors@R: c(
    person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
    role=c("cre")),
@@ -28,8 +28,8 @@ Depends:
    Rcpp
 Encoding: UTF-8
 Imports:
-    XML,RCurl,rjson,
+    XML,RCurl,rjson,S4Vectors,digest,
-    rcdk,yaml,mzR,methods,Biobase,MSnbase,S4Vectors
+    rcdk,yaml,mzR,methods,Biobase,MSnbase
 Suggests:
    gplots,RMassBankData,
    xcms (>= 1.37.1),
@@ -41,6 +41,7 @@ Collate:
    'alternateAnalyze.R'
    'createMassBank.R'
    'formulaCalculator.R'
+    'getSplash.R'
    'leCsvAccess.R'
    'leMsMs.r'
    'leMsmsRaw.R'

--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,6 +34,7 @@ export(filterPeaksMultiplicity)
 export(findCAS)
 export(findEIC)
 export(findFormula)
+export(findLevel)
 export(findMass)
 export(findMsMsHR)
 export(findMsMsHR.direct)
@@ -52,6 +53,7 @@ export(formulastring.to.list)
 export(gatherCompound)
 export(gatherData)
 export(gatherDataBabel)
+export(gatherDataUnknown)
 export(gatherPubChem)
 export(gatherSpectrum)
 export(getCSID)
@@ -129,6 +131,7 @@ import(RCurl)
 import(Rcpp)
 import(S4Vectors)
 import(XML)
+import(digest)
 import(methods)
 import(mzR)
 import(rcdk)

--- a/R/Isotopic_Annotation.R
+++ b/R/Isotopic_Annotation.R
--- a/R/alternateAnalyze.R
+++ b/R/alternateAnalyze.R
@@ -32,11 +32,7 @@ newStep2WorkFlow <- function(w, mode="pH",
                             confirmMode=FALSE, progressbar = "progressBarHook", 
                             settings = getOption("RMassBank"), analyzeMethod="formula", fragdataFile = NA){
  ##Load the fragment data (locally or from RMassBank package)
-  if(is.na(fragdataFile)){
+  fragData <- read.csv(fragdataFile,colClasses = c("character","numeric"))
-    fragData <- nfragData
-  } else{
-    fragData <- read.csv(fragdataFile,colClasses = c("character","numeric"))
-  }
  ##Progress bar
  nLen <- length(w@files)
@@ -95,41 +91,41 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
                                filterSettings = getOption("RMassBank")$filterSettings,
                                spectraList = getOption("RMassBank")$spectraList, fragData, fragDataIndex)
 {
-  cut <- 0
+	cut <- 0
-  cut_ratio <- 0
+	cut_ratio <- 0
-  if(run=="preliminary")
+	if(run=="preliminary")
-  {
+	{
-    mzColname <- "mz"
+		mzColname <- "mz"
-    filterMode <- "coarse"
+		filterMode <- "coarse"
-    cut <- filterSettings$prelimCut
+		cut <- filterSettings$prelimCut
-    if(is.na(cut))
+		if(is.na(cut))
-    {
+		{
-      if(mode %in% c("pH", "pM", "pNa", "pNH4"))
+			if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-        cut <- 1e4
+			cut <- 1e4
-      else if(mode %in% c("mH", "mFA","mM"))
+			else if(mode %in% c("mH", "mFA","mM"))
-        cut <- 0
+			cut <- 0
-      else stop(paste("The ionization mode", mode, "is unknown."))
+			else stop(paste("The ionization mode", mode, "is unknown."))
-    }
+		}
-    cutRatio <- filterSettings$prelimCutRatio
+		cutRatio <- filterSettings$prelimCutRatio
-  } else{
+	} else{
-    mzColname <- "mzRecal"
+		mzColname <- "mzRecal"
-    filterMode <- "fine"
+		filterMode <- "fine"
-    cut <- filterSettings$fineCut
+		cut <- filterSettings$fineCut
-    cut_ratio <- filterSettings$fineCutRatio
+		cut_ratio <- filterSettings$fineCutRatio
-    if(is.na(cut)) cut <- 0
+		if(is.na(cut)) cut <- 0
-  }
+	}
-  # find whole spectrum of parent peak, so we have reasonable data to feed into
-  # MolgenMsMs
-  parentSpectrum <- msmsPeaks$parentPeak
+	# find whole spectrum of parent peak, so we have reasonable data to feed into
-  # Check whether the spectra can be fitted to the spectra list correctly!
+	# MolgenMsMs
-  if(nrow(msmsPeaks$childHeaders) != length(spectraList))
+	parentSpectrum <- msmsPeaks$parentPeak
-  {
-    warning(paste0(
-      "The spectra count of the substance ", msmsPeaks$id, " (", nrow(msmsPeaks$childHeaders), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)."
+	# Check whether the spectra can be fitted to the spectra list correctly!
-    ))
+	if(nrow(msmsPeaks$childHeaders) != length(spectraList))
+	{
+		warning(paste0("The spectra count of the substance ", msmsPeaks$id, " (", nrow(msmsPeaks$childHeaders), " spectra) doesn't match the provided spectra list (", 
+						length(spectraList), " spectra).")
+	)
    return(list(specOK=FALSE))
  }
@@ -240,52 +236,52 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
    if(usePrecalcData){
-      # if yes, split the peaks into 2 sets: Those which can be identified using the
+		# if yes, split the peaks into 2 sets: Those which can be identified using the
-      # fragment data, and those which can (by maximum fragment Data m/z - 0.5)
+		# fragment data, and those which can't (by maximum fragment Data m/z - 0.5)
-      precalcIndex <- which(shot[,mzColname] < (max(newfragData$mass)-0.5))
+		precalcIndex <- which(shot[,mzColname] < (max(newfragData$mass)-0.5))
-      smallShots <- shot[precalcIndex,]
+		smallShots <- shot[precalcIndex,]
-      bigShots <- shot[-precalcIndex,]
+		bigShots <- shot[-precalcIndex,]
-      # for smaller peaks use the precalculated fragment data
+		# for smaller peaks use the precalculated fragment data
-      if(nrow(smallShots)){
+		if(nrow(smallShots)){
-        system.time(peakmatrixSmall <- lapply(smallShots[,mzColname],function(mass){
+		peakmatrixSmall <- lapply(smallShots[,mzColname],function(mass){
-          mass.calc <- mass + mode.charge * .emass
+			mass.calc <- mass + mode.charge * .emass
-          maxminMass <- ppm(mass.calc, ppmlimit, l=TRUE)
+			maxminMass <- ppm(mass.calc, ppmlimit, l=TRUE)
-          # retrieve only possibly relevant rows of the fragment Data (use the index)
+			# retrieve only possibly relevant rows of the fragment Data (use the index)
-          indices <- newfragDataIndex[floor(maxminMass[2]*100)]:newfragDataIndex[ceiling(maxminMass[1]*100)]
+			indices <- newfragDataIndex[floor(maxminMass[2]*100)]:newfragDataIndex[ceiling(maxminMass[1]*100)]
-          fragmentedFragmentData <- newfragData[indices,]
+			fragmentedFragmentData <- newfragData[indices,]
-          # narrow it down further using the ppm
+			# narrow it down further using the ppm
-          fragmentedFragmentData <- fragmentedFragmentData[which(fragmentedFragmentData$mass > maxminMass[2] & fragmentedFragmentData $mass < maxminMass[1]),]
+			fragmentedFragmentData <- fragmentedFragmentData[which(fragmentedFragmentData$mass > maxminMass[2] & fragmentedFragmentData $mass < maxminMass[1]),]
-          # return nothing if the narrowed down data is empty at this point
+			# return nothing if the narrowed down data is empty at this point
-          if(!nrow(fragmentedFragmentData)){
+			if(!nrow(fragmentedFragmentData)){
-            return(t(c(mzFound=as.numeric(as.character(mass)),
+				return(t(c(mzFound=as.numeric(as.character(mass)),
-                       formula=NA, mzCalc=NA)))
+						formula=NA, mzCalc=NA)))
-          }
+			}
-          # find out if the narrowed down fragments have a sub-formula of the parentformula
+			# find out if the narrowed down fragments have a sub-formula of the parentformula
-          # return the indexes of relevant formulas
+			# return the indexes of relevant formulas
-          fragmentedFragmentData <- fragmentedFragmentData[which(sapply(fragmentedFragmentData$formula, function(currentFormula) is.sub.formula(currentFormula,parentAtomList))),]
+			fragmentedFragmentData <- fragmentedFragmentData[which(sapply(fragmentedFragmentData$formula, function(currentFormula) is.sub.formula(currentFormula,parentAtomList))),]
-          # return nothing if the narrowed down data is empty at this point
+			# return nothing if the narrowed down data is empty at this point
-          if(!nrow(fragmentedFragmentData)){
+			if(!nrow(fragmentedFragmentData)){
-            return(t(c(mzFound=as.numeric(as.character(mass)),
+			return(t(c(mzFound=as.numeric(as.character(mass)),
-                       formula=NA, mzCalc=NA)))
+						formula=NA, mzCalc=NA)))
-          }
+			}
-          return(t(sapply(1:nrow(fragmentedFragmentData), function(f)
+			return(t(sapply(1:nrow(fragmentedFragmentData), function(f)
-          {
+			{
-            mzCalc <- fragmentedFragmentData$mass[f] - mode.charge * .emass 
+			mzCalc <- fragmentedFragmentData$mass[f] - mode.charge * .emass 
-            c(mzFound=as.numeric(as.character(mass)),
+			c(mzFound=as.numeric(as.character(mass)),
-              formula=fragmentedFragmentData$formula[f], 
+				formula=fragmentedFragmentData$formula[f], 
-              mzCalc=mzCalc)
+				mzCalc=mzCalc)
-          })))
+			})))
-        }))
+        })
      } else{
        peakmatrixSmall <- list()
      }
@@ -323,31 +319,30 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
      peakmatrix <- c(peakmatrixSmall,peakmatrixBig)
    } else{
-        system.time(peakmatrix <- lapply(shot[,mzColname], function(mass){
+			peakmatrix <- lapply(shot[,mzColname], function(mass){
-          # Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
+			# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
-          # finally back-correct calculated masses for the charge
+			# finally back-correct calculated masses for the charge
-          mass.calc <- mass + mode.charge * .emass
+			mass.calc <- mass + mode.charge * .emass
-          peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE), 
+			peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE), 
-                                                   limits, charge=0)), error=function(e) NA)
+												   limits, charge=0)), error=function(e) NA)
-          #peakformula <- tryCatch( 
+			#peakformula <- tryCatch( 
-          #  generate.formula(mass, 
+			#  generate.formula(mass, 
-          #                   ppm(mass, ppmlimit, p=TRUE),
+			#                   ppm(mass, ppmlimit, p=TRUE),
-          #                   limits, charge=1),
+			#                   limits, charge=1),
-          #error= function(e) list())
+			#error= function(e) list())
-          if(!is.list(peakformula)){
+			if(!is.list(peakformula)){
-            return(t(c(mzFound=as.numeric(as.character(mass)),
+				return(t(c(mzFound=as.numeric(as.character(mass)),
-                       formula=NA, mzCalc=NA)))
+						formula=NA, mzCalc=NA)))
-          }else
+			}else{
-          {
+				return(t(sapply(peakformula, function(f)
-            return(t(sapply(peakformula, function(f)
+				{
-            {
+					mzCalc <- f@mass - mode.charge * .emass 
-              mzCalc <- f@mass - mode.charge * .emass 
+					c(mzFound=mass,
-              c(mzFound=mass,
+					formula=f@string, 
-                formula=f@string, 
+					mzCalc=mzCalc)
-                mzCalc=mzCalc)
+				})))
-            })))
+			}
-          }
+        })
-        }))
    }
    childPeaks <- as.data.frame(do.call(rbind, peakmatrix))

--- a/R/createMassBank.R
+++ b/R/createMassBank.R
--- a/R/formulaCalculator.R
+++ b/R/formulaCalculator.R
@@ -52,12 +52,12 @@ NULL
 #' @export
 to.limits.rcdk <- function(formula)
 {
-  if(!is.list(formula))
+    if(!is.list(formula))
-    formula <- formulastring.to.list(formula)
+        formula <- formulastring.to.list(formula)
-  elelist <- lapply(names(formula), function(element) {
+    elelist <- lapply(names(formula), function(element) {
-    return(c(element, 0, formula[[element]]))
+                    return(c(element, 0, formula[[element]]))
-  })
+                })
-  return(elelist)
+    return(elelist)
 }
@@ -342,3 +342,15 @@ ppm <- function(mass, dppm, l=FALSE, p=FALSE)
 .emass <- 0.0005485799
 ## pmass <- 1.007276565
 ## hmass <- 1.007825
+split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE)
+{
+	if(!is.list(f)) f <- formulastring.to.list(f)
+	pos <- f[which(f > 0)]
+	neg <- f[which(f < 0)]
+	if(as.formula & !as.list)
+		return(list(pos=list.to.formula(pos), neg=list.to.formula(neg)))
+	else
+		return(list(pos=pos, neg=neg))
+}
\ No newline at end of file
--- a/R/getSplash.R
+++ b/R/getSplash.R
+## Some constants
+BINS <- 10;
+BIN_SIZE <- 100;
+EPS_CORRECTION = 1.0e-7
+## FINAL_SCALE_FACTOR <- 9; ## Base 10
+FINAL_SCALE_FACTOR <- 35; ## Base 36 
+decimalavoidance <- function(x) {
+    format(floor (x * 1000000), scientific=FALSE)
+}
+getBlockHash <- function(peaks,
+                          RELATIVE_INTENSITY_SCALE=100,
+                          MAX_HASH_CHARATERS_ENCODED_SPECTRUM=20) {
+    max_intensity = max(peaks[,2])
+    ## Scale to maximum intensity
+    peaks[,2] <-    as.integer(peaks[,2] / max(peaks[,2]) * RELATIVE_INTENSITY_SCALE + EPS_CORRECTION)
+    ## Sorted by ascending m/z and ties broken by descending intensity
+    o <- order(peaks[,1], -1*peaks[,2], decreasing=FALSE)
+    peakString <- paste(apply(peaks[o,,drop=FALSE], MARGIN=1,
+                              FUN=function(x) {paste(c(decimalavoidance(x[1]+EPS_CORRECTION), x[2]),
+                                  collapse=":")}), collapse=" ")
+    ## cat("Prehash: ", peakString, sep="", file="/tmp/prehash-R") ## Debugging of spectrum-string
+    block2 <- substr(digest(peakString, algo="sha256", serialize=FALSE),
+                     1, MAX_HASH_CHARATERS_ENCODED_SPECTRUM)    
+}
+integer2base36 <- function(i) {
+    integer2base36code <- sapply(c(48:57, 97:122), function(i) rawToChar(as.raw(i)))
+    paste(integer2base36code[i+1], collapse="")
+}
+getBlockHist <- function(peaks) {
+    ## Initialise output
+    wrappedhist <- integer(BINS)
+    binindex <- as.integer(peaks[,1] / BIN_SIZE) 
+    summedintensities <- tapply(peaks[,2], binindex, sum)
+    wrappedbinindex <- unique(binindex) %% BINS 
+    wrappedintensities <- tapply(summedintensities, wrappedbinindex, sum)
+    normalisedintensities <- as.integer(wrappedintensities/max(wrappedintensities)*FINAL_SCALE_FACTOR)
+    wrappedhist[sort(unique(wrappedbinindex))+1] <- normalisedintensities
+    paste(integer2base36(wrappedhist), collapse="")
+}
+getSplash <- function(peaks) {
+    block2 <- getBlockHist(peaks)
+    block3 <- getBlockHash(peaks)
+    splash <- paste("splash10",
+                    block2,
+                    block3,
+                    sep="-")
+    return(splash)
+}
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -10,6 +10,7 @@
 #' @import rcdk 
 #' @import rjson 
 #' @import yaml 
+#' @import digest
 NULL # This is required so that roxygen knows where the first manpage starts
@@ -84,6 +85,9 @@ NULL # This is required so that roxygen knows where the first manpage starts
 #' @param rtMargin	The retention time tolerance to use.
 #' @param deprofile	Whether deprofiling should take place, and what method should be
 #' 			used (cf. \code{\link{deprofile}}) 
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
 #' @return	An \code{RmbSpectraSet} (for \code{findMsMsHR}). Contains parent MS1 spectrum (\code{@@parent}), a block of dependent MS2 spectra ((\code{@@children})
 #' 			and some metadata (\code{id},\code{mz},\code{name},\code{mode} in which the spectrum was acquired.
 #' 
@@ -112,7 +116,8 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
 		rtMargin = getOption("RMassBank")$rtMargin,
 		deprofile = getOption("RMassBank")$deprofile,
 		headerCache = NULL,
-		peaksCache = NULL)
+		peaksCache = NULL,
+        retrieval="standard")
 {
 	# access data directly for finding the MS/MS data. This is done using
@@ -122,7 +127,7 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
 	if(!is.null(fileName))
 		msRaw <- openMSfile(fileName)
-	mzLimits <- findMz(cpdID, mode)
+	mzLimits <- findMz(cpdID, mode, retrieval=retrieval)
 	mz <- mzLimits$mzCenter
 	limit.fine <- ppm(mz, ppmFine, p=TRUE)
 	if(!useRtLimit)
@@ -143,7 +148,12 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
 	#sp@mz <- mzLimits
 	sp@id <- as.character(as.integer(cpdID))
 	sp@name <- findName(cpdID)
-	sp@formula <- findFormula(cpdID)
+    ENV <- environment()
+	if(retrieval == "unknown"){
+        sp@formula <- ""
+    } else{
+        sp@formula <- findFormula(cpdID, retrieval=retrieval)
+    }
 	sp@mode <- mode
 	# If we had to open the file, we have to close it again
@@ -268,7 +278,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
 									tic = line["totIonCurrent"],
 									peaksCount = line["peaksCount"],
 									rt = line["retentionTime"],
-									acquisitionNum = as.integer(line["acquisitionNum"]),
+									acquisitionNum = as.integer(line["seqNum"]),
 									centroided = TRUE
 									)
 						})
@@ -283,7 +293,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
 						polarity = as.integer(masterHeader$polarity),
 						peaksCount = as.integer(masterHeader$peaksCount),
 						rt = masterHeader$retentionTime,
-						acquisitionNum = as.integer(masterHeader$acquisitionNum),
+						acquisitionNum = as.integer(masterHeader$seqNum),
 						tic = masterHeader$totIonCurrent,
 						centroided = TRUE
 						)

--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -42,12 +42,13 @@
 #' @export
 msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, 
 					readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE, 
-					Args = NULL, settings = getOption("RMassBank"), progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){
+					Args = NULL, settings = getOption("RMassBank"),
+                    progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){
 	.checkMbSettings()
 	##Read the files and cpdids according to the definition
 	##All cases are silently accepted, as long as they can be handled according to one definition
 	if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
 	if(is.null(filetable)){
 		##If no filetable is supplied, filenames must be named explicitly
 		if(is.null(files))
@@ -83,23 +84,24 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 	if(!all(file.exists(w@files))){
 		stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist")
 	}
-	na.ids <- which(is.na(sapply(cpdids, findSmiles)))
+    # na.ids <- which(is.na(sapply(cpdids, findSmiles)))
-	if(length(na.ids)){
+    # if(length(na.ids)){
-		stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list")
+        # stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list")
-	}
+    # }
 	##This should work
-	if(readMethod == "minimal"){
+    if(readMethod == "minimal"){
-		##Edit options
+        ##Edit options
-		opt <- getOption("RMassBank")
+        opt <- getOption("RMassBank")
-		opt$recalibrator$MS1 <- "recalibrate.identity"
+        opt$recalibrator$MS1 <- "recalibrate.identity"
-		opt$recalibrator$MS2 <- "recalibrate.identity"
+        opt$recalibrator$MS2 <- "recalibrate.identity"
-		options(RMassBank=opt)
+        opt$add_annotation==FALSE
-		##Edit analyzemethod
+        options(RMassBank=opt)
-		analyzeMethod <- "intensity"
+        ##Edit analyzemethod
-	}
+        analyzeMethod <- "intensity"
+    }
 	if(readMethod == "mzR"){
 		##Progressbar
@@ -113,7 +115,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 							# Find compound ID
 							cpdID <- cpdids[count]
+							retrieval <- findLevel(cpdID,TRUE)
 							# Set counter up
 							envir$count <- envir$count + 1
@@ -124,7 +126,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 									mzCoarse = settings$findMsMsRawSettings$mzCoarse,
 									fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan,
 									rtMargin = settings$rtMargin,
-									deprofile = settings$deprofile)
+									deprofile = settings$deprofile, retrieval=retrieval)
 							gc()
 							# Progress:
@@ -310,7 +312,7 @@ msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NU
 			} else{
 				psp[[i]] <- which.min( abs(getRT(anmsms[[i]]) - RT) )
 			}
-			## Now find the pspec for compound       
+			## Now find the pspec for compound
 			## 2nd best: Spectrum closest to MS1
 			##psp <- which.min( abs(getRT(anmsms) - actualRT))
@@ -345,7 +347,7 @@ msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NU
 			spectraNum <- length(w@spectra[[which(IDindex)]]@children)
 			w@spectra[[which(IDindex)]]@children[[spectraNum+1]] <- sp@children[[1]]
 		} else {
-			w@spectra[[length(www@spectra)+1]] <- sp
+			w@spectra[[length(w@spectra)+1]] <- sp
 		}
 	} else{
 		w@spectra[[1]] <- sp

--- a/R/parseMassBank.R
+++ b/R/parseMassBank.R
@@ -167,18 +167,18 @@ parseMassBank <- function(Files){
 			}
 		##Extract the peaks and write the data into a data.frame
 		PKStart <- grep('PK$PEAK:',record, fixed = TRUE) + 1
-		endslash <- grep('//',record, fixed = TRUE)
+		endslash <- tail(grep('//',record, fixed = TRUE),1)
-			if(PKStart < endslash){
+        if(PKStart < endslash){
-				splitted <- strsplit(record[PKStart:(endslash-1)]," ")
+            splitted <- strsplit(record[PKStart:(endslash-1)]," ")
-				PKPeak <- matrix(nrow = endslash - PKStart, ncol = 3)
+            PKPeak <- matrix(nrow = endslash - PKStart, ncol = 3)
-				for(k in 1:length(splitted)){
+            for(k in 1:length(splitted)){
-					splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
+                splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
-					PKPeak[k,] <- splitted[[k]]
+                PKPeak[k,] <- splitted[[k]]
-				}
+            }
-				PKPeak <- as.data.frame(PKPeak, stringsAsFactors = FALSE)
+            PKPeak <- as.data.frame(PKPeak, stringsAsFactors = FALSE)
-				PKPeak[] <- lapply(PKPeak, type.convert)
+            PKPeak[] <- lapply(PKPeak, type.convert)
-				colnames(PKPeak) <- c("m/z", "int", "rel.int.")
+            colnames(PKPeak) <- c("m/z", "int", "rel.int.")
-			}
+        }
 		mb@compiled_ok[[i]][['PK$PEAK']] <- PKPeak	

--- a/R/settings_example.R
+++ b/R/settings_example.R
@@ -187,7 +187,7 @@ NULL
    authors = "Nomen Nescio, The Unseen University",
    copyright = "Copyright (C) XXX",
    publication = "",
-    license = "CC BY-SA",
+    license = "CC BY",
    instrument = "LTQ Orbitrap XL Thermo Scientific",
    instrument_type = "LC-ESI-ITFT",
    confidence_comment = "standard compound",

--- a/R/sysData.rda
+++ b/R/sysData.rda
--- a/R/tools.R
+++ b/R/tools.R
@@ -116,15 +116,15 @@ findProgress <- function(workspace)
 #' 
 updateSettings <- function(settings, warn=TRUE)
 {
-  settings.new <- .settingsList
+    settings.new <- .settingsList
-  settings.old <- settings
+    settings.old <- settings
-  renew <- setdiff(names(settings.new), names(settings.old))
+    renew <- setdiff(names(settings.new), names(settings.old))
-  if(length(renew) > 0 && warn==TRUE){
+    if(length(renew) > 0 && warn==TRUE){
    warning(paste0("Your settings are outdated! The following fields were taken from default values: ", 
            paste(renew,collapse=", ")))
    if("filterSettings" %in% renew)
      warning("The default values of filterSettings could change the processing behaviour if you have negative-mode spectra. Check ?updateSettings for details.")
-  }
+    }
-  settings.old[renew] <- settings.new[renew]
+    settings.old[renew] <- settings.new[renew]
-  return(settings.old)
+    return(settings.old)
 }
--- a/R/validateMassBank.R
+++ b/R/validateMassBank.R
@@ -175,7 +175,7 @@ smiles2mass <- function(SMILES){
 	return(mass)
 }
-.unitTestRMB <- function(WD=getwd()){
+.unitTestmzR <- function(WD=getwd()){
 	requireNamespace("RUnit",quietly=TRUE)
 	requireNamespace("RMassBankData",quietly=TRUE)
 	oldwd <- getwd()
@@ -217,19 +217,12 @@ smiles2mass <- function(SMILES){
 					#testFuncRegexp = "^test.+",
 					rngKind = "Marsaglia-Multicarry",
 					rngNormalKind = "Kinderman-Ramage")
-	testSuite3 <- RUnit::defineTestSuite("Evaluation of correct handling if no peaks are found", dirs = system.file("unitTests", 
-		package="RMassBank"), testFileRegexp = "runit.NOPEAKS.R",
-					#testFuncRegexp = "^test.+",
-					rngKind = "Marsaglia-Multicarry",
-					rngNormalKind = "Kinderman-Ramage")	
 	testData <- suppressWarnings(RUnit::runTestSuite(testSuite))
 	testData2 <- suppressWarnings(RUnit::runTestSuite(testSuite2))
-	testData3 <- suppressWarnings(RUnit::runTestSuite(testSuite3))
 	file.remove(c("pH_narcotics_Failpeaks.csv","pH_narcotics.RData","pH_narcotics_RA.RData","pH_narcotics_RF.RData"))
 	RUnit::printTextProtocol(testData)
 	RUnit::printTextProtocol(testData2)
-	RUnit::printTextProtocol(testData3)
 	setwd(oldwd)
 	return(testData)
 }

--- a/R/webAccess.R
+++ b/R/webAccess.R
@@ -41,7 +41,7 @@ getCactus <- function(identifier, representation)
  ret <- tryCatch(
    getURLContent(paste(
-      "http://cactus.nci.nih.gov/chemical/structure/",
+      "https://cactus.nci.nih.gov/chemical/structure/",
      URLencode(identifier), "/", representation, sep='')),
    error = function(e) NA)
  if(is.na(ret))

--- a/inst/RMB_options.ini
+++ b/inst/RMB_options.ini
@@ -13,7 +13,7 @@ deprofile:
 # Deviation (in minutes) allowed the for retention time
 rtMargin: 0.4
 # Systematic retention time shift
-rtShift: -0.3
+rtShift: 0.0
 # Directory to OpenBabel. Required for creating molfiles for MassBank export.
 # If no OpenBabel directory is given, RMassBank will attempt to use the CACTUS webservice
@@ -39,7 +39,7 @@ annotations:
    authors: Nomen Nescio, The Unseen University
    copyright: Copyright (C) XXX
    publication: 
-    license: CC BY-SA
+    license: CC BY
    instrument: LTQ Orbitrap XL Thermo Scientific
    instrument_type: LC-ESI-ITFT
    confidence_comment: standard compound

--- a/inst/unitTests/runTests.R
+++ b/inst/unitTests/runTests.R
+w <- newMsmsWorkspace()
+RmbDefaultSettings()
+files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
+w@files <- files
+loadList(system.file("list/NarcoticsDataset.csv", package="RMassBankData"))
+w <- msmsWorkflow(w, mode="pH", steps=c(1:4), archivename="pH_narcotics")
+w <- msmsWorkflow(w, mode="pH", steps=c(5:8), archivename="pH_narcotics")	
+mb <- newMbWorkspace(w)
+mb <- loadInfolists(mb, system.file("infolists", package="RMassBankData"))
+mb <- mbWorkflow(mb)
+### Error message code structure borrowed from rcppgls:
+### https://github.com/eddelbuettel/rcppgsl/blob/master/inst/unitTests/runTests.R
+testElec <- RUnit::defineTestSuite("Electronic noise and formula calculation Test", dirs = system.file("unitTests", 
+									package="RMassBank"), testFileRegexp = "test_El.*")
+testAcqu <- RUnit::defineTestSuite("Evaluation of data acquisition process", dirs = system.file("unitTests", 
+									package="RMassBank"), testFileRegexp = "test_mzR.R")
+testNope <- RUnit::defineTestSuite("Evaluation of correct handling if no peaks are found", dirs = system.file("unitTests", 
+									package="RMassBank"), testFileRegexp = "test_NOPEAKS.R")
+errEval <- function(testSuite){
+	Res <- RUnit::runTestSuite(testSuite)
+	err <- getErrors(Res)
+	if((err$nFail + err$nErr) > 0){
+            stop(sprintf('unit test problems: %d failures, %d errors in testfunction: "%s"', err$nFail, err$nErr, testSuite$name) )
+        } else{
+            success <- err$nTestFunc - err$nFail - err$nErr - err$nDeactivated
+            cat( sprintf( "%d / %d\n", success, err$nTestFunc ) )
+    }
+}
+errEval(testElec)
+errEval(testAcqu)
+errEval(testNope)
\ No newline at end of file
--- a/inst/unitTests/runit.EN_FC.R
+++ b/inst/unitTests/runit.EN_FC.R
@@ -5,6 +5,5 @@ test.eletronicnoise <- function(){
 test.formulacalculation <- function(){
 	failpeaks <- read.csv("pH_narcotics_Failpeaks.csv")
 	RUnit::checkTrue(!any(apply(failpeaks,1,function(x) all(x[2:5] == c(70,2758,321,56.04933)))))
 }
\ No newline at end of file