diff --git a/DESCRIPTION b/DESCRIPTION
index 5f46eca1efab0e58a02c93982a074acc7f381d49..d92f4806a506325fc860be2cbf6743f5039fe043 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RMassBank
 Type: Package
 Title: Workflow to process tandem MS files and build MassBank records
-Version: 1.99.7
+Version: 1.99.11
 Authors@R: c(
     person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
     role=c("cre")),
@@ -28,8 +28,8 @@ Depends:
     Rcpp
 Encoding: UTF-8
 Imports:
-    XML,RCurl,rjson,
-    rcdk,yaml,mzR,methods,Biobase,MSnbase,S4Vectors
+    XML,RCurl,rjson,S4Vectors,digest,
+    rcdk,yaml,mzR,methods,Biobase,MSnbase
 Suggests:
     gplots,RMassBankData,
     xcms (>= 1.37.1),
@@ -41,6 +41,7 @@ Collate:
     'alternateAnalyze.R'
     'createMassBank.R'
     'formulaCalculator.R'
+    'getSplash.R'
     'leCsvAccess.R'
     'leMsMs.r'
     'leMsmsRaw.R'
diff --git a/NAMESPACE b/NAMESPACE
index e2b792db594fda8fe65ad0f7d5483ef8faf69471..a589a8bf59e847da04320388d567b325827428df 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,6 +34,7 @@ export(filterPeaksMultiplicity)
 export(findCAS)
 export(findEIC)
 export(findFormula)
+export(findLevel)
 export(findMass)
 export(findMsMsHR)
 export(findMsMsHR.direct)
@@ -52,6 +53,7 @@ export(formulastring.to.list)
 export(gatherCompound)
 export(gatherData)
 export(gatherDataBabel)
+export(gatherDataUnknown)
 export(gatherPubChem)
 export(gatherSpectrum)
 export(getCSID)
@@ -129,6 +131,7 @@ import(RCurl)
 import(Rcpp)
 import(S4Vectors)
 import(XML)
+import(digest)
 import(methods)
 import(mzR)
 import(rcdk)
diff --git a/R/Isotopic_Annotation.R b/R/Isotopic_Annotation.R
index 11e21c539a6a3a23236878f6a3244d259b56afef..760f20875c303499a329ef97491729188736f5ee 100644
--- a/R/Isotopic_Annotation.R
+++ b/R/Isotopic_Annotation.R
@@ -4,6 +4,7 @@
 #' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
 #' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
 #' @param intensity_cutoff The cutoff (as an absolute intensity value) under which isotopic peaks shouldn't be checked for or accepted as valid.
+#'        Please note: The cutoff is not hard in the sense that it interacts with the intensity_precision parameter.
 #' @param intensity_precision The difference that is accepted between the calculated and observed intensity of a possible isotopic peak. Further details down below.
 #' @param conflict Either "isotopic"(Peak formulas are always chosen if they fit the requirements for an isotopic peak)
 #' 		  or "strict"(Peaks are only marked as isotopic when there hasn't been a formula assigned before.)
@@ -25,8 +26,8 @@
 #' @author Michael Stravs, Eawag <michael.stravs@@eawag.ch>
 #' @author Erik Mueller, UFZ
 #' @export
-checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_precision = "low", conflict = "dppm", 
-							isolationWindow = 4, evalMode = "complete", plotSpectrum = TRUE, settings = getOption("RMassBank")){
+checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 0, intensity_precision = "none", conflict = "strict", 
+							isolationWindow = 2, evalMode = "complete", plotSpectrum = TRUE, settings = getOption("RMassBank")){
 
 	# Load library and data
 	requireNamespace("enviPat",quietly=TRUE)
@@ -78,12 +79,6 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 		stop("mode = \"", mode, "\" not defined")
 	}
 	
-	if(!(evalMode %in% c("complete"))){
-	    stop('evalMode must currently be specified as "complete"')
-	}
-	
-	evalMode <- c("add","check")
-	
 	
 	# "default" isotopes (i.e. those with the highest abundance)
 	defIsotopes <- c("107Ag", "27Al", "40Ar", "75As", "197Au", "11B", "138Ba", "9Be", "209Bi",
@@ -98,7 +93,7 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 
 	
 	# Get the ppm limit from the settings
-	ppmlimit <- 2.25 * filterSettings$ppmFine
+	ppmlimit <- filterSettings$ppmFine
 	
 	# Get the cutoff from the settings
 	cut <- filterSettings$fineCut
@@ -118,12 +113,10 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 		id <- as.numeric(spec@id)
 		specNum <- 0
 		specEnv <- environment()
-
 		# lapply over all extracted MS2 spectra
 		lapply(spec@children, function(msmsdata){
 			
 			specEnv$specNum <- specEnv$specNum + 1
-			
 			# Extract currently relevant peaks
 			currentMPeaks <- matchedPeaks[(matchedPeaks$cpdID == id) & (matchedPeaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
 			currentUPeaks <- unmatchedPeaks[(unmatchedPeaks$cpdID == id) & (unmatchedPeaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
@@ -145,8 +138,10 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 			# Generate isotopic patterns of the matched peaks
 			# sort out possible isotopic peaks according to
 			# isolationwindow and intensity
-			isoMPatterns <- apply(currentMPeaks, 1, .findPattern, defIsotopes = defIsotopes, intensity_cutoff = intensity_cutoff, 
+			isoMPatterns <- lapply(1:nrow(currentMPeaks), function(index){
+                                .findPattern(currentMPeaks[index,,drop=FALSE], defIsotopes = defIsotopes, intensity_cutoff = intensity_cutoff, 
 									precisionVal = precisionVal, ppmlimit = ppmlimit, isolationWindow = isolationWindow)
+                            })
 			
 			# Name the isopatterns
 			names(isoMPatterns) <- currentMPeaks$formula
@@ -155,8 +150,9 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 			# Which isotope patterns still have theoretical intensities above the cutoff?
 			peaksToCheck <- which(as.logical(sapply(isoMPatterns,nrow)))
 			
+            # If there are no peaks left, then abort for this spectrum
 			if(!length(peaksToCheck)){
-				message(paste0("The peaks of compound ", id, " in spectrum #", specEnv$specNum," are not intense enough to search for isotopic peaks"))
+				message(paste0("The already annotated peaks of compound ", id, " in spectrum #", specEnv$specNum," are not intense enough to search for isotopic peaks"))
 				if(plotSpectrum){
 					plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
 				}
@@ -166,135 +162,60 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 			# Now, look for isotopic patterns in unmatched peaks with all specified parameters
 
 			# Which peaks have no formula annotation within the dppm as of now?
-			peaksNoAnnotation <- currentUPeaks[which(is.na(currentUPeaks$dppm) | abs(currentUPeaks$dppmBest) > ppmlimit/2.25),]
+			peaksNoAnnotation <- currentUPeaks[which(is.na(currentUPeaks$dppm) | abs(currentUPeaks$dppmBest) > ppmlimit),]
 			
+            # What is the mean of the dppm of the currently "OK" peaks?
+            # (Used for calculating the score parameter for intensities)
+            dppmMean <- mean(abs(currentMPeaks[currentMPeaks$filterOK,]$dppm))
+            
 			# If there are any peaks without annotation:
 			if(nrow(peaksNoAnnotation)){
-				UPList <- .findMatchingPeaks(peaksNoAnnotation,isoMPatterns[peaksToCheck])
+				UPList <- .findMatchingPeaks(peaksNoAnnotation,isoMPatterns[peaksToCheck], dppmMean)
 			} else{
 				# If there are no peaks, fake an empty dataset
-				UPList <- list(list(data.frame(dummy=character())),list(data.frame(dummy=character())))
-				
+				UPList <- list(matrix(character(0),0,29))
 			}
-			
-			# Generate matrix of peaks that should be added
-			additionMatrix <- .peakReasoner(UPList, currentMPeaks)
-			
-			# If conflict is "strict", end here
+            
+			# If conflict is "strict", end here (And plot, maybe)
 			if(conflict == "strict"){
-				if(nrow(additionMatrix)){
-					wEnv$w@aggregated[additionMatrix$index,] <- additionMatrix
-				}
-				
+                # Generate matrix of peaks that should be added
+                additionMatrix <- .peakReasoner(list(matrix(character(0),0,29)) , UPList, currentMPeaks)
 				if(plotSpectrum){
 					plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
 					if(nrow(additionMatrix)){
 						points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
-						# Add all the peaks that could be annotated as isotopic
-						wEnv$w@aggregated[additionMatrix$index,] <- additionMatrix
 					}
 				}
+                # If there is something in the matrix
+                if(nrow(additionMatrix)){
+                    # Add all the peaks that could be annotated as isotopic
+                    wEnv$w@aggregated[additionMatrix$index,] <- additionMatrix
+                }
 				return(0)
 			}
 			
 			
 			# Now check the matched peaks for the patterns and put all peaks in a matrix
-			MPList <- .findMatchingPeaks(currentMPeaks[currentMPeaks$filterOK,], isoMPatterns[peaksToCheck])
+			MPList <- .findMatchingPeaks(currentMPeaks[currentMPeaks$filterOK,], isoMPatterns[peaksToCheck], dppmMean)
 			
 			# Generate matrix of peaks that should be corrected
-			correctionMatrix <- .peakReasoner(MPList, currentMPeaks)
-		
-			IsoPeaksMindex   <- which(sapply(MPList, function(x) any(sapply(x,nrow))))
-			noIsoPeaksMindex <- which(!sapply(MPList, function(x) any(sapply(x,nrow))))
-			IsoPeaksUindex   <- which(sapply(UPList, function(x) any(sapply(x,nrow))))
-			noIsoPeaksUindex <- which(!sapply(UPList, function(x) any(sapply(x,nrow))))
-			
-			if(conflict=="isotopic"){
-			    if(nrow(correctionMatrix)){
-			        
-			        # Peaks that are changed but also seem to have isotopes themselves
-			        confPeaksIndex <- which(correctionMatrix$index %in% currentMPeaks$index[peaksToCheck[IsoPeaksMindex]])
-			        conflictedMatrix <- correctionMatrix[confPeaksIndex,,drop=FALSE]
-			        
-			        if(length(confPeaksIndex)){
-			            correctionMatrix <- correctionMatrix[-confPeaksIndex,]
-			        }
-			        
-			        
-			        if(nrow(correctionMatrix)){
-			            wEnv$w@aggregated[correctionMatrix$index,] <- correctionMatrix
-			        }
-			    }
-			    
-			    if(!("check" %in% evalMode)){
-			        if(plotSpectrum){
-			            plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
-			            if(nrow(additionMatrix)){
-			                points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
-			            }
-			            if(nrow(correctionMatrix)){
-			                points(correctionMatrix$mzFound, correctionMatrix$intensity,type="h", col="yellow", lwd=3)
-			            }
-			            if(nrow(conflictedMatrix)){
-			                points(conflictedMatrix$mzFound, conflictedMatrix$intensity,type="h", col="red", lwd=3)
-			            }
-			        }
-			        return(0)
-			    }
-			}
-			
-			if("check" %in% evalMode){
-
-			    if(plotSpectrum){
-			        plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), xlim=c(min(currentMPeaks$mzFound), max(currentMPeaks$mzFound)), col="black", xlab="m/z", ylab="intensity", lwd=3)
-			    }
-			    
-				indexList <- lapply(unique(currentMPeaks$mzFound[peaksToCheck]),function(mz){
-					which(currentMPeaks$mzFound[peaksToCheck] == mz)
-				})
-				
-				# Matched Peaks where no isotopes have been found
-				hasNoIsoIndex <- unlist(sapply(indexList,function(indices){
-					if(any(IsoPeaksMindex %in% indices) || any(IsoPeaksUindex %in% indices)){
-						return(vector())
-					} else{
-						return(currentMPeaks$index[peaksToCheck[indices]])
-					}
-				}))
-				
-				# Matched Peaks which should have isotopes and are no isotopes themselves
-				isNoIsoIndex <- currentMPeaks$index[peaksToCheck[
-					which(!(currentMPeaks$mzFound[peaksToCheck] %in% additionMatrix$mzFound) | !(currentMPeaks$mzFound[peaksToCheck] %in% correctionMatrix$mzFound))
-				]]
-				
-				noIsoPeaksMatrix <- wEnv$w@aggregated[intersect(hasNoIsoIndex,isNoIsoIndex),]
-				
-				
-				
-			    if(nrow(noIsoPeaksMatrix)){
-			        wEnv$w@aggregated[noIsoPeaksMatrix$index,]$good <- FALSE
-			        wEnv$w@aggregated[noIsoPeaksMatrix$index,]$filterOK <- FALSE
-			        wEnv$w@aggregated[noIsoPeaksMatrix$index,]$matchedReanalysis <- FALSE
-			        if(plotSpectrum){
-			            points(noIsoPeaksMatrix$mzFound, noIsoPeaksMatrix$intensity, type="h", col="orange", lwd=3)
-			        }
-			    }
-			    
-			    if("add" %in% evalMode && conflict=="isotopic" && plotSpectrum){
-			        if(nrow(additionMatrix)){
-			            points(additionMatrix$mzFound, additionMatrix$intensity,type="h", col="green", lwd=3)
-			        }
-			        if(nrow(correctionMatrix)){
-			            points(correctionMatrix$mzFound, correctionMatrix$intensity,type="h", col="yellow", lwd=3)
-						if(nrow(conflictedMatrix)){
-							points(conflictedMatrix$mzFound, conflictedMatrix$intensity,type="h", col="red", lwd=3)
-						}
-			        }
-
-			    }
-			}
-			return(0)
+			correctionMatrix <- .peakReasoner(MPList, UPList, currentMPeaks)
+            
+            
+            # If there is something in the matrix
+            if(nrow(correctionMatrix)){
+                # Add all the peaks that could be annotated as isotopic
+                wEnv$w@aggregated[correctionMatrix$index,] <- correctionMatrix
+            }
 			
+            # If the newly annotated peaks should be plotted, plot them
+            if(plotSpectrum){
+                plot(currentMPeaks$mzFound, currentMPeaks$intensity,type="h", main=paste(id,findName(id)), col="black", xlab="m/z", ylab="intensity", lwd=3)
+                if(nrow(correctionMatrix)){
+                    points(correctionMatrix$mzFound, correctionMatrix$intensity,type="h", col="green", lwd=3)
+                }
+            }
+            return(0)
 		})
 	})
 	return(w)
@@ -304,63 +225,18 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 # So modularize this function
 .findPattern <- function(aggregateRow, defIsotopes, intensity_cutoff, precisionVal, ppmlimit, isolationWindow){
 	
-	# Find pattern
-	capture.output(pattern <- as.data.frame(enviPat::isopattern(aggregateRow["formula"], isotopes = isotopes)[[1]]))
-	mass <- findMz.formula(as.character(aggregateRow["formula"]),"")$mzCenter
+	# Find pattern and mass
+	outp <- capture.output(pattern <- as.data.frame(enviPat::isopattern(aggregateRow[,"formula"], isotopes = isotopes)[[1]]))
+    rm("outp")
+	mass <- as.numeric(aggregateRow[,"mzCalc"])
 	
 	# Find index of monoisotopic molecule and
 	# normalize abundance that monoisotopic molecule has always "1"
 	mainIndex <- which.min(abs(pattern[,"m/z"]-mass))
 	pattern[,"abundance"] <- round(pattern[,"abundance"] * (100/pattern[mainIndex,"abundance"]),3)
 	
-	# Find all isotope atom names (only in colnames of patterns, sadly)
-	isoCols <- colnames(pattern)[3:ncol(pattern)]
-	
-	# Order the formulas to be in Hill system:
-	# First the Cs, then the Hs, then lexicographical
-	# First: Split isotope names so that there only are atoms
-	# Find position of C and H
-	splitNames <- gsub('^([0-9]+)(.+)$', '\\2', isoCols)
-	CHPos <- c(which(splitNames == "C"),which(splitNames == "H"))
-	
-	# Account for special case: No Cs or Hs
-	if(length(CHPos)){
-		# If there are Cs and Hs, overwrite the positions so they always get sorted to the front
-		splitNames[CHPos] <- ""
-	}
-	
-	# Default isotopes are always first in the colnames, so no need to order internally between isotopes
-	atomOrder <- order(splitNames)
-	
-	# If there are Cs and Hs, the order must be preserved
-	if(length(CHPos)){
-		atomOrder[1:length(CHPos)] <- CHPos
-	}
-	# else it is already ordered
-	
-	# Mark new names for formula creation.
-	newnames <- unname(sapply(isoCols, function(currentCol){
-		if(currentCol %in% defIsotopes){
-			# "Default" isotope (no isotope mass in formula, e.g. "C")
-			return(gsub('^(.{0})([0-9]+)(.+)$', '\\3', currentCol))
-		}else{
-			# Other isotopes (isotope mass in formula, e.g. "[13]C")
-			return(gsub('^(.{0})([0-9]+)(.+)$', '\\1[\\2]\\3', currentCol))
-		}
-	}))
-	
-	# Generate the formula for every pattern
-	pattern$formula <- apply(pattern,1,function(p){
-		paste0(sapply(atomOrder+2,function(isoIndex){
-			if(p[isoIndex] == 0){ 
-				return("")
-			}
-			if(p[isoIndex] == 1){
-				return(newnames[isoIndex-2])
-			}
-			return(paste0(newnames[isoIndex-2],p[isoIndex]))
-		}),collapse="")
-	})
+    # Add the formula of every isotope to every row in the pattern
+	pattern <- .annotateFormulaToEnviPatTable(pattern, defIsotopes)
 	
 	# Sort pattern by abundance
 	pattern <- pattern[order(pattern[,"abundance"],decreasing = T),][-mainIndex,]
@@ -378,18 +254,21 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 	
 	# Find the absolute intensity ranges
 	intensities <- apply(pattern, 1, function(patternRow){
-		as.numeric(aggregateRow["intensity"]) * as.numeric(patternRow["abundance"])/100
+		as.numeric(aggregateRow[,"intensity"]) * as.numeric(patternRow["abundance"])/100
 	})
 	
 	roundedInt <- round(intensities,digits=-2)
-	keepIntIndex <- which(roundedInt >= intensity_cutoff)
+	keepIntIndex <- which((roundedInt + roundedInt * precisionVal) >= intensity_cutoff)
 	pattern <- pattern[keepIntIndex,,drop=FALSE]
 	if(nrow(pattern)){
 		pattern$minintensity <- roundedInt[keepIntIndex] - roundedInt[keepIntIndex] * precisionVal
 		pattern$maxintensity <- roundedInt[keepIntIndex] + roundedInt[keepIntIndex] * precisionVal
-		
+        pattern$expectedintensity <- roundedInt[keepIntIndex]
+		pattern$monoisotopicFormula <- aggregateRow[,"formula"]
+        pattern$monoisotopicIndex <- aggregateRow[,"index"]
+        pattern$monoisotopicMz <- aggregateRow[,"mzFound"]
 		# Calculate the expected mz range of the isotope peak
-		mzCols <- t(sapply(pattern[,"m/z"], ppm, dppm=ppmlimit/2.25,l=T))
+		mzCols <- t(sapply(pattern[,"m/z"], ppm, dppm=ppmlimit,l=T))
 		colnames(mzCols) <- c("maxMZ","minMZ")
 		pattern <- cbind(pattern,mzCols)
 	}
@@ -397,7 +276,7 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 	return(pattern)
 }
 
-.findMatchingPeaks <- function(aggregatedPeakMatrix, isoPatterns){
+.findMatchingPeaks <- function(aggregatedPeakMatrix, isoPatterns, dppmMean){
 	# Only unique mzs (peaks can be annotated multiple times and will therefore be included multiple times)
 	# It doesn't matter which one, so take the first
 	aggregatedPeakMatrix <- aggregatedPeakMatrix[sapply(unique(aggregatedPeakMatrix$mzFound), function(mz){
@@ -406,90 +285,230 @@ checkIsotopes <- function(w, mode = "pH", intensity_cutoff = 1000, intensity_pre
 	
 	# Iterate over all supplied patterns
 	lapply(isoPatterns, function(pattern){
-		x <- list()
-		for(r in 1:nrow(pattern)){}
-		
-		apply(pattern, 1, function(patternRow){
+
+		do.call(rbind,lapply(1:nrow(pattern), function(index){
+            patternRow <- pattern[index,,drop=FALSE]
 			# Find peaks that fit the specified intensities and m/z
-			pIndex <- which(aggregatedPeakMatrix[,"mzFound"] < as.numeric(patternRow["maxMZ"])
-									& aggregatedPeakMatrix[,"mzFound"] > as.numeric(patternRow["minMZ"])
-									& aggregatedPeakMatrix[,"intensity"] < as.numeric(patternRow["maxintensity"])
-									& aggregatedPeakMatrix[,"intensity"] > as.numeric(patternRow["minintensity"])
+			pIndex <- which(aggregatedPeakMatrix[,"mzFound"] < patternRow[,"maxMZ"]
+									& aggregatedPeakMatrix[,"mzFound"] > patternRow[,"minMZ"]
+									& aggregatedPeakMatrix[,"intensity"] < patternRow[,"maxintensity"]
+									& aggregatedPeakMatrix[,"intensity"] > patternRow[,"minintensity"]
 			)
 			
 			# Note these Peaks
 			candidates <- aggregatedPeakMatrix[pIndex,]
 			# If there are any: Change parameters in the aggregated matrix
 			if(nrow(candidates)){
-				candidates$dppm <- (candidates$mzFound /  as.numeric(patternRow["m/z"]) - 1) * 1e6
-				candidates$mzCalc <-  as.numeric(patternRow["m/z"])
-				candidates$formula <-  patternRow["formula"]
+                # General parameters that need to be changed
+				candidates$dppm <- (candidates$mzFound /  as.numeric(patternRow[,"m/z"]) - 1) * 1e6
+				candidates$mzCalc <- patternRow[,"m/z"]
+				candidates$formula <- patternRow[,"formula"]
 				candidates$matchedReanalysis <- NA
 				candidates$filterOK <- TRUE
 				candidates$good <- TRUE
 				candidates$dppmBest <- candidates$dppm
 				candidates$formulaCount <- 1
-			}
+                # New parameters (are used to make peak reasoning easier, we would
+                # need to find them out later anyways, so do it now when it's easy)
+                
+                # 1) The monoisotopic formula of the now added peak
+                candidates$monoisotopicFormula <- patternRow[,"monoisotopicFormula"]
+                
+                # 2) The calculated expected intensity (added for debug reasons)
+                candidates$intCalc <- patternRow[,"expectedintensity"]
+                
+                # 3) Produce "scores" in the range of dppm and scales nicely
+                #candidates$dint <- (max(as.numeric(candidates$intensity),as.numeric(patternRow["expectedintensity"]))/min(as.numeric(candidates$intensity),as.numeric(patternRow["expectedintensity"])))^2
+                maxInt <- max(as.numeric(candidates$intensity),as.numeric(patternRow[,"expectedintensity"]))
+                minInt <- min(as.numeric(candidates$intensity),as.numeric(patternRow[,"expectedintensity"]))
+
+                candidates$dint  <- (maxInt/minInt) * dppmMean
+                candidates$monoisotopicIndex <- patternRow[,"monoisotopicIndex"]
+                
+                candidates$monoisotopicMz <- as.numeric(patternRow[,"monoisotopicMz"])
+			} else{
+                candidates$monoisotopicFormula <- character(0)
+                candidates$intCalc <- numeric(0)
+                candidates$dint <- numeric(0)
+                candidates$monoisotopicIndex <- integer(0)
+                candidates$monoisotopicMz <- numeric(0)
+            }
 			return(candidates)
-		})
+		}))
 	})
 }
 
-.peakReasoner <- function(adjustmentList, aggregatedPeakMatrix){
-				
-	adjustmentMatrix <- do.call(rbind, unlist(adjustmentList,recursive=FALSE))
-	
-	if(!as.logical(nrow(adjustmentMatrix))){
+.peakReasoner <- function(MPList, UPList, currentMPeaks){
+	
+    # Unlist the possible isotope peak lists for the previously matched and previously unmatched peaks
+	adjustmentMatrix <- rbind(do.call(rbind, MPList),do.call(rbind, UPList))
+	
+    # Remove peaks with a much too high dint
+    dintRemoval <- which(adjustmentMatrix$dint>50)
+    if(as.logical(length(dintRemoval))){
+        adjustmentMatrix <-  adjustmentMatrix[-which(adjustmentMatrix$dint>50),]
+    }
+    # No isotopes found, abort
+    if(!as.logical(nrow(adjustmentMatrix))){
 		return(adjustmentMatrix)
 	}
+    
+    # Now we need to remove rows from this matrix, so that 4 conditions are fulfilled:
+    # a) Values in the mzFound can not occur in monoisotopicMz (because an isotope can't be monoisotopic)
+    # b) No monoisotopicMz can have more than one index assigned (because this would mean a peak has 2 formulas)
+    # c) All unique mzFound can only be included once
+    # d) The number of indices must be minimal (it is highly likely that the monoisotopic peak with the most 
+    # isotope peaks is correctly annotated)
+
+    uniqueMonoMz <- unique(adjustmentMatrix$monoisotopicMz)
+    uniqueMzFound <- unique(adjustmentMatrix$mzFound)
+    
+    checkAllMzFound <- function(){
+        all(uniqueMzFound %in% adjustmentMatrix$mzFound) && all(table(adjustmentMatrix$mzFound) == 1)
+    }
+    
+    # Condition a) Go through all monoisotopicMz
+    # and remove those that happen to be in mzFound
+    removalIndex <- which(adjustmentMatrix$monoisotopicMz %in% uniqueMzFound)
+    if(length(removalIndex)){
+        adjustmentMatrix <- adjustmentMatrix[-removalIndex,]
+    }
+    
+    
+    # Condition b) and d)
+    for(monoMz in uniqueMonoMz){
+        monoMzRows <- which(adjustmentMatrix$monoisotopicMz == monoMz)
+        
+        # If there are different indices, use the one that appears most often, else there is no problem
+        if(!all(adjustmentMatrix$monoisotopicIndex[monoMzRows] == adjustmentMatrix$monoisotopicIndex[monoMzRows][1])){
+            # Store all indices that occur equally as often and most often
+            maxIndex <- vector()
+            maxlength <- 0
+            for(index in unique(adjustmentMatrix$monoisotopicIndex[monoMzRows])){
+                currlength <- length(which(adjustmentMatrix$monoisotopicIndex[monoMzRows] == index))
+                if(currlength > maxlength){
+                    maxlength <- currlength
+                    maxIndex <- index
+                }
+                if(currlength == maxlength){
+                    maxlength <- currlength
+                    if(!(index %in% maxIndex)){
+                        maxIndex <-  c(maxIndex,index)
+                    }
+                }
+            }
+            
+            # One index appears most often, so use that one
+            if(length(maxIndex) == 1){
+                monoMzRows <- monoMzRows[-which(adjustmentMatrix$monoisotopicIndex[monoMzRows] == maxIndex)]
+                adjustmentMatrix <- adjustmentMatrix[-monoMzRows,]
+            } else { # Sometimes different indices appear equally as often and most often, use scoring metric
+                bestscore <- 1000
+                bestIndex <- 0
+                # Go through all possible indices
+                for(conflictIndex in maxIndex){
+                    # Calculate Score and note the best score
+                    score <- mean(apply(abs(adjustmentMatrix[which(adjustmentMatrix$monoisotopicIndex == conflictIndex),c("dint","dppm")]),1,sum))
+                    if(score < bestscore){
+                        bestscore <- score
+                        bestIndex <- conflictIndex
+                    }
+                }
+                # Throw out all indices that don't have the best average score
+                monoMzRows <- monoMzRows[-which(adjustmentMatrix$monoisotopicIndex[monoMzRows] == bestIndex)]
+                adjustmentMatrix <- adjustmentMatrix[-monoMzRows,]
+            }
+        }
+    }
+    
+    # Everything should be fine, but if it is not
+    # some peaks have the same monoisotopicMz but not the same Formula,
+    # i.e. one monoisotopic peak has 2 isotope formulas that fit the
+    # same peak
+    if(length(unique(adjustmentMatrix$mzFound)) != length(adjustmentMatrix$mzFound)){
+        occurrences <- as.data.frame(table(adjustmentMatrix$mzFound))
+        for(value in occurrences[which(occurrences[,2] > 1),1]){
+            rows <- which(adjustmentMatrix$mzFound == value)
+            scores <- apply(adjustmentMatrix[rows,c("dint","dppm")],1,sum)
+            notMinRows <- rows[-which.min(scores)]
+            adjustmentMatrix <- adjustmentMatrix[-notMinRows,]
+        }
+    }
+    
+    # Now: Check for every peak if the monoisotopic peak is already the peak that is passed
+    # else change the peak that gets passed
+    problemPeaks <- which(!sapply(adjustmentMatrix$monoisotopicIndex, function(index) currentMPeaks$filterOK[which(currentMPeaks$index == index)]))
+    problemMz <- adjustmentMatrix$monoisotopicMz[problemPeaks]
+    correctIndex <- adjustmentMatrix$monoisotopicIndex[problemPeaks]
+    
+    adjustmentMatrix <- adjustmentMatrix[,-(25:29)]
+    
+    if(as.logical(length(problemMz))){
+        # Go through every mz and change the peak to the correct one
+        for(p in 1:length(problemMz)){
+            addPeak <- currentMPeaks[which(currentMPeaks$index == correctIndex[p]),,drop=FALSE]
+            addPeak$filterOK <- TRUE
+            removePeak <- currentMPeaks[which((currentMPeaks$mzFound == problemMz[p]) & currentMPeaks$filterOK),,drop=FALSE]
+            removePeak$filterOK <- FALSE
+            adjustmentMatrix <- rbind(adjustmentMatrix,addPeak,removePeak)
+        }
+    }
+    
+    return(adjustmentMatrix)
+}
+
+
+# Further modularization of formula addition
+# Is essentially one task, so this makes it more understandable
+.annotateFormulaToEnviPatTable <- function(pattern, defIsotopes){
+    # Find all isotope atom names (only in colnames of patterns, sadly)
+	isoCols <- colnames(pattern)[3:ncol(pattern)]
 	
-	# Peaks can turn up multiple times, take the one that actually fits the "base" formula,
-	# i.e. that of the peak that is actually written into the record
-	
-	# For each possible parent peak: Find out which peaks have possible children
-	# and how many, group these peaks (by adding a column to the adjustmentMatrix)
-	# Note: Grouping happens here, because we also need to split the peaks by mz
-	summaryList <- lapply(adjustmentList, function(x) sapply(x,nrow))
-	groupLengths <- unlist(lapply(summaryList, function(entry){
-		if(all(entry == 0)) return(vector()) else return(length(which(entry > 0)))
-	}))
-	adjustmentMatrix$group <- unlist(lapply(1:length(groupLengths),function(i) rep(i,groupLengths[i])))
+	# Order the formulas to be in Hill system:
+	# First the Cs, then the Hs, then lexicographical
+	# First: Split isotope names so that there only are atoms
+	# Find position of C and H
+	splitNames <- gsub('^([0-9]+)(.+)$', '\\2', isoCols)
+	CHPos <- c(which(splitNames == "C"),which(splitNames == "H"))
 	
-	# Technically, you only need to look at one peak in each group and can adjust the rest alongside
-	# Since we only want to check if the parent formula is currently passed through
-	groupAdjustmentMatrix <- adjustmentMatrix[sapply(1:length(groupLengths),function(i) which(adjustmentMatrix$group == i)[1]),]
+	# Account for special case: No Cs or Hs
+	if(length(CHPos)){
+		# If there are Cs and Hs, overwrite the positions so they always get sorted to the front
+		splitNames[CHPos] <- ""
+	}
 	
-	# Extract the parent formulas of the isotopic formula groups
-	parentFormulas <- names(groupLengths)
+	# Default isotopes are always first in the colnames, so no need to order internally between isotopes
+	atomOrder <- order(splitNames)
 	
-	# Find out which of the possible found isotopes formulas are competing (by checking which have the same mz)
-	indexList <- lapply(unique(groupAdjustmentMatrix$mzFound),function(mz){
-		which(groupAdjustmentMatrix$mzFound == mz)
-	})
+	# If there are Cs and Hs, the order must be preserved
+	if(length(CHPos)){
+		atomOrder[1:length(CHPos)] <- CHPos
+	}
+	# else it is already ordered
 	
-	# For every set of indices, find the parent formula in the aggregated matrix
-	return(do.call(rbind, lapply(indexList, function(indices){
-		# All peaks which fit the parent formulas
-		cutAggregateMatrix <- aggregatedPeakMatrix[which(aggregatedPeakMatrix$formula %in% parentFormulas[indices]),,drop=FALSE]
-		correctParentIndex <- which(cutAggregateMatrix$filterOK == TRUE)
-		if(length(correctParentIndex)){
-			# If there is a Peak that will be passed and fits one of the parent formulas, take that Peak and all belonging to that group
-			# Also, drop the group column
-			correctGroup <- groupAdjustmentMatrix$group[indices[correctParentIndex]]
-			returnMatrix <- adjustmentMatrix[which(adjustmentMatrix$group == correctGroup),-25,drop=FALSE]
-			return(returnMatrix)
-		} else{
-			# Else calculate the Peak-isotope relationship with the lowest average dppm
-			# And correct the Peak that currently has filterOK to not have it
-			correctParentIndex <- which.min((abs(cutAggregateMatrix$dppm) + abs(adjustmentMatrix[indices,]$dppm))/2)
-			correctionLine <- aggregatedPeakMatrix[which(aggregatedPeakMatrix$mzFound == groupAdjustmentMatrix$mzFound[indices[1]] & aggregatedPeakMatrix$filterOK),,drop=FALSE]
-			correctionLine$filterOK <- FALSE
-			correctGroup <- groupAdjustmentMatrix$group[indices[correctParentIndex]]
-			returnMatrix <- adjustmentMatrix[which(adjustmentMatrix$group == correctGroup),-25,drop=FALSE]
-			return(rbind(
-				returnMatrix,
-				correctionLine
-			))
+	# Mark new names for formula creation.
+	newnames <- unname(sapply(isoCols, function(currentCol){
+		if(currentCol %in% defIsotopes){
+			# "Default" isotope (no isotope mass in formula, e.g. "C")
+			return(gsub('^(.{0})([0-9]+)(.+)$', '\\3', currentCol))
+		}else{
+			# Other isotopes (isotope mass in formula, e.g. "[13]C")
+			return(gsub('^(.{0})([0-9]+)(.+)$', '\\1[\\2]\\3', currentCol))
 		}
-	})))
-}
\ No newline at end of file
+	}))
+	
+	# Generate the formula for every pattern
+	pattern$formula <- apply(pattern,1,function(p){
+		paste0(sapply(atomOrder+2,function(isoIndex){
+			if(p[isoIndex] == 0){ 
+				return("")
+			}
+			if(p[isoIndex] == 1){
+				return(newnames[isoIndex-2])
+			}
+			return(paste0(newnames[isoIndex-2],p[isoIndex]))
+		}),collapse="")
+	})
+    
+    return(pattern)
+}
diff --git a/R/alternateAnalyze.R b/R/alternateAnalyze.R
index c708634b66a65d8701ced4c6626d3d042e4b70dc..15f7d582044e853b2650de8ad67a4e47ec90e915 100644
--- a/R/alternateAnalyze.R
+++ b/R/alternateAnalyze.R
@@ -32,11 +32,7 @@ newStep2WorkFlow <- function(w, mode="pH",
                              confirmMode=FALSE, progressbar = "progressBarHook", 
                              settings = getOption("RMassBank"), analyzeMethod="formula", fragdataFile = NA){
   ##Load the fragment data (locally or from RMassBank package)
-  if(is.na(fragdataFile)){
-    fragData <- nfragData
-  } else{
-    fragData <- read.csv(fragdataFile,colClasses = c("character","numeric"))
-  }
+  fragData <- read.csv(fragdataFile,colClasses = c("character","numeric"))
   
   ##Progress bar
   nLen <- length(w@files)
@@ -95,41 +91,41 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
                                 filterSettings = getOption("RMassBank")$filterSettings,
                                 spectraList = getOption("RMassBank")$spectraList, fragData, fragDataIndex)
 {
-  cut <- 0
-  cut_ratio <- 0
-  if(run=="preliminary")
-  {
-    mzColname <- "mz"
-    filterMode <- "coarse"
-    cut <- filterSettings$prelimCut
-    if(is.na(cut))
-    {
-      if(mode %in% c("pH", "pM", "pNa", "pNH4"))
-        cut <- 1e4
-      else if(mode %in% c("mH", "mFA","mM"))
-        cut <- 0
-      else stop(paste("The ionization mode", mode, "is unknown."))
-    }
-    cutRatio <- filterSettings$prelimCutRatio
-  } else{
-    mzColname <- "mzRecal"
-    filterMode <- "fine"
-    cut <- filterSettings$fineCut
-    cut_ratio <- filterSettings$fineCutRatio
-    if(is.na(cut)) cut <- 0
-  }
-  
-  # find whole spectrum of parent peak, so we have reasonable data to feed into
-  # MolgenMsMs
-  parentSpectrum <- msmsPeaks$parentPeak
+	cut <- 0
+	cut_ratio <- 0
+	if(run=="preliminary")
+	{
+		mzColname <- "mz"
+		filterMode <- "coarse"
+		cut <- filterSettings$prelimCut
+		if(is.na(cut))
+		{
+			if(mode %in% c("pH", "pM", "pNa", "pNH4"))
+			cut <- 1e4
+			else if(mode %in% c("mH", "mFA","mM"))
+			cut <- 0
+			else stop(paste("The ionization mode", mode, "is unknown."))
+		}
+		cutRatio <- filterSettings$prelimCutRatio
+	} else{
+		mzColname <- "mzRecal"
+		filterMode <- "fine"
+		cut <- filterSettings$fineCut
+		cut_ratio <- filterSettings$fineCutRatio
+		if(is.na(cut)) cut <- 0
+	}
   
-  
-  # Check whether the spectra can be fitted to the spectra list correctly!
-  if(nrow(msmsPeaks$childHeaders) != length(spectraList))
-  {
-    warning(paste0(
-      "The spectra count of the substance ", msmsPeaks$id, " (", nrow(msmsPeaks$childHeaders), " spectra) doesn't match the provided spectra list (", length(spectraList), " spectra)."
-    ))
+	# find whole spectrum of parent peak, so we have reasonable data to feed into
+	# MolgenMsMs
+	parentSpectrum <- msmsPeaks$parentPeak
+
+
+	# Check whether the spectra can be fitted to the spectra list correctly!
+	if(nrow(msmsPeaks$childHeaders) != length(spectraList))
+	{
+		warning(paste0("The spectra count of the substance ", msmsPeaks$id, " (", nrow(msmsPeaks$childHeaders), " spectra) doesn't match the provided spectra list (", 
+						length(spectraList), " spectra).")
+	)
     return(list(specOK=FALSE))
     
   }
@@ -240,52 +236,52 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
     
     if(usePrecalcData){
       
-      # if yes, split the peaks into 2 sets: Those which can be identified using the
-      # fragment data, and those which can (by maximum fragment Data m/z - 0.5)
-      
-      precalcIndex <- which(shot[,mzColname] < (max(newfragData$mass)-0.5))
-      smallShots <- shot[precalcIndex,]
-      bigShots <- shot[-precalcIndex,]
-      
-      # for smaller peaks use the precalculated fragment data
-      if(nrow(smallShots)){
-        system.time(peakmatrixSmall <- lapply(smallShots[,mzColname],function(mass){
-          mass.calc <- mass + mode.charge * .emass
-          maxminMass <- ppm(mass.calc, ppmlimit, l=TRUE)
-          # retrieve only possibly relevant rows of the fragment Data (use the index)
-          indices <- newfragDataIndex[floor(maxminMass[2]*100)]:newfragDataIndex[ceiling(maxminMass[1]*100)]
-          
-          fragmentedFragmentData <- newfragData[indices,]
-          
-          # narrow it down further using the ppm
-          fragmentedFragmentData <- fragmentedFragmentData[which(fragmentedFragmentData$mass > maxminMass[2] & fragmentedFragmentData $mass < maxminMass[1]),]
-          
-          # return nothing if the narrowed down data is empty at this point
-          if(!nrow(fragmentedFragmentData)){
-            return(t(c(mzFound=as.numeric(as.character(mass)),
-                       formula=NA, mzCalc=NA)))
-          }
-          
-          
-          # find out if the narrowed down fragments have a sub-formula of the parentformula
-          # return the indexes of relevant formulas
-          fragmentedFragmentData <- fragmentedFragmentData[which(sapply(fragmentedFragmentData$formula, function(currentFormula) is.sub.formula(currentFormula,parentAtomList))),]
-          
-          # return nothing if the narrowed down data is empty at this point
-          if(!nrow(fragmentedFragmentData)){
-            return(t(c(mzFound=as.numeric(as.character(mass)),
-                       formula=NA, mzCalc=NA)))
-          }
-          
-          return(t(sapply(1:nrow(fragmentedFragmentData), function(f)
-          {
-            mzCalc <- fragmentedFragmentData$mass[f] - mode.charge * .emass 
-            c(mzFound=as.numeric(as.character(mass)),
-              formula=fragmentedFragmentData$formula[f], 
-              mzCalc=mzCalc)
-          })))
-          
-        }))
+		# if yes, split the peaks into 2 sets: Those which can be identified using the
+		# fragment data, and those which can't (by maximum fragment Data m/z - 0.5)
+
+		precalcIndex <- which(shot[,mzColname] < (max(newfragData$mass)-0.5))
+		smallShots <- shot[precalcIndex,]
+		bigShots <- shot[-precalcIndex,]
+
+		# for smaller peaks use the precalculated fragment data
+		if(nrow(smallShots)){
+		peakmatrixSmall <- lapply(smallShots[,mzColname],function(mass){
+			mass.calc <- mass + mode.charge * .emass
+			maxminMass <- ppm(mass.calc, ppmlimit, l=TRUE)
+			# retrieve only possibly relevant rows of the fragment Data (use the index)
+			indices <- newfragDataIndex[floor(maxminMass[2]*100)]:newfragDataIndex[ceiling(maxminMass[1]*100)]
+			
+			fragmentedFragmentData <- newfragData[indices,]
+			
+			# narrow it down further using the ppm
+			fragmentedFragmentData <- fragmentedFragmentData[which(fragmentedFragmentData$mass > maxminMass[2] & fragmentedFragmentData $mass < maxminMass[1]),]
+			
+			# return nothing if the narrowed down data is empty at this point
+			if(!nrow(fragmentedFragmentData)){
+				return(t(c(mzFound=as.numeric(as.character(mass)),
+						formula=NA, mzCalc=NA)))
+			}
+			
+			
+			# find out if the narrowed down fragments have a sub-formula of the parentformula
+			# return the indexes of relevant formulas
+			fragmentedFragmentData <- fragmentedFragmentData[which(sapply(fragmentedFragmentData$formula, function(currentFormula) is.sub.formula(currentFormula,parentAtomList))),]
+			
+			# return nothing if the narrowed down data is empty at this point
+			if(!nrow(fragmentedFragmentData)){
+			return(t(c(mzFound=as.numeric(as.character(mass)),
+						formula=NA, mzCalc=NA)))
+			}
+			
+			return(t(sapply(1:nrow(fragmentedFragmentData), function(f)
+			{
+			mzCalc <- fragmentedFragmentData$mass[f] - mode.charge * .emass 
+			c(mzFound=as.numeric(as.character(mass)),
+				formula=fragmentedFragmentData$formula[f], 
+				mzCalc=mzCalc)
+			})))
+			
+        })
       } else{
         peakmatrixSmall <- list()
       }
@@ -323,31 +319,30 @@ analyzeMsMs.formula.optimized <- function(msmsPeaks, mode="pH", detail=FALSE, ru
       peakmatrix <- c(peakmatrixSmall,peakmatrixBig)
       
     } else{
-        system.time(peakmatrix <- lapply(shot[,mzColname], function(mass){
-          # Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
-          # finally back-correct calculated masses for the charge
-          mass.calc <- mass + mode.charge * .emass
-          peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE), 
-                                                   limits, charge=0)), error=function(e) NA)
-          #peakformula <- tryCatch( 
-          #  generate.formula(mass, 
-          #                   ppm(mass, ppmlimit, p=TRUE),
-          #                   limits, charge=1),
-          #error= function(e) list())
-          if(!is.list(peakformula)){
-            return(t(c(mzFound=as.numeric(as.character(mass)),
-                       formula=NA, mzCalc=NA)))
-          }else
-          {
-            return(t(sapply(peakformula, function(f)
-            {
-              mzCalc <- f@mass - mode.charge * .emass 
-              c(mzFound=mass,
-                formula=f@string, 
-                mzCalc=mzCalc)
-            })))
-          }
-        }))
+			peakmatrix <- lapply(shot[,mzColname], function(mass){
+			# Circumvent bug in rcdk: correct the mass for the charge first, then calculate uncharged formulae
+			# finally back-correct calculated masses for the charge
+			mass.calc <- mass + mode.charge * .emass
+			peakformula <- tryCatch(suppressWarnings(generate.formula(mass.calc, ppm(mass.calc, ppmlimit, p=TRUE), 
+												   limits, charge=0)), error=function(e) NA)
+			#peakformula <- tryCatch( 
+			#  generate.formula(mass, 
+			#                   ppm(mass, ppmlimit, p=TRUE),
+			#                   limits, charge=1),
+			#error= function(e) list())
+			if(!is.list(peakformula)){
+				return(t(c(mzFound=as.numeric(as.character(mass)),
+						formula=NA, mzCalc=NA)))
+			}else{
+				return(t(sapply(peakformula, function(f)
+				{
+					mzCalc <- f@mass - mode.charge * .emass 
+					c(mzFound=mass,
+					formula=f@string, 
+					mzCalc=mzCalc)
+				})))
+			}
+        })
     }
     
     childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
diff --git a/R/createMassBank.R b/R/createMassBank.R
index a3e1c1f7ce282adc35535a8581a25baa85eed07d..cc95afd8850ac418d31a64643b0a4c581b1c5080 100755
--- a/R/createMassBank.R
+++ b/R/createMassBank.R
@@ -162,7 +162,7 @@ resetInfolists <- function(mb)
 #' @param mb The \code{mbWorkspace} to work in.
 #' @param gatherData A variable denoting whether to retrieve information using several online databases \code{gatherData= "online"}
 #' or to use the local babel installation \code{gatherData= "babel"}. Note that babel is used either way, if a directory is given 
-#' in the settings
+#' in the settings. This setting will be ignored if retrieval is set to "standard"
 #' @return The processed \code{mbWorkspace}.
 #' @seealso \code{\link{mbWorkspace-class}}
 #' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
@@ -175,110 +175,109 @@ resetInfolists <- function(mb)
 #' @export
 mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.csv", gatherData = "online")
 {
-  # Step 1: Find which compounds don't have annotation information yet. For these
-  # compounds, pull information from CTS (using gatherData).
-  if(1 %in% steps)
-  {
-      mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id)
-	  if(gatherData == "online"){
-		message("mbWorkflow: Step 1. Gather info from several databases")
-	  } 
-	  if(gatherData == "babel"){
-		message("mbWorkflow: Step 1. Gather info using babel")
-	  }
-      # Which IDs are not in mbdata_archive yet?
-      new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id)
-      mb@mbdata <- lapply(new_ids, function(id) 
-      {
-        #print(id)
-		if(gatherData == "online"){
-			d <- gatherData(id)
-		} 
-		if(gatherData == "babel"){
-			d <- gatherDataBabel(id)
-		}
-        #print(d$dataused)
-		message(paste(id, ": ", d$dataused, sep=''))
-        return(d)
-      })
-  }
-  # Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
-  # Otherwise, continue!
-  if(2 %in% steps)
-  {
-	message("mbWorkflow: Step 2. Export infolist (if required)")
-    if(length(mb@mbdata)>0)
+    # Step 1: Find which compounds don't have annotation information yet. For these
+    # compounds, pull information from CTS (using gatherData).
+    if(1 %in% steps)
     {
-      mbdata_mat <- flatten(mb@mbdata)
-      write.csv(as.data.frame(mbdata_mat),infolist_path, na="")
-      message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder.")
-            )
-      return(mb)
+        mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id)
+        message("mbWorkflow: Step 1. Gather info from several databases")
+        # Which IDs are not in mbdata_archive yet?
+        new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id)
+        mb@mbdata <- lapply(new_ids, function(id) 
+        {
+            if(findLevel(id,TRUE) == "standard"){
+                if(gatherData == "online"){
+                    
+                    d <- gatherData(id)
+                } 
+                if(gatherData == "babel"){
+                    # message("mbWorkflow: Step 1. Gather info using babel")
+                    d <- gatherDataBabel(id)
+                }
+            } else{
+                # message("mbWorkflow: Step 1. Gather no info - Unknown structure")
+                d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE))
+            }
+            message(paste(id, ": ", d$dataused, sep=''))
+            return(d)
+        })
     }
-    else
-      message("No new data added.")
-  }
-  # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
-  if(3 %in% steps)
-  {
-	message("mbWorkflow: Step 3. Data reformatting")
-    mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata)
-  }
-  # Step 4: Compile the spectra! Using the skeletons from the archive data, create
-  # MassBank records per compound and fill them with peak data for each spectrum.
-  # Also, assign accession numbers based on scan mode and relative scan no.
-  if(4 %in% steps)
-  {
-	  message("mbWorkflow: Step 4. Spectra compilation")
-	  mb@compiled <- lapply(
-			  selectSpectra(mb@spectra, "found", "object"),
-			  function(r) {
-				  message(paste("Compiling: ", r@name, sep=""))
-				  mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
-				  if(nrow(mb@additionalPeaks) > 0)
-					  res <-compileRecord(r, mbdata, mb@aggregated, mb@additionalPeaks)
-				  else
-					  res <-compileRecord(r, mbdata, mb@aggregated, NULL)
-				  return(res)
-			  })
-	  # check which compounds have useful spectra
-	  mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
-	  mb@problems <- which(is.na(mb@compiled))
-	  mb@compiled_ok <- mb@compiled[mb@ok]
-  }
-  # Step 5: Convert the internal tree-like representation of the MassBank data into
-  # flat-text string arrays (basically, into text-file style, but still in memory)
-  if(5 %in% steps)
-  {
-	message("mbWorkflow: Step 5. Flattening records")
-    mb@mbfiles <- lapply(mb@compiled_ok, function(c) lapply(c, toMassbank))
-  }
-  # Step 6: For all OK records, generate a corresponding molfile with the structure
-  # of the compound, based on the SMILES entry from the MassBank record. (This molfile
-  # is still in memory only, not yet a physical file)
-  if(6 %in% steps)
-  {
-	message("mbWorkflow: Step 6. Generate molfiles")
-    mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(c[[1]][["CH$SMILES"]]))
-  }
-  # Step 7: If necessary, generate the appropriate subdirectories, and actually write
-  # the files to disk.
-  if(7 %in% steps)
-  {
-	message("mbWorkflow: Step 7. Generate subdirs and export")
-    dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "moldata", sep='/'),recursive=TRUE)
-    dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "recdata", sep='/'),recursive=TRUE)
-    for(cnt in 1:length(mb@compiled_ok))
-      exportMassbank(mb@compiled_ok[[cnt]], mb@mbfiles[[cnt]], mb@molfile[[cnt]])    
-  }
-  # Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
-  # to attribute substances to their corresponding structure molfiles.
-  if(8 %in% steps)
-  {
-	message("mbWorkflow: Step 8. Create list.tsv")
-    makeMollist(mb@compiled_ok)
-  }
-  return(mb)
+    # Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
+    # Otherwise, continue!
+    if(2 %in% steps)
+    {
+        message("mbWorkflow: Step 2. Export infolist (if required)")
+        if(length(mb@mbdata)>0)
+        {
+            mbdata_mat <- flatten(mb@mbdata)
+            write.csv(as.data.frame(mbdata_mat),infolist_path, na="")
+            message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder."))
+            return(mb)
+        }
+        else
+            message("No new data added.")
+    }
+    # Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
+    if(3 %in% steps)
+    {
+        message("mbWorkflow: Step 3. Data reformatting")
+        mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata)
+    }
+    # Step 4: Compile the spectra! Using the skeletons from the archive data, create
+    # MassBank records per compound and fill them with peak data for each spectrum.
+    # Also, assign accession numbers based on scan mode and relative scan no.
+    if(4 %in% steps)
+    {
+        message("mbWorkflow: Step 4. Spectra compilation")
+        mb@compiled <- lapply(
+                selectSpectra(mb@spectra, "found", "object"),
+                function(r) {
+                    message(paste("Compiling: ", r@name, sep=""))
+                    mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
+                    if(nrow(mb@additionalPeaks) > 0)
+                        res <-compileRecord(r, mbdata, mb@aggregated, mb@additionalPeaks)
+                    else
+                        res <-compileRecord(r, mbdata, mb@aggregated, NULL, retrieval=findLevel(r@id,TRUE))
+                    return(res)
+                })
+        # check which compounds have useful spectra
+        mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
+        mb@problems <- which(is.na(mb@compiled))
+        mb@compiled_ok <- mb@compiled[mb@ok]
+    }
+    # Step 5: Convert the internal tree-like representation of the MassBank data into
+    # flat-text string arrays (basically, into text-file style, but still in memory)
+    if(5 %in% steps)
+    {
+        message("mbWorkflow: Step 5. Flattening records")
+        mb@mbfiles <- lapply(mb@compiled_ok, function(c) lapply(c, toMassbank))
+    }
+    # Step 6: For all OK records, generate a corresponding molfile with the structure
+    # of the compound, based on the SMILES entry from the MassBank record. (This molfile
+    # is still in memory only, not yet a physical file)
+    if(6 %in% steps)
+    {
+        message("mbWorkflow: Step 6. Generate molfiles")
+        mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c[[1]][['COMMENT']][[getOption("RMassBank")$annotations$internal_id_fieldname]])))
+    }
+    # Step 7: If necessary, generate the appropriate subdirectories, and actually write
+    # the files to disk.
+    if(7 %in% steps)
+    {
+        message("mbWorkflow: Step 7. Generate subdirs and export")
+        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "moldata", sep='/'),recursive=TRUE)
+        dir.create(paste(getOption("RMassBank")$annotations$entry_prefix, "recdata", sep='/'),recursive=TRUE)
+        for(cnt in 1:length(mb@compiled_ok))
+            exportMassbank(mb@compiled_ok[[cnt]], mb@mbfiles[[cnt]], mb@molfile[[cnt]])    
+    }
+    # Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
+    # to attribute substances to their corresponding structure molfiles.
+    if(8 %in% steps)
+    {
+        message("mbWorkflow: Step 8. Create list.tsv")
+        makeMollist(mb@compiled_ok)
+    }
+    return(mb)
 }
 
 
@@ -318,11 +317,15 @@ createMolfile <- function(id_or_smiles, fileName = FALSE)
 {
 	.checkMbSettings()
 	babeldir <- getOption("RMassBank")$babeldir
-
-	if(!is.numeric(id_or_smiles))
+    
+	if(!is.numeric(id_or_smiles)){
 		smiles <- id_or_smiles
-	else
-		smiles <- findSmiles(id_or_smiles)
+    } else{
+        if(findLevel(id_or_smiles,TRUE) != "standard"){
+            return(c(" ","$$$$"))
+        }
+        smiles <- findSmiles(id_or_smiles)
+    }
     # if no babeldir was set, get the result from cactus.
 	if(is.na(babeldir))
 	{
@@ -348,7 +351,8 @@ createMolfile <- function(id_or_smiles, fileName = FALSE)
 		# If we wrote to a file, read it back as return value.
 		if(is.character(fileName))
 			res <- readLines(fileName)
-	}
+	} 
+    return(c(" ","$$$$"))
 	return(res)
 }
 
@@ -574,8 +578,45 @@ gatherData <- function(id)
 	# COMMENT: EAWAG_UCHEM_ID 1234
 	# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
 	mbdata[["COMMENT"]] <- list()
-	mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
-	mbdata[["COMMENT"]][["ID"]] = id
+    if(findLevel(id) == "0"){
+        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+	} else{
+        level <- findLevel(id)
+        if(level %in% c("1","1a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+        }
+        if(level == c("2")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+        }
+        if(level == c("2a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+        }
+        if(level == c("2b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+        }
+        if(level == c("3")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+        }
+        if(level == c("3a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+        }
+        if(level == c("3b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+        }
+        if(level == c("3c")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+        }
+        if(level == c("3d")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+        }
+        if(level == c("4")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+        }
+        if(level == c("5")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+        }
+    }
+    mbdata[["COMMENT"]][["ID"]] = id
 	# here compound info starts
 	mbdata[['CH$NAME']] <- names
 	# Currently we use a fixed value for Compound Class, since there is no useful
@@ -756,8 +797,46 @@ gatherDataBabel <- function(id){
 			# COMMENT: EAWAG_UCHEM_ID 1234
 			# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
 			mbdata[["COMMENT"]] <- list()
-			mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+			if(findLevel(id) == "0"){
+                mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+            } else{
+                level <- findLevel(id)
+                if(level %in% c("1","1a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+                }
+                if(level == c("2")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+                }
+                if(level == c("2a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+                }
+                if(level == c("2b")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+                }
+                if(level == c("3")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+                }
+                if(level == c("3a")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+                }
+                if(level == c("3b")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+                }
+                if(level == c("3c")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+                }
+                if(level == c("3d")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+                }
+                if(level == c("4")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+                }
+                if(level == c("5")){
+                     mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+                }
+            }
 			mbdata[["COMMENT"]][["ID"]] <- id
+
 			# here compound info starts
 			mbdata[['CH$NAME']] <- as.list(dbname)
 			
@@ -781,6 +860,139 @@ gatherDataBabel <- function(id){
 		return(mbdata)
 }
 
+# Retrieve annotation data for a compound, using only babel
+#' Retrieve annotation data
+#' 
+#' Retrieves annotation data for an unknown compound by using basic information present
+#'
+#' Composes the "upper part" of a MassBank record filled with chemical data
+#' about the compound: name, exact mass, structure, CAS no..  
+#' The instrument type is also written into this block (even
+#' if not strictly part of the chemical information). Additionally, index
+#' fields are added at the start of the record, which will be removed later:
+#' \code{id, dbcas, dbname} from the compound list.
+#' 
+#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
+#' inserted empty and will be filled later on.
+#' 
+#' This function is used to generate the data in case a substance is unknown,
+#' i.e. not enough information is present to derive anything about formulas or links
+#'
+#' @usage gatherDataUnknown(id, mode, retrieval)
+#' @param id The compound ID.
+#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
+#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
+#' @author Michael Stravs, Erik Mueller
+#' @seealso \code{\link{mbWorkflow}}
+#' @references MassBank record format:
+#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+#' @examples
+#' 
+#' # Gather data for compound ID 131
+#' \dontrun{gatherDataUnknown(131,"pH")}
+#' 
+#' @export
+gatherDataUnknown <- function(id, mode, retrieval){
+    .checkMbSettings()
+    
+    ##Read from Compoundlist
+    smiles <- ""
+    if(retrieval == "unknown"){
+        mass <- findMass(id, "unknown", mode)
+        formula <- ""
+    }    
+    if(retrieval == "tentative"){
+        mass <- findMass(id, "tentative", mode)
+        formula <- findFormula(id, "tentative")
+    }
+    dbcas <- NA
+    dbname <- findName(id)
+    if(is.na(dbname)) dbname <- paste("Unknown ID:",id)
+    if(is.na(dbcas)) dbcas <- ""
+    
+
+    
+    ##Create 
+    mbdata <- list()
+    mbdata[['id']] <- id
+    mbdata[['dbcas']] <- dbcas
+    mbdata[['dbname']] <- dbname
+    mbdata[['dataused']] <- "none"
+    mbdata[['ACCESSION']] <- ""
+    mbdata[['RECORD_TITLE']] <- ""
+    mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
+    mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
+    mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
+    mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
+    # Confidence annotation and internal ID annotation.
+    # The ID of the compound will be written like:
+    # COMMENT: EAWAG_UCHEM_ID 1234
+    # if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
+    mbdata[["COMMENT"]] <- list()
+    if(findLevel(id) == "0"){
+        mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
+	} else{
+        level <- findLevel(id)
+        if(level %in% c("1","1a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
+        }
+        if(level == c("2")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
+        }
+        if(level == c("2a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
+        }
+        if(level == c("2b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
+        }
+        if(level == c("3")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
+        }
+        if(level == c("3a")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
+        }
+        if(level == c("3b")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
+        }
+        if(level == c("3c")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
+        }
+        if(level == c("3d")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
+        }
+        if(level == c("4")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
+        }
+        if(level == c("5")){
+             mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
+        }
+    }
+    mbdata[["COMMENT"]][["ID"]] <- id
+
+    # here compound info starts
+    mbdata[['CH$NAME']] <- as.list(dbname)
+    
+    # Currently we use a fixed value for Compound Class, since there is no useful
+    # convention of what should go there and what shouldn't, and the field is not used
+    # in search queries.
+    mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
+    mbdata[['CH$FORMULA']] <- formula
+    mbdata[['CH$EXACT_MASS']] <- mass
+    mbdata[['CH$SMILES']] <- ""
+    mbdata[['CH$IUPAC']] <- ""
+    
+    link <- list()
+    mbdata[['CH$LINK']] <- link
+    mbdata[['AC$INSTRUMENT']] <- getOption("RMassBank")$annotations$instrument
+    mbdata[['AC$INSTRUMENT_TYPE']] <- getOption("RMassBank")$annotations$instrument_type
+
+    return(mbdata)
+}
 
 # Flatten the internal tree-like representation of MassBank data to a flat table.
 # Note that this limits us, in that the fields should be constant over all records!
@@ -975,10 +1187,10 @@ readMbdata <- function(row)
 #' list('FRAGMENTATION_MODE' = 'CID', ...), ...)} etc.
 #' 
 #' @aliases gatherCompound gatherSpectrum
-#' @usage gatherCompound(spec, aggregated, additionalPeaks = NULL)
+#' @usage gatherCompound(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
 #' 
 #' 		gatherSpectrum(spec, msmsdata, ac_ms, ac_lc, aggregated, 
-#'	 		additionalPeaks = NULL)
+#'	 		additionalPeaks = NULL, retrieval="standard")
 #' @param spec A \code{RmbSpectraSet} object, representing a compound with multiple spectra.
 #' @param aggregated An aggregate peak table where the peaks are extracted from.
 #' @param msmsdata A \code{RmbSpectrum2} object from the \code{spec} spectra set, representing a single spectrum to give a record.
@@ -987,6 +1199,9 @@ readMbdata <- function(row)
 #' \code{gatherCompound} and then fed into \code{gatherSpectrum}.
 #' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
 #' 		As loaded with \code{\link{addPeaks}}.
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
 #' @return \code{gatherCompound} returns a list of tree-like MassBank data
 #' blocks. \code{gatherSpectrum} returns one single MassBank data block or
 #' \code{NA} if no useful peak is in the spectrum. 
@@ -1009,47 +1224,47 @@ readMbdata <- function(row)
 #' 
 #' 
 #' @export
-gatherCompound <- function(spec, aggregated, additionalPeaks = NULL)
+gatherCompound <- function(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
 {
-  # compound ID
-  id <- spec@id
-  # processing mode
-  imode <- spec@mode
-  
-  # define positive or negative, based on processing mode.
-  ion_modes <- list(
-    "pH" = "POSITIVE",
-    "pNa" = "POSITIVE",
-    "mH" = "NEGATIVE",
-    "mFA" = "NEGATIVE",
-    "pM" = "POSITIVE",
-    "mM" = "NEGATIVE",
-	"pNH4" = "POSITIVE")
-  mode <- ion_modes[[imode]]
-  
-  # for format 2.01
-  ac_ms <- list();
-  ac_ms[['MS_TYPE']] <- getOption("RMassBank")$annotations$ms_type
-  ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
-  ac_ms[['ION_MODE']] <- mode
-  
-  # This list could be made customizable.
-  ac_lc <- list();
-  rt  <- spec@parent@rt / 60
-  ac_lc[['COLUMN_NAME']] <- getOption("RMassBank")$annotations$lc_column
-  ac_lc[['FLOW_GRADIENT']] <- getOption("RMassBank")$annotations$lc_gradient
-  ac_lc[['FLOW_RATE']] <- getOption("RMassBank")$annotations$lc_flow
-  ac_lc[['RETENTION_TIME']] <- sprintf("%.1f min", rt)  
-  ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a
-  ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b
+    # compound ID
+    id <- spec@id
+    # processing mode
+    imode <- spec@mode
+
+    # define positive or negative, based on processing mode.
+    ion_modes <- list(
+        "pH" = "POSITIVE",
+        "pNa" = "POSITIVE",
+        "mH" = "NEGATIVE",
+        "mFA" = "NEGATIVE",
+        "pM" = "POSITIVE",
+        "mM" = "NEGATIVE",
+        "pNH4" = "POSITIVE")
+    mode <- ion_modes[[imode]]
+
+    # for format 2.01
+    ac_ms <- list();
+    ac_ms[['MS_TYPE']] <- getOption("RMassBank")$annotations$ms_type
+    ac_ms[['IONIZATION']] <- getOption("RMassBank")$annotations$ionization
+    ac_ms[['ION_MODE']] <- mode
   
-  # Go through all child spectra, and fill our skeleton with scan data!
-  # Pass them the AC_LC and AC_MS data, which are added at the right place
-  # directly in there.
-  allSpectra <- lapply(spec@children, function(m)
-    gatherSpectrum(spec, m, ac_ms, ac_lc, aggregated, additionalPeaks))
-  allSpectra <- allSpectra[which(!is.na(allSpectra))]
-  return(allSpectra)
+    # This list could be made customizable.
+    ac_lc <- list();
+    rt  <- spec@parent@rt / 60
+    ac_lc[['COLUMN_NAME']] <- getOption("RMassBank")$annotations$lc_column
+    ac_lc[['FLOW_GRADIENT']] <- getOption("RMassBank")$annotations$lc_gradient
+    ac_lc[['FLOW_RATE']] <- getOption("RMassBank")$annotations$lc_flow
+    ac_lc[['RETENTION_TIME']] <- sprintf("%.3f min", rt)  
+    ac_lc[['SOLVENT A']] <- getOption("RMassBank")$annotations$lc_solvent_a
+    ac_lc[['SOLVENT B']] <- getOption("RMassBank")$annotations$lc_solvent_b
+
+    # Go through all child spectra, and fill our skeleton with scan data!
+    # Pass them the AC_LC and AC_MS data, which are added at the right place
+    # directly in there.
+    allSpectra <- lapply(spec@children, function(m)
+        gatherSpectrum(spec, m, ac_ms, ac_lc, aggregated, additionalPeaks, retrieval=retrieval))
+    allSpectra <- allSpectra[which(!is.na(allSpectra))]
+    return(allSpectra)
 }
 
 
@@ -1069,17 +1284,17 @@ gatherCompound <- function(spec, aggregated, additionalPeaks = NULL)
 #       peaksProblematic. Currently we use peaksOK and peaksReanOK to create the files.
 #       (Also, the global additionalPeaks table is used.)
 #' @export
-gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalPeaks = NULL)
+gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalPeaks = NULL, retrieval = "standard")
 {
-  # If the spectrum is not filled, return right now. All "NA" spectra will
-  # not be treated further.
-  if(msmsdata@ok == FALSE)
-      return(NA)
-  # get data
-  scan <- msmsdata@acquisitionNum
-  id <- spec@id
-  # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
-  precursor_types <- list(
+    # If the spectrum is not filled, return right now. All "NA" spectra will
+    # not be treated further.
+    if(msmsdata@ok == FALSE)
+        return(NA)
+    # get data
+    scan <- msmsdata@acquisitionNum
+    id <- spec@id
+    # Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
+    precursor_types <- list(
     "pH" = "[M+H]+",
     "pNa" = "[M+Na]+",
     "mH" = "[M-H]-",
@@ -1087,187 +1302,194 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
     "pM" = "[M]+",
     "mM" = "[M]-",
 	"pNH4" = "[M+NH4]+")
-  ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
-  #ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
-  ac_ms[['COLLISION_ENERGY']] <- msmsdata@info$ce
-  ac_ms[['RESOLUTION']] <- msmsdata@info$res
-  
-  # Calculate exact precursor mass with Rcdk, and find the base peak from the parent
-  # spectrum. (Yes, that's what belongs here, I think.)
-  precursorMz <- findMz(spec@id, spec@mode)
-  ms_fi <- list()
-  ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
-  ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
-  ms_fi[['PRECURSOR_TYPE']] <- precursor_types[spec@mode]
-  
-  # Select all peaks which belong to this spectrum (correct cpdID and scan no.)
-  # from peaksOK
-  # Note: Here and below it would be easy to customize the source of the peaks.
-  # Originally the peaks came from msmsdata$childFilt, and the subset
-  # was used where dppm == dppmBest (because childFilt still contains multiple formulas)
-  # per peak.
-	peaks <- aggregated[aggregated$filterOK,,drop=FALSE]
-	peaks <- peaks[(peaks$cpdID == id) & (peaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
+    ac_ms[['FRAGMENTATION_MODE']] <- msmsdata@info$mode
+    #ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
+    ac_ms[['COLLISION_ENERGY']] <- msmsdata@info$ce
+    ac_ms[['RESOLUTION']] <- msmsdata@info$res
+
+    # Calculate exact precursor mass with Rcdk, and find the base peak from the parent
+    # spectrum. (Yes, that's what belongs here, I think.)
+    precursorMz <- findMz(spec@id, spec@mode, retrieval=retrieval)
+    ms_fi <- list()
+    ms_fi[['BASE_PEAK']] <- round(mz(spec@parent)[which.max(intensity(spec@parent))],4)
+    ms_fi[['PRECURSOR_M/Z']] <- round(precursorMz$mzCenter,4)
+    ms_fi[['PRECURSOR_TYPE']] <- precursor_types[spec@mode]
+
+    # Select all peaks which belong to this spectrum (correct cpdID and scan no.)
+    # from peaksOK
+    # Note: Here and below it would be easy to customize the source of the peaks.
+    # Originally the peaks came from msmsdata$childFilt, and the subset
+    # was used where dppm == dppmBest (because childFilt still contains multiple formulas)
+    # per peak.
+    peaks <- aggregated[aggregated$filterOK,,drop=FALSE]
+    peaks <- peaks[(peaks$cpdID == id) & (peaks$scan == msmsdata@acquisitionNum),,drop=FALSE]
   
-  # No peaks? Aha, bye
+    # No peaks? Aha, bye
     if(nrow(peaks) == 0)
-      return(NA)
+        return(NA)
   
-  # If we don't include the reanalyzed peaks:
-  if(!getOption("RMassBank")$use_rean_peaks)
-	peaks <- peaks[is.na(peaks$matchedReanalysis),,drop=FALSE]
-  # but if we include them:
-  else
-  {
-	  # for info, the following data will be used in the default annotator:
-	  # annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
-	  # and in the peaklist itself:
-	  # c("mzSpec", "int", "intrel")
-	  peaks[!is.na(peaks$matchedReanalysis),"formula"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formula"]
-	  peaks[!is.na(peaks$matchedReanalysis),"mzCalc"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.mzCalc"]
-	  peaks[!is.na(peaks$matchedReanalysis),"dppm"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dppm"]
-	  peaks[!is.na(peaks$matchedReanalysis),"dbe"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dbe"]
-	  peaks[!is.na(peaks$matchedReanalysis),"formulaCount"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formulaCount"]
-  }
+    # If we don't include the reanalyzed peaks:
+    if(!getOption("RMassBank")$use_rean_peaks)
+        peaks <- peaks[is.na(peaks$matchedReanalysis),,drop=FALSE]
+    # but if we include them:
+    else
+    {
+        # for info, the following data will be used in the default annotator:
+        # annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
+        # and in the peaklist itself:
+        # c("mzSpec", "int", "intrel")
+        peaks[!is.na(peaks$matchedReanalysis),"formula"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formula"]
+        peaks[!is.na(peaks$matchedReanalysis),"mzCalc"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.mzCalc"]
+        peaks[!is.na(peaks$matchedReanalysis),"dppm"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dppm"]
+        peaks[!is.na(peaks$matchedReanalysis),"dbe"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.dbe"]
+        peaks[!is.na(peaks$matchedReanalysis),"formulaCount"]  <- peaks[!is.na(peaks$matchedReanalysis),"reanalyzed.formulaCount"]
+    }
 
-  # Calculate relative intensity and make a formatted m/z to use in the output
-  # (mzSpec, for "spectrum")
-  peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
-  peaks$mzSpec <- round(peaks$mzFound, 4)
-  # reorder peaks after addition of the reanalyzed ones
-  peaks <- peaks[order(peaks$mzSpec),]
-  
-  # Also format the other values, which are used in the annotation
-  peaks$dppm <- round(peaks$dppm, 2)
-  peaks$mzCalc <- round(peaks$mzCalc, 4)
-  peaks$intensity <- round(peaks$intensity, 1)
-  # copy the peak table to the annotation table. (The peak table will then be extended
-  # with peaks from the global "additional_peaks" table, which can be used to add peaks
-  # to the spectra by hand.
-  annotation <- peaks
-  # Keep only peaks with relative intensity >= 1 o/oo, since the MassBank record
-  # makes no sense otherwise. Also, keep only the columns needed in the output.
-  peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
+    # Calculate relative intensity and make a formatted m/z to use in the output
+    # (mzSpec, for "spectrum")
+    peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
+    peaks$mzSpec <- round(peaks$mzFound, 4)
+    # reorder peaks after addition of the reanalyzed ones
+    peaks <- peaks[order(peaks$mzSpec),]
 
-  # Here add the additional peaks if there are any for this compound!
-  # They are added without any annotation.
-  if(!is.null(additionalPeaks))
-  {
-    # select the peaks from the corresponding spectrum which were marked with "OK=1" in the table.
-    spec_add_peaks <- additionalPeaks[ 
-			(additionalPeaks$OK == 1) & 
-			(additionalPeaks$cpdID == spec@id) &
-			(additionalPeaks$scan == msmsdata@acquisitionNum),
-            c("mzFound", "intensity")]
-    # If there are peaks to add:
-    if(nrow(spec_add_peaks)>0)
+    # Also format the other values, which are used in the annotation
+    peaks$dppm <- round(peaks$dppm, 2)
+    peaks$mzCalc <- round(peaks$mzCalc, 4)
+    peaks$intensity <- round(peaks$intensity, 1)
+    # copy the peak table to the annotation table. (The peak table will then be extended
+    # with peaks from the global "additional_peaks" table, which can be used to add peaks
+    # to the spectra by hand.
+    annotation <- peaks
+    # Keep only peaks with relative intensity >= 1 o/oo, since the MassBank record
+    # makes no sense otherwise. Also, keep only the columns needed in the output.
+    peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
+
+    # Here add the additional peaks if there are any for this compound!
+    # They are added without any annotation.
+    if(!is.null(additionalPeaks))
     {
-      # add the column for rel. int.
-      spec_add_peaks$intrel <- 0
-      # format m/z value
-      spec_add_peaks$mzSpec <- round(spec_add_peaks$mzFound, 4)
-      # bind tables together
-      peaks <- rbind(peaks, spec_add_peaks[,c("mzSpec", "intensity", "intrel")])
-      # recalculate rel.int.  and reorder list
-      peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
-      # Again, select the correct columns, and drop values with rel.int. <1 o/oo
-      # NOTE: If the highest additional peak is > than the previous highest peak,
-      # this can lead to the situation that a peak is in "annotation" but not in "peaks"!
-      # See below.
-      peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
-      # Reorder again.
-      peaks <- peaks[order(peaks$mzSpec),]
+        # select the peaks from the corresponding spectrum which were marked with "OK=1" in the table.
+        spec_add_peaks <- additionalPeaks[ 
+                (additionalPeaks$OK == 1) & 
+                (additionalPeaks$cpdID == spec@id) &
+                (additionalPeaks$scan == msmsdata@acquisitionNum),
+                c("mzFound", "intensity")]
+        # If there are peaks to add:
+        if(nrow(spec_add_peaks)>0)
+        {
+            # add the column for rel. int.
+            spec_add_peaks$intrel <- 0
+            # format m/z value
+            spec_add_peaks$mzSpec <- round(spec_add_peaks$mzFound, 4)
+            # bind tables together
+            peaks <- rbind(peaks, spec_add_peaks[,c("mzSpec", "intensity", "intrel")])
+            # recalculate rel.int.  and reorder list
+            peaks$intrel <- floor(peaks$intensity / max(peaks$intensity) * 999)
+            # Again, select the correct columns, and drop values with rel.int. <1 o/oo
+            # NOTE: If the highest additional peak is > than the previous highest peak,
+            # this can lead to the situation that a peak is in "annotation" but not in "peaks"!
+            # See below.
+            peaks <- peaks[ peaks$intrel >= 1, c("mzSpec", "intensity", "intrel")]
+            # Reorder again.
+            peaks <- peaks[order(peaks$mzSpec),]
+        }
     }
-  }
   
 
   
-  # add + or - to fragment formulas
-  formula_tag <- list(
-    "pH" = "+",
-    "pNa" = "+",
-    "mH" = "-",
-    "mFA" = "-",
-    "pM" = "+",
-    "mM" = "-",
-	"pNH4" = "+")
-  type <- formula_tag[[spec@mode]]
-  
-  annotator <- getOption("RMassBank")$annotator
-  if(is.null(annotator))
-    annotator <- "annotator.default"
-  
+    # add + or - to fragment formulas
+    formula_tag <- list(
+        "pH" = "+",
+        "pNa" = "+",
+        "mH" = "-",
+        "mFA" = "-",
+        "pM" = "+",
+        "mM" = "-",
+        "pNH4" = "+")
+    type <- formula_tag[[spec@mode]]
   
+    annotator <- getOption("RMassBank")$annotator
+    if(is.null(annotator))
+        annotator <- "annotator.default"
   
-  # Here, the relative intensity is recalculated using the newly added additional
-  # peaks from the peak list. Therefore, we throw superfluous peaks out again.
-  # NOTE: It is a valid question whether or not we should kick peaks out at this stage.
-  # The alternative would be to leave the survivors at 1 o/oo, but keep them in the spectrum.
-  annotation$intrel <- floor(annotation$intensity / max(peaks$intensity) * 999)
-  annotation <- annotation[annotation$intrel >= 1,]
   
-  annotation <- do.call(annotator, list(annotation= annotation, type=type))
   
-  
-  # Name the columns correctly.
-  colnames(peaks) <- c("m/z", "int.", "rel.int.")
-  peaknum <- nrow(peaks)
-  
-  # Create the "lower part" of the record.  
-  mbdata <- list()
-  # Add the AC$MS, AC$LC info.
-  if(getOption("RMassBank")$use_version == 2)
-  {
-    mbdata[["AC$MASS_SPECTROMETRY"]] <- ac_ms
-    mbdata[["AC$CHROMATOGRAPHY"]] <- ac_lc
-  }
-  else
-  {
-      # Fix for MassBank data format 1, where ION_MODE must be renamed to MODE
-      mbdata[["AC$ANALYTICAL_CONDITION"]] <- c(ac_ms, ac_lc)
-      names(mbdata[["AC$ANALYTICAL_CONDITION"]])[[3]] <- "MODE"
-  }
-  # Add the MS$FOCUSED_ION info.
-  mbdata[["MS$FOCUSED_ION"]] <- ms_fi
+    # Here, the relative intensity is recalculated using the newly added additional
+    # peaks from the peak list. Therefore, we throw superfluous peaks out again.
+    # NOTE: It is a valid question whether or not we should kick peaks out at this stage.
+    # The alternative would be to leave the survivors at 1 o/oo, but keep them in the spectrum.
+    annotation$intrel <- floor(annotation$intensity / max(peaks$intensity) * 999)
+    annotation <- annotation[annotation$intrel >= 1,]
+
+    annotation <- do.call(annotator, list(annotation= annotation, type=type))
+
+
+    # Name the columns correctly.
+    colnames(peaks) <- c("m/z", "int.", "rel.int.")
+    peaknum <- nrow(peaks)
 
-  # the data processing tag :)
-  # Change by Tobias:
-  # I suggest to add here the current version number of the clone due to better distinction between different makes of MB records
-  # Could be automatised from DESCRIPTION file?
-  if(getOption("RMassBank")$use_rean_peaks)
-      processingComment <- list("REANALYZE" = "Peaks with additional N2/O included")
-  else
-      processingComment <- list()
-  mbdata[["MS$DATA_PROCESSING"]] <- c(
-    getOption("RMassBank")$annotations$ms_dataprocessing,
-    processingComment,
-    list("WHOLE" = paste("RMassBank", packageVersion("RMassBank")))
+    # Create the "lower part" of the record.  
+    mbdata <- list()
+    # Add the AC$MS, AC$LC info.
+    if(getOption("RMassBank")$use_version == 2)
+    {
+        mbdata[["AC$MASS_SPECTROMETRY"]] <- ac_ms
+        mbdata[["AC$CHROMATOGRAPHY"]] <- ac_lc
+    }
+    else
+    {
+        # Fix for MassBank data format 1, where ION_MODE must be renamed to MODE
+        mbdata[["AC$ANALYTICAL_CONDITION"]] <- c(ac_ms, ac_lc)
+        names(mbdata[["AC$ANALYTICAL_CONDITION"]])[[3]] <- "MODE"
+    }
+    # Add the MS$FOCUSED_ION info.
+    mbdata[["MS$FOCUSED_ION"]] <- ms_fi
+
+    ## The SPLASH is a hash value calculated across all peaks
+    ## http://splash.fiehnlab.ucdavis.edu/
+    ## Has to be temporarily added as "PK$SPLASH" in the "lower" part
+    ## of the record, but will later be moved "up" when merging parts in compileRecord()  
+
+    # the data processing tag :)
+    # Change by Tobias:
+    # I suggest to add here the current version number of the clone due to better distinction between different makes of MB records
+    # Could be automatised from DESCRIPTION file?
+    if(getOption("RMassBank")$use_rean_peaks)
+        processingComment <- list("REANALYZE" = "Peaks with additional N2/O included")
+    else
+        processingComment <- list()
+    mbdata[["MS$DATA_PROCESSING"]] <- c(
+        getOption("RMassBank")$annotations$ms_dataprocessing,
+        processingComment,
+        list("WHOLE" = paste("RMassBank", packageVersion("RMassBank")))
     )
   
-  # Annotation:
-  if(getOption("RMassBank")$add_annotation==TRUE)
-    mbdata[["PK$ANNOTATION"]] <- annotation
-  # Peak table
-  mbdata[["PK$NUM_PEAK"]] <- peaknum
-  mbdata[["PK$PEAK"]] <- peaks
-  # These two entries will be thrown out later, but they are necessary to build the
-  # record title and the accession number.
-  mbdata[["RECORD_TITLE_CE"]] <- msmsdata@info$ces #formatted collision energy
-  # Mode of relative scan calculation: by default it is calculated relative to the
-  # parent scan. If a corresponding option is set, it will be calculated from the first
-  # present child scan in the list.
-  relativeScan <- "fromParent"
-  if(!is.null(getOption("RMassBank")$recomputeRelativeScan))
-	  if(getOption("RMassBank")$recomputeRelativeScan == "fromFirstChild")
-		  relativeScan <- "fromFirstChild"
-  if(relativeScan == "fromParent")
-	  mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - spec@parent@acquisitionNum #relative scan
-  else if(relativeScan == "fromFirstChild")
-  {
-	  firstChild <- min(unlist(lapply(spec@children,function(d) d@acquisitionNum)))
-	  mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - firstChild + 1
-  }
-  return(mbdata)
+    mbdata[["PK$SPLASH"]] <- list(SPLASH = getSplash(peaks[,c("m/z", "int.")]))
+
+    # Annotation:
+    if(getOption("RMassBank")$add_annotation && (findLevel(id,TRUE)!="unknown"))
+        mbdata[["PK$ANNOTATION"]] <- annotation
+    
+    # Peak table
+    mbdata[["PK$NUM_PEAK"]] <- peaknum
+    mbdata[["PK$PEAK"]] <- peaks
+    # These two entries will be thrown out later, but they are necessary to build the
+    # record title and the accession number.
+    mbdata[["RECORD_TITLE_CE"]] <- msmsdata@info$ces #formatted collision energy
+    # Mode of relative scan calculation: by default it is calculated relative to the
+    # parent scan. If a corresponding option is set, it will be calculated from the first
+    # present child scan in the list.
+    relativeScan <- "fromParent"
+    if(!is.null(getOption("RMassBank")$recomputeRelativeScan))
+        if(getOption("RMassBank")$recomputeRelativeScan == "fromFirstChild")
+            relativeScan <- "fromFirstChild"
+    if(relativeScan == "fromParent")
+        mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - spec@parent@acquisitionNum #relative scan
+    else if(relativeScan == "fromFirstChild"){
+            firstChild <- min(unlist(lapply(spec@children,function(d) d@acquisitionNum)))
+            mbdata[["SUBSCAN"]] <- msmsdata@acquisitionNum - firstChild + 1
+        }
+    return(mbdata)
 }
 
 
@@ -1291,7 +1513,7 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
 #' spectrum data, and finally fills in the record title and accession number,
 #' renames the "internal ID" comment field and removes dummy fields.
 #' 
-#' @usage compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL)
+#' @usage compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
 #' @param spec A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}).
 #' Note that \bold{peaks are not read from this
 #' object anymore}: Peaks come from the \code{aggregated} dataframe (and from
@@ -1302,6 +1524,9 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
 #' @param aggregated An aggregated peak data table containing information about refiltered spectra etc.
 #' @param additionalPeaks If present, a table with additional peaks to add into the spectra.
 #' 		As loaded with \code{\link{addPeaks}}.
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
 #' @return Returns a MassBank record in list format: e.g.
 #' \code{list("ACCESSION" = "XX123456", "RECORD_TITLE" = "Cubane", ...,
 #' "CH\$LINK" = list( "CAS" = "12-345-6", "CHEMSPIDER" = 1111, ...))}
@@ -1319,43 +1544,44 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
 #' \dontrun{compiled <- compileRecord(myspec, mbdata, w@@aggregated)}
 #' 
 #' @export
-compileRecord <- function(spec, mbdata, aggregated, additionalPeaks = NULL)
+compileRecord <- function(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
 {
-  # gather the individual spectra data
-  mblist <- gatherCompound(spec, aggregated, additionalPeaks)
-  # this returns a n-member list of "lower parts" of spectra (one for each subscan).
-  # (n being the number of child scans per parent scan.)
-  # Now we put the two parts together.
-  # (lapply on all n subscans, returns a list.)
-  mblist_c <- lapply(mblist, function(l)
+    # gather the individual spectra data
+    mblist <- gatherCompound(spec, aggregated, additionalPeaks, retrieval=retrieval)
+    # this returns a n-member list of "lower parts" of spectra (one for each subscan).
+    # (n being the number of child scans per parent scan.)
+    # Now we put the two parts together.
+    # (lapply on all n subscans, returns a list.)
+    mblist_c <- lapply(mblist, function(l)
     {
-      # This is the step which sticks together the upper and the lower part of the
-      # record (the upper being compound-specific and the lower being scan-specific.)
-      # Note that the accession number and record title (in the upper part) must of course
-      # be filled in with scan-specific info.
-      mbrecord <- c(mbdata, l)
-      # Here is the right place to fix the name of the INTERNAL ID field.
-      names(mbrecord[["COMMENT"]])[[which(names(mbrecord[["COMMENT"]]) == "ID")]] <-
+        # This is the step which sticks together the upper and the lower part of the
+        # record (the upper being compound-specific and the lower being scan-specific.)
+        # Note that the accession number and record title (in the upper part) must of course
+        # be filled in with scan-specific info.
+        mbrecord <- c(mbdata, l)
+
+        # Here is the right place to fix the name of the INTERNAL ID field.
+        names(mbrecord[["COMMENT"]])[[which(names(mbrecord[["COMMENT"]]) == "ID")]] <-
         getOption("RMassBank")$annotations$internal_id_fieldname
-	  # get mode parameter (for accession number generation) depending on version 
-	  # of record definition
-	  # Change by Tobias:
-	  # I suggest to include fragmentation mode here for information
-      if(getOption("RMassBank")$use_version == 2)
+        # get mode parameter (for accession number generation) depending on version 
+        # of record definition
+        # Change by Tobias:
+        # I suggest to include fragmentation mode here for information
+        if(getOption("RMassBank")$use_version == 2)
         mode <- mbrecord[["AC$MASS_SPECTROMETRY"]][["ION_MODE"]]
-      else
+        else
         mode <- mbrecord[["AC$ANALYTICAL_CONDITION"]][["MODE"]]
-	  # Generate the title and then delete the temprary RECORD_TITLE_CE field used before
-	  mbrecord[["RECORD_TITLE"]] <- .parseTitleString(mbrecord)
-      mbrecord[["RECORD_TITLE_CE"]] <- NULL
-      # Calculate the accession number from the options.
-      shift <- getOption("RMassBank")$accessionNumberShifts[[spec@mode]]
-      mbrecord[["ACCESSION"]] <- sprintf("%s%04d%02d", getOption("RMassBank")$annotations$entry_prefix, as.numeric(spec@id), as.numeric(mbrecord[["SUBSCAN"]])+shift)
-      # Clear the "SUBSCAN" field.
-      mbrecord[["SUBSCAN"]] <- NULL
-      # return the record.
-      return(mbrecord)
-  })
+        # Generate the title and then delete the temprary RECORD_TITLE_CE field used before
+        mbrecord[["RECORD_TITLE"]] <- .parseTitleString(mbrecord)
+        mbrecord[["RECORD_TITLE_CE"]] <- NULL
+        # Calculate the accession number from the options.
+        shift <- getOption("RMassBank")$accessionNumberShifts[[spec@mode]]
+        mbrecord[["ACCESSION"]] <- sprintf("%s%04d%02d", getOption("RMassBank")$annotations$entry_prefix, as.numeric(spec@id), as.numeric(mbrecord[["SUBSCAN"]])+shift)
+        # Clear the "SUBSCAN" field.
+        mbrecord[["SUBSCAN"]] <- NULL
+        # return the record.
+        return(mbrecord)
+    })
 }
 
 
@@ -1385,11 +1611,11 @@ compileRecord <- function(spec, mbdata, aggregated, additionalPeaks = NULL)
 annotator.default <- function(annotation, type)
 {
   
-  annotation$formula <- paste(annotation$formula, type, sep='')
-  # Select the right columns and name them correctly for output.
-  annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
-  colnames(annotation) <- c("m/z", "tentative_formula", "formula_count", "mass", "error(ppm)")
-  return(annotation)
+    annotation$formula <- paste(annotation$formula, type, sep='')
+    # Select the right columns and name them correctly for output.
+    annotation <- annotation[,c("mzSpec","formula", "formulaCount", "mzCalc", "dppm")]
+    colnames(annotation) <- c("m/z", "tentative_formula", "formula_count", "mass", "error(ppm)")
+    return(annotation)
 }
 
 #' Parse record title
@@ -1462,7 +1688,6 @@ annotator.default <- function(annotation, type)
 				# I.e. from a string like "R={BLA: BLUB}" return "BLA: BLUB"
 				args <- regexec("\\{(.*)\\}", var)
 				arg <- regmatches(var, args)[[1]][[2]]
-				
 				# Split the parameter by colon if necessary
 				splitVar <- strsplit(arg, ": ")[[1]]
 				# Read the parameter value from the record
@@ -1480,6 +1705,12 @@ annotator.default <- function(annotation, type)
 				# Fix problems: Names will have >= 1 match. Take the first
 				if(length(replaceVar) > 1)
 					replaceVar <- replaceVar[[1]]
+                
+                # Fix problems: Unknowns might have no name
+                if(!length(replaceVar)){
+                    replaceVar <- ""
+                }
+                
 				# Substitute the parameter value into the string
 				parsedVar <- sub("\\{(.*)\\}", replaceVar, var)	
 				return(parsedVar)
@@ -1594,8 +1825,11 @@ toMassbank <- function (mbdata)
         
         lapply(names(mbdata[[entry]]), function(subentry)
         {
-          # todo
-          mbf[[count]] <<- paste(entry,": ",subentry, " ", mbdata[[entry]][[subentry]], sep='')
+          if(subentry != "SPLASH"){
+            mbf[[count]] <<- paste(entry,": ",subentry, " ", mbdata[[entry]][[subentry]], sep='')
+          } else {
+            mbf[[count]] <<- paste(entry,": ", mbdata[[entry]][[subentry]], sep='')
+          }
           #print(mbf)
           count <<- count + 1
         })
@@ -1669,24 +1903,26 @@ toMassbank <- function (mbdata)
 #' @export
 exportMassbank <- function(compiled, files, molfile)
 {
-  molnames <- c()
-  for(file in 1:length(compiled))
-  {
-    # Read the accession no. from the corresponding "compiled" entry
-    filename <- compiled[[file]]["ACCESSION"]
-    # use this accession no. as filename
-    filename <- paste(filename, ".txt", sep="")
-    write(files[[file]], 
-          file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata",filename)
-    )
-  }
-  # Use internal ID for naming the molfiles
-  molname <- sprintf("%04d", as.numeric(
-    compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
-  molname <- paste(molname, ".mol", sep="")
-  write(molfile,
-        file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata",molname)
-  )
+    molnames <- c()
+    for(file in 1:length(compiled))
+    {
+        # Read the accession no. from the corresponding "compiled" entry
+        filename <- compiled[[file]]["ACCESSION"]
+        # use this accession no. as filename
+        filename <- paste(filename, ".txt", sep="")
+        write(files[[file]], 
+            file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata",filename)
+        )
+    }
+    # Use internal ID for naming the molfiles
+    if(findLevel(compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]],TRUE)=="standard"){
+        molname <- sprintf("%04d", as.numeric(
+        compiled[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
+        molname <- paste(molname, ".mol", sep="")
+        write(molfile,
+            file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata",molname)
+        )
+    }
 }
 
 # Makes a list.tsv with molfile -> massbank ch$name attribution.
@@ -1711,24 +1947,29 @@ exportMassbank <- function(compiled, files, molfile)
 #' @export
 makeMollist <- function(compiled)
 {
-  # For every "compiled" entry (here, compiled is not one "compiled" entry but the total
-  # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the
-  # first spectrum.) Make the ID into 0000 format.
-  tsvlist <- t(sapply(compiled, function(entry)
+    # For every "compiled" entry (here, compiled is not one "compiled" entry but the total
+    # list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the
+    # first spectrum.) Make the ID into 0000 format.
+    
+    tsvlist <- t(sapply(compiled, function(entry)
     {
-    name <- entry[[1]][["CH$NAME"]][[1]]
-    id <- sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
-    molfilename <- paste(id,".mol",sep='')
-    return(c(name,molfilename))
-  }))
-  # Write the file with the 
-  write.table(tsvlist, 
-              paste(getOption("RMassBank")$annotations$entry_prefix,"/moldata/list.tsv", sep=''),
-              quote = FALSE,
-              sep="\t",
-              row.names=FALSE,
-              col.names=FALSE
-              )
+        name <- entry[[1]][["CH$NAME"]][[1]]
+        id <- sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))
+        molfilename <- paste(id,".mol",sep='')
+        return(c(name,molfilename))
+    }))
+    
+    IDs <- sapply(compiled, function(entry) return( sprintf("%04d", as.numeric(entry[[1]][["COMMENT"]][[getOption("RMassBank")$annotations$internal_id_fieldname]][[1]]))))
+    level <- sapply(IDs, findLevel, compact=TRUE)
+    validentries <- which(level == "standard")
+    # Write the file with the 
+    write.table(tsvlist[validentries,], 
+                paste(getOption("RMassBank")$annotations$entry_prefix,"/moldata/list.tsv", sep=''),
+                quote = FALSE,
+                sep="\t",
+                row.names=FALSE,
+                col.names=FALSE
+    )
 }
 
 
diff --git a/R/formulaCalculator.R b/R/formulaCalculator.R
index 26cb54d685680aec191042ecfc26c2ba69d4f3d9..5b9257475ff0b90c116da040d69063bd73043623 100755
--- a/R/formulaCalculator.R
+++ b/R/formulaCalculator.R
@@ -52,12 +52,12 @@ NULL
 #' @export
 to.limits.rcdk <- function(formula)
 {
-  if(!is.list(formula))
-    formula <- formulastring.to.list(formula)
-  elelist <- lapply(names(formula), function(element) {
-    return(c(element, 0, formula[[element]]))
-  })
-  return(elelist)
+    if(!is.list(formula))
+        formula <- formulastring.to.list(formula)
+    elelist <- lapply(names(formula), function(element) {
+                    return(c(element, 0, formula[[element]]))
+                })
+    return(elelist)
 }
 
 
@@ -342,3 +342,15 @@ ppm <- function(mass, dppm, l=FALSE, p=FALSE)
 .emass <- 0.0005485799
 ## pmass <- 1.007276565
 ## hmass <- 1.007825
+
+
+split.formula.posneg <- function(f, as.formula = TRUE, as.list=FALSE)
+{
+	if(!is.list(f)) f <- formulastring.to.list(f)
+	pos <- f[which(f > 0)]
+	neg <- f[which(f < 0)]
+	if(as.formula & !as.list)
+		return(list(pos=list.to.formula(pos), neg=list.to.formula(neg)))
+	else
+		return(list(pos=pos, neg=neg))
+}
\ No newline at end of file
diff --git a/R/getSplash.R b/R/getSplash.R
new file mode 100644
index 0000000000000000000000000000000000000000..a3299c8201c9bc4452125c70d67aebfbe51dcfb9
--- /dev/null
+++ b/R/getSplash.R
@@ -0,0 +1,66 @@
+
+## Some constants
+BINS <- 10;
+BIN_SIZE <- 100;
+EPS_CORRECTION = 1.0e-7
+
+## FINAL_SCALE_FACTOR <- 9; ## Base 10
+FINAL_SCALE_FACTOR <- 35; ## Base 36 
+
+decimalavoidance <- function(x) {
+    format(floor (x * 1000000), scientific=FALSE)
+}
+
+getBlockHash <- function(peaks,
+                          RELATIVE_INTENSITY_SCALE=100,
+                          MAX_HASH_CHARATERS_ENCODED_SPECTRUM=20) {
+    max_intensity = max(peaks[,2])
+
+    ## Scale to maximum intensity
+    peaks[,2] <-    as.integer(peaks[,2] / max(peaks[,2]) * RELATIVE_INTENSITY_SCALE + EPS_CORRECTION)
+
+    ## Sorted by ascending m/z and ties broken by descending intensity
+    o <- order(peaks[,1], -1*peaks[,2], decreasing=FALSE)
+    
+    peakString <- paste(apply(peaks[o,,drop=FALSE], MARGIN=1,
+                              FUN=function(x) {paste(c(decimalavoidance(x[1]+EPS_CORRECTION), x[2]),
+                                  collapse=":")}), collapse=" ")
+
+    ## cat("Prehash: ", peakString, sep="", file="/tmp/prehash-R") ## Debugging of spectrum-string
+    block2 <- substr(digest(peakString, algo="sha256", serialize=FALSE),
+                     1, MAX_HASH_CHARATERS_ENCODED_SPECTRUM)    
+}
+
+integer2base36 <- function(i) {
+    integer2base36code <- sapply(c(48:57, 97:122), function(i) rawToChar(as.raw(i)))
+    paste(integer2base36code[i+1], collapse="")
+}
+
+getBlockHist <- function(peaks) {
+    ## Initialise output
+    wrappedhist <- integer(BINS)
+
+    binindex <- as.integer(peaks[,1] / BIN_SIZE) 
+
+    summedintensities <- tapply(peaks[,2], binindex, sum)
+    wrappedbinindex <- unique(binindex) %% BINS 
+    wrappedintensities <- tapply(summedintensities, wrappedbinindex, sum)
+    normalisedintensities <- as.integer(wrappedintensities/max(wrappedintensities)*FINAL_SCALE_FACTOR)
+    
+    wrappedhist[sort(unique(wrappedbinindex))+1] <- normalisedintensities
+    paste(integer2base36(wrappedhist), collapse="")
+  
+}
+
+getSplash <- function(peaks) {
+
+    block2 <- getBlockHist(peaks)
+    block3 <- getBlockHash(peaks)
+
+    splash <- paste("splash10",
+                    block2,
+                    block3,
+                    sep="-")
+
+    return(splash)
+}
diff --git a/R/leCsvAccess.R b/R/leCsvAccess.R
index bbe5fa5e25217c9a571cd1a4b1e078d8e2936966..7af0405d34992f3e273ad7f6ba9af540ed54c9a8 100755
--- a/R/leCsvAccess.R
+++ b/R/leCsvAccess.R
@@ -37,46 +37,190 @@ loadList <- function(path, listEnv = NULL, check = TRUE)
 		listEnv <- .listEnvEnv
 	if(!file.exists(path))
 		stop("The supplied file does not exist, please supply a correct path")
-	compoundList <- read.csv(path, stringsAsFactors=FALSE)
-	# check whether the necessary columns are present
+        
+    # Try out if the file is comma- or semicolon-separated
+    compoundList <- read.csv(path, stringsAsFactors=FALSE, check.names=FALSE)
 	n <- colnames(compoundList)
-	cols <- c('ID', 'Name', 'SMILES', 'RT', 'CAS')
-	d <- setdiff(cols, n)
-	if(length(d)>0){
-		compoundList <- read.csv2(path, stringsAsFactors=FALSE)
-		n <- colnames(compoundList)
-		d2 <- setdiff(cols, n)
-		if(length(d2) > 0){
-			stop("Some columns are missing in the compound list. Needs at least ID, Name, SMILES, RT, CAS.")
-		}
-	}
+    if(!("ID" %in% n)){ # If no ID column, it must be semicolon separated
+        compoundList <- read.csv2(path, stringsAsFactors=FALSE, check.names=FALSE)
+        n <- colnames(compoundList)
+        if(!("ID" %in% n)){ # ...or there must be something wrong with the column names
+             stop("There is no 'ID' column in the compound list")
+        }
+    }
+    
+    # Now everything should be fine, at least regarding csv and ssv
+    
+    # Are there duplicate compound IDs? 
+    if(any(duplicated(compoundList$ID))){
+        stop("Duplicate compound IDs are present. Please check the compound list.")
+    }
+    
+    # Do all Compoundlist IDs have 4 characters or less?
+    if(any(nchar(compoundList$ID) > 4)){
+        stop("The maximum number of digits for compound IDs is 4")
+    }
+    
+    # Evaluate if all strictly necessary columns are in the list
+    cols <- c('ID', 'Name', 'SMILES', 'RT', 'CAS')
+    d <- setdiff(cols, n)
+    if(length(d)>0){
+        stop(paste("Some columns are missing in the compound list. It needs at least", paste(cols,collapse=", ")))
+    }
+    
+    # Is there a "Level" column?
+    if("Level" %in% n){
+        newList <- TRUE
+    } else {
+        newList <- FALSE
+    }
+    
+    # Assign for now...
+    assign("listEnv", listEnv, envir=.listEnvEnv) 
+    .listEnvEnv$listEnv$compoundList <- compoundList
+    # If "level" is in the compound list we have to check several things:
+    
+    if(newList){
+    
+        # a) Are the levels part of the defined levels?
+        # b) Are the values ok for every level? (i.e. all necessary values supplied for each line in the compound list?)
+        
+        # Check a) (and translate to number levels because handling numbers and text would make the if-statements here even more confusing)
+        
+        compoundList$Level <- sapply(as.character(compoundList$Level),.translateLevel)
+        
+        # Need this variable for levels 3b, 3c, 3d, 4 and potentially 3 and 3a
+        formulaColPresent <- "Formula" %in% n
+        
+        if(any(c("3b","3c","3d","4") %in% compoundList$Level)){
+    
+            if(!(formulaColPresent)){
+                .listEnvEnv$listEnv$compoundList <- NULL
+                stop('The compound list must contain the column "Formula" if a tentative or formula compound (levels 3b, 3c, 3d, 4) is present')
+            }
+        }
+        # Need this variable for level 5
+        if("5" %in% compoundList$Level){
+            mzCol <- which(c("mz","m/z","mass","exactMass") %in% n)
+            if(length(mzCol) > 1){
+                .listEnvEnv$listEnv$compoundList <- NULL
+                stop("The compound list can only contain one column with one of the following column names: 'mz','m/z','mass','exactMass' if an unknown compound (level 5) is present")
+            }
+        
+            if(mzCol == 2){
+                    n[which(n == "m/z")] <- "mz"
+            }
+            
+            if(mzCol == 4){
+                    n[which(n == "exactMass")] <- "mass"
+            }
+            .listEnvEnv$listEnv$mzCol <- c("mz","m/z","mass","exactMass")[mzCol]
+            colnames(compoundList) <- n
+        }
+        # Check b) 
+        for(i in 1:length(compoundList$Level)){
+            level <- compoundList$Level[i] 
+            # ID must have numbers as values
+            if(!is.numeric(compoundList[i,"ID"])){
+                .listEnvEnv$listEnv$compoundList <- NULL
+                stop(paste("Value for 'ID' in line", i, "of the compound list is not a valid compound ID"))
+            }
+            
+            # If level is "1x" or "2x", the SMILES must be supplied and must be correct
+            if(level %in% c("0","1","1a","1b","1c","2","2a","2b")){
+                currEnvir <- environment()
+                
+                tryCatch(
+                    findMz(compoundList[i,"ID"]),
+                    error = function(e){
+                        .listEnvEnv$listEnv$compoundList <- NULL
+                        stop(paste("'SMILES' value for compound", compoundList[i,"ID"] ,"in line", i, "of the compound list is not a valid SMILES"))
+                    }
+                )
+            }
+            
+            # If level is "3" or "3a", a valid smiles or formula must be supplied
+            if(level %in% c("3","3a")){
 
+                if(!is.na(findSmiles(compoundList[i,"ID"]))){
+                    tryCatch(
+                        findMz(compoundList[i,"ID"]),
+                        error = function(e){
+                            .listEnvEnv$listEnv$compoundList <- NULL
+                            stop(paste("'SMILES' value for compound", compoundList[i,"ID"] ,"in line", i, "of the compound list is not a valid SMILES"))
+                        }
+                    )
+                } else{
+                    if(!formulaColPresent){
+                        .listEnvEnv$listEnv$compoundList <- NULL
+                        stop(paste("Compound", compoundList[i,"ID"], "in line", i, "of the compound list is marked as level 3, but there is no valid SMILES
+                            value nor a 'Formula' column present"))
+                    }
+                    
+                    tryCatch(
+                        rcdk::get.formula(findFormula(compoundList[i,"ID"], retrieval="tentative")),
+                        error = function(e){
+                            .listEnvEnv$listEnv$compoundList <- NULL
+                            stop(paste("'Formula' value for compound", compoundList[i,"ID"] ,"in line", i, "of the compound list is not a valid Formula"))
+                        }
+                    )
+                }
+            }
+            
+            # If level is "3b","3c","3d" or "4", a valid formula must be supplied
+            if(level %in% c("3b","3c","3d","4")){
+                
+                tryCatch(
+                    rcdk::get.formula(findFormula(compoundList[i,"ID"], retrieval="tentative")),
+                    error = function(e){
+                        .listEnvEnv$listEnv$compoundList <- NULL
+                        stop(paste("'Formula' value for compound", compoundList[i,"ID"] ,"in line", i, "of the compound list is not a valid Formula"))
+                    }
+                )
+            }
+            # If level is "5", m/z must be supplied
+           
+            if(level == "5"){
+                if(!is.numeric(findMz(compoundList[i,"ID"], retrieval="unknown")$mzCenter)){
+                    .listEnvEnv$listEnv$compoundList <- NULL
+                    stop(paste(c("mz","m/z","mass","exactMass")[mzCol],"value for compound", compoundList[i,"ID"] ,"in line", i, "of the compound list is not a valid number"))
+                }
+            }
+        }
+    }
+    
+    # If "Level" is not in the compound list it MUST be a standard list, so process just as before:
+    if(!newList){
+        cols <- c('ID', 'Name', 'SMILES', 'RT', 'CAS')
+        d <- setdiff(cols, n)
+        if(length(d)>0){
+            stop("Some columns are missing in the compound list. Needs at least ID, Name, SMILES, RT, CAS.")
+        }
 	
-	if(length(duplicated(compoundList$cpdID)) > 0)
-		stop("Duplicate compound IDs are present. Please check your list.")
-	assign("listEnv", listEnv, envir=.listEnvEnv) 
-	.listEnvEnv$listEnv$compoundList <- compoundList
-	
-	
-	###
-	###Test if all the IDs work
-	###
-	
-	if(check){
-		wrongID <- vector()
-		currEnvir <- environment()
-		sapply(compoundList[,"ID"], function(x){
-			tryCatch(
-				findMz(x),
-				error = function(e){
-					currEnvir$wrongID <- c(currEnvir$wrongID, x)
-				}
-			)
-		})
-		if(length(wrongID)){
-			stop(paste("Unable to interpret the SMILES-strings for ID(s)", paste(wrongID, collapse= " "), "\nPlease check these and load the list again."))
-		}
-	}
+        ###
+        ###Test if all the IDs work
+        ###
+        
+        if(check){
+            wrongID <- vector()
+            currEnvir <- environment()
+            sapply(compoundList[,"ID"], function(x){
+                tryCatch(
+                    findMz(x),
+                    error = function(e){
+                        currEnvir$wrongID <- c(currEnvir$wrongID, x)
+                    }
+                )
+            })
+            if(length(wrongID)){
+                .listEnvEnv$listEnv$compoundList <- NULL
+                stop(paste("Unable to interpret the SMILES-strings for ID(s)", paste(wrongID, collapse= " "), "\nPlease check these and load the list again."))
+            }
+        }
+        Level <- rep("0",nrow(compoundList))
+        .listEnvEnv$listEnv$compoundList <- cbind(compoundList,Level)
+    }
+    message("Loaded compoundlist successfully")
 }
 
 #' @export
@@ -90,6 +234,61 @@ resetList <- function()
 	assign("listEnv", NULL, envir=.listEnvEnv)
 }
 
+# Function that translates one entry of the level column of the compound list into the number system
+.translateLevel <- function(level){
+    if(!(level %in% c("0","1","1a","1b","1c","2","2a","2b","3","3a","3b","3c","3d","4","5"))){
+        switch(level,
+            standard={
+                return("1a")
+            },
+            parent={
+                return("1b")
+            },
+            confirmed={
+                return("1c")
+            },
+            probable={
+                return("2")
+            },
+            probableLibrary={
+                return("2a")
+            },
+            probableDiagnostic={
+                return("2b")
+            },
+            tentative={
+                return("3")
+            },
+            tentativeStructure={
+                return("3a")
+            },
+            tentativeIsomer={
+                return("3b")
+            },
+            tentativeTPClass={
+                return("3c")
+            },
+            tentativeBestMatch={
+                return("3d")
+            },
+            formula={
+                return("4")
+            },
+            unknown={
+                return("5")
+            },
+            exactMass={
+                return("5")
+            },
+            {
+                stop(paste(level, "is not a valid level"))
+            }
+        )
+    } else{
+        return(level)
+    }
+}
+
 #' Create Rcdk molecule from SMILES
 #' 
 #' Generates a Rcdk molecule object from SMILES code, which is fully typed and
@@ -125,8 +324,6 @@ getMolecule <- function(smiles)
   return(mol)
 }
 
-
-
 #' Find the exact mass +/- a given margin for a given formula or its ions and adducts.
 #' 
 #' @param formula The molecular formula  in text or list format (see \code{\link{formulastring.to.list}}
@@ -139,25 +336,50 @@ getMolecule <- function(smiles)
 #' @examples findMz.formula("C6H6")
 #' @seealso \code{\link{findMz}}
 #' @export
-findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0)
+findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0) 
 {
-	if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
-	mzopt <- list(addition="", charge=0)
-	if(mode=="pH") mzopt <- list(addition="H", charge=1)
-	if(mode=="pNa") mzopt <- list(addition="Na", charge=1)
-	if(mode=="pM") mzopt <- list(addition="", charge=1)
-	if(mode=="pNH4") mzopt <- list(addition="NH4", charge=-1)
-	if(mode=="mH") mzopt <- list(addition="H-1", charge=-1)
-	if(mode=="mFA") mzopt <- list(addition="C1O2", charge=-1)
-	if(mode=="mM") mzopt <- list(addition="", charge=-1)
-	if(mode=="") mzopt <- list(addition="", charge=0)
-	
-	
+	if (!any(mode %in% c("pH", "pNa", "pM", "pNH4", "mH", "mFA", 
+					"mM", ""))) 
+		stop(paste("The ionization mode", mode, "is unknown."))
+	mzopt <- list(addition = "", charge = 0)
+	if (mode == "pH") 
+		mzopt <- list(addition = "H", charge = 1)
+	if (mode == "pNa") 
+		mzopt <- list(addition = "Na", charge = 1)
+	if (mode == "pM") 
+		mzopt <- list(addition = "", charge = 1)
+	if (mode == "pNH4") 
+		mzopt <- list(addition = "NH4", charge = -1)
+	if (mode == "mH") 
+		mzopt <- list(addition = "H-1", charge = -1)
+	if (mode == "mFA") 
+		mzopt <- list(addition = "C1O2", charge = -1)
+	if (mode == "mM") 
+		mzopt <- list(addition = "", charge = -1)
+	if (mode == "") 
+		mzopt <- list(addition = "", charge = 0)
 	formula <- add.formula(formula, mzopt$addition)
-	formula <- get.formula(formula, charge=mzopt$charge)
-	m <- formula@mass
-	delta <- ppm(m, ppm, l=TRUE)
-	return(list(mzMin=delta[[2]] - deltaMz, mzMax=delta[[1]] + deltaMz, mzCenter=m))
+	# Since in special cases we want to use this with negative and zero number of atoms, we account for this case
+	# by splitting up the formula into positive and negative atom counts (this eliminates the zeroes.)
+	formula.split <- split.formula.posneg(formula)
+	m <- 0
+	if(formula.split$pos != "")
+	{
+		formula.pos <- get.formula(formula.split$pos, charge = mzopt$charge)
+		m = m + formula.pos@mass
+	}
+	if(formula.split$neg != "")
+	{
+		formula.neg <- get.formula(formula.split$neg, charge = -mzopt$charge)
+		m = m - formula.neg@mass
+	}
+	if((nchar(formula.split$pos)==0) & (nchar(formula.split$neg)==0))
+	{
+		m <- get.formula("H", charge = mzopt$charge)@mass - get.formula("H", charge = 0)@mass
+	}
+	delta <- ppm(m, ppm, l = TRUE)
+	return(list(mzMin = delta[[2]] - deltaMz, mzMax = delta[[1]] + 
+							deltaMz, mzCenter = m))
 }
 
 #' Find compound information
@@ -165,18 +387,20 @@ findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0)
 #' Retrieves compound information from the loaded compound list or calculates
 #' it from the SMILES code in the list.
 #' 
-#' @aliases findMz findSmiles findFormula findRt findCAS findName
-#' @usage  findMz(cpdID, mode = "pH", ppm = 10, deltaMz = 0)
+#' @aliases findMz findSmiles findFormula findRt findCAS findName findLevel
+#' @usage  findMz(cpdID, mode = "pH", ppm = 10, deltaMz = 0, retrieval="standard")
 #' 
 #' findRt(cpdID) 
 #' 
 #' findSmiles(cpdID) 
 #' 
-#' findFormula(cpdID) 
+#' findFormula(cpdID, retrieval="standard") 
 #' 
 #' findCAS(cpdID)
 #' 
 #' findName(cpdID)
+#'
+#' findLevel(cpdID, compact=FALSE)
 #' @param cpdID The compound ID in the compound list.
 #' @param mode Specifies the species of the molecule: An empty string specifies
 #' uncharged monoisotopic mass, \code{\var{pH}} (positive H) specifies [M+H]+,
@@ -189,6 +413,11 @@ findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0)
 #' @param deltaMz Specifies additional m/z window to add to the range (deltaMz
 #' = 0.02 will return the range of the molecular mass +- 0.02 (and additionally
 #' +- the set ppm value).
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @param compact Only for \code{findLevel}, returns the "retrieval" parameter used for many functions 
+#' within RMassBank if TRUE
 #' @return \code{findMz} will return a \code{list(mzCenter=, mzMin=, mzMax=)}
 #' with the molecular weight of the given ion, as calculated from the SMILES
 #' code and Rcdk.
@@ -207,16 +436,31 @@ findMz.formula <- function(formula, mode="pH", ppm=10, deltaMz=0)
 #' }
 #' 
 #' @export
-findMz <- function(cpdID, mode="pH", ppm=10, deltaMz=0)
+findMz <- function(cpdID, mode="pH", ppm=10, deltaMz=0, retrieval="standard")
 {
-  s <- findSmiles(cpdID)
-  #if(s=="-") s <- NA
-  if(is.na(s)){
-	stop("There was no matching SMILES-entry to the supplied cpdID(s) \n Please check the cpdIDs and the compoundlist.")
-	return(list(mzMin=NA,mzMax=NA,mzCenter=NA))
-  }
-  formula <- .get.mol2formula(getMolecule(s))
-  return(findMz.formula(formula@string, mode, ppm, deltaMz))
+    # In case of unknown: m/z is in table
+    if(retrieval == "unknown"){
+        mz <- .listEnvEnv$listEnv$compoundList[which(.listEnvEnv$listEnv$compoundList$ID == cpdID),.listEnvEnv$listEnv$mzCol]
+        delta <- ppm(mz, ppm, l=TRUE)
+        return(list(mzMin=delta[2] - deltaMz, mzMax=delta[1] + deltaMz, mzCenter=mz))
+    } 
+    
+    # In case of tentative: calculate mass from formula
+    if(retrieval == "tentative"){
+        return(findMz.formula(findFormula(cpdID, "tentative"),mode=mode))
+    }
+    
+    # All other cases: Use smiles to calculate mass
+    if(retrieval == "standard"){
+        s <- findSmiles(cpdID)
+        #if(s=="-") s <- NA
+        if(is.na(s)){
+            stop("There was no matching SMILES-entry to the supplied cpdID(s) \n  Please check the cpdIDs and the compoundlist.")
+            return(list(mzMin=NA,mzMax=NA,mzCenter=NA))
+        }
+        formula <- .get.mol2formula(getMolecule(s))
+        return(findMz.formula(formula@string, mode, ppm, deltaMz))
+    }
 }
 
 #findMz <- function(...)	findMz.rcdk(...)
@@ -228,7 +472,7 @@ findRt <- function(cpdID) {
 		stop("Compound list must be loaded first.")
 	if(is.character(cpdID))
 		cpdID <- as.numeric(cpdID)
-	rt <- .listEnvEnv$listEnv$compoundList[which(.listEnvEnv$listEnv$compoundList$ID == cpdID),"RT"]
+	rt <- as.numeric(.listEnvEnv$listEnv$compoundList[which(.listEnvEnv$listEnv$compoundList$ID == cpdID),"RT"])
 	if(!is.null(getOption("RMassBank")$rtShift))
 		rt <- rt + getOption("RMassBank")$rtShift
 	if(is.na(rt)) return(list(RT=NA))
@@ -243,15 +487,26 @@ findSmiles <- function(cpdID) {
 		stop("Compound list must be loaded first.")
 	if(!exists("compoundList", where=.listEnvEnv$listEnv))
 		stop("Compound list must be loaded first.")
+    if(.listEnvEnv$listEnv$compoundList[match(cpdID, .listEnvEnv$listEnv$compoundList$ID),"SMILES"] == "")
+        return(NA)
 	return(.listEnvEnv$listEnv$compoundList[match(cpdID, .listEnvEnv$listEnv$compoundList$ID),"SMILES"])
 }
 
 #' @export
-findFormula <- function(cpdID) {
-  smiles <- findSmiles(cpdID)
-  mol <- getMolecule(smiles)
-  f <- .get.mol2formula(mol)
-  return(f@string)
+findFormula <- function(cpdID, retrieval = "standard") {
+    
+    # In case of tentative: read formula from table
+    if(retrieval=="tentative"){
+        return(.listEnvEnv$listEnv$compoundList[which(.listEnvEnv$listEnv$compoundList$ID == cpdID),"Formula"])
+    }
+    
+    # Otherwise: Convert smiles to formula
+    if(retrieval=="standard"){
+        smiles <- findSmiles(cpdID)
+        mol <- getMolecule(smiles)
+        f <- .get.mol2formula(mol)
+        return(f@string)
+    }
 }
 
 #' @export
@@ -276,6 +531,31 @@ findName <- function(cpdID) {
 	return(.listEnvEnv$listEnv$compoundList[which(.listEnvEnv$listEnv$compoundList$ID == cpdID),"Name"])
 }
 
+#' @export
+findLevel <- function(cpdID, compact=FALSE) {
+	if(is.character(cpdID))
+		cpdID <- as.numeric(cpdID)
+	if(is.null(.listEnvEnv$listEnv))
+		stop("Compound list must be loaded first.")
+	if(!exists("compoundList", where=.listEnvEnv$listEnv))
+		stop("Compound list must be loaded first.")
+    if(compact){
+        level <- .listEnvEnv$listEnv$compoundList[which(.listEnvEnv$listEnv$compoundList$ID == cpdID),"Level"]
+        if(level %in% c("0","1","1a","1b","1c","2","2a","2b","3","3a")){
+            return("standard")
+        }
+        if(level %in% c("3b","3c","3d","4")){
+            return("tentative")
+        }
+        if(level == "5"){
+            return("unknown")
+        }
+        
+        return("tentative")
+    }
+	return(.listEnvEnv$listEnv$compoundList[which(.listEnvEnv$listEnv$compoundList$ID == cpdID),"Level"])
+}
+
 #' Calculate exact mass
 #' 
 #' Retrieves the exact mass of the uncharged molecule. It works directly from
@@ -284,9 +564,14 @@ findName <- function(cpdID) {
 #' SMILES code retrieved from the database. (Alternatively, takes also the
 #' compound ID as parameter and looks it up.) Calculation relies on Rcdk.
 #' 
-#' @usage findMass(cpdID_or_smiles)
 #' @param cpdID_or_smiles SMILES code or compound ID of the molecule. (Numerics
 #' are treated as compound ID).
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
+#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+#' 			([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+#' Only needed for retrieval="unknown"
 #' @return  Returns the exact mass of the uncharged molecule.
 #' @author Michael Stravs
 #' @seealso \code{\link{findMz}}
@@ -296,12 +581,49 @@ findName <- function(cpdID) {
 #' findMass("OC[C@@H]1OC(O)[C@@H](O)[C@@@@H](O)[C@@@@H]1O")
 #' 
 #' @export
-findMass <- function(cpdID_or_smiles)
+findMass <- function(cpdID_or_smiles, retrieval="standard", mode = "pH")
 {
-	if(!is.numeric(cpdID_or_smiles))
-		s <- cpdID_or_smiles
-	else
-		s <- findSmiles(cpdID_or_smiles)
-	mol <- getMolecule(s)
-	return(get.exact.mass(mol))
+    # Must calculate mass manually if no formula is given
+    if(retrieval == "unknown"){
+        if(mode == "pH") {
+            mass <- 1.00784
+            mode.charge <- 1
+        } else if(mode == "pNa") {
+            mass <- 22.989769
+            mode.charge <- 1
+        } else if(mode == "pM") {
+            mass <- 0
+            mode.charge <- 1
+        } else if(mode == "mM") {
+            mass <- 0
+            mode.charge <- -1
+        } else if(mode == "mH") {
+            mass <- -1.00784
+            mode.charge <- -1
+        } else if(mode == "mFA") {
+            mass <- 59.0440
+            mode.charge <- -1
+        } else if(mode == "pNH4") {
+            mass <- 18.03846
+            mode.charge <- 1
+        } else{
+          stop("mode = \"", mode, "\" not defined")
+        }
+        return(findMz(cpdID_or_smiles, mode=mode, retrieval=retrieval)$mzCenter - mass + mode.charge * .emass)
+    }
+    
+    # Calculate mass from formula
+    if(retrieval == "tentative"){
+        return(get.formula(findFormula(cpdID_or_smiles, "tentative"))@mass)
+    }
+    
+    # Calculate mass from SMILES
+    if(retrieval == "standard"){
+        if(!is.numeric(cpdID_or_smiles))
+            s <- cpdID_or_smiles
+        else
+            s <- findSmiles(cpdID_or_smiles)
+        mol <- getMolecule(s)
+        return(get.exact.mass(mol))
+    }
 }
\ No newline at end of file
diff --git a/R/leMsMs.r b/R/leMsMs.r
index 9e85bccf5629b42c2a5e488135aac7bda79d53cc..3590b7db354244f6c5728e704e6170398e2e6218 100755
--- a/R/leMsMs.r
+++ b/R/leMsMs.r
@@ -79,164 +79,205 @@ msmsWorkflow <- function(w, mode="pH", steps=c(1:8), confirmMode = FALSE, newRec
 		progressbar = "progressBarHook", MSe = FALSE)
 {
     .checkMbSettings()
-  if(!any(mode %in% c("pH","pNa","pNH4","pM","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
-  
-  if(!is.na(archivename))
-	  w@archivename <- archivename
-  
-  # Make a progress bar:
-  nProg <- 0
-  nLen <- length(w@files)
+    if(!any(mode %in% c("pH","pNa","pNH4","pM","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
+
+    if(!is.na(archivename))
+        w@archivename <- archivename
   
-  if(readMethod == "minimal"){
-	##Edit options
-	opt <- getOption("RMassBank")
-	opt$recalibrator$MS1 <- "recalibrate.identity"
-	opt$recalibrator$MS2 <- "recalibrate.identity"
-	options(RMassBank=opt)
-	##Edit analyzemethod
-	analyzeMethod <- "intensity"
-  }
+    # Make a progress bar:
+    nProg <- 0
+    nLen <- length(w@files)
+    
+    allUnknown <- FALSE
+    
+    # If all compounds are unknown some specific conditions apply
+    if(all(.listEnvEnv$listEnv$compoundList$Level == "5")){
+        allUnknown <- TRUE
+        message("All compounds are unknown, the workflow will be adjusted accordingly")
+    }
+    
+    if(readMethod == "minimal"){
+        ##Edit options
+        opt <- getOption("RMassBank")
+        opt$recalibrator$MS1 <- "recalibrate.identity"
+        opt$recalibrator$MS2 <- "recalibrate.identity"
+        opt$add_annotation <- FALSE
+        opt$multiplicityFilter <- 1
+        options(RMassBank=opt)
+        settings <- getOption("RMassBank")
+        ##Edit analyzemethod
+        analyzeMethod <- "intensity"
+    }
 
-  # clean rerun functionality:
-  # if any step after 3 has been run, rerunning steps 4 or below needs moving back to the parent workspace.
-  # However, the recalibration must be preserved, because:
-  # if someone runs       
-  # w <- msmsWorkflow(w, steps=c(1:4)),
-  # then substitutes the recalibration
-  # w@rc <- myrecal
-  # then runs step 4 again
-  # w <- msmsWorkflow(w, steps=c(4), newRecalibration=FALSE)
-  # the rc and rc.ms1 must be preserved and not taken from the parent workspace
-  if(!all(steps > 4) & !is.null(w@parent))
-  {
-    rc <- w@rc
-    rc.ms1 <- w@rc.ms1
-    w <- w@parent
-    w@rc <- rc
-    w@rc.ms1 <- rc.ms1
-  }
+    # clean rerun functionality:
+    # if any step after 3 has been run, rerunning steps 4 or below needs moving back to the parent workspace.
+    # However, the recalibration must be preserved, because:
+    # if someone runs       
+    # w <- msmsWorkflow(w, steps=c(1:4)),
+    # then substitutes the recalibration
+    # w@rc <- myrecal
+    # then runs step 4 again
+    # w <- msmsWorkflow(w, steps=c(4), newRecalibration=FALSE)
+    # the rc and rc.ms1 must be preserved and not taken from the parent workspace
+    if(!all(steps > 4) & !is.null(w@parent))
+    {
+        rc <- w@rc
+        rc.ms1 <- w@rc.ms1
+        w <- w@parent
+        w@rc <- rc
+        w@rc.ms1 <- rc.ms1
+    }
   
-  # Step 1: acquire all MSMS spectra from files
-  if(1 %in% steps)
-  {
-    message("msmsWorkflow: Step 1. Acquire all MSMS spectra from files")
-	w <- msmsRead(w = w, files = w@files, readMethod=readMethod, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, 
-					Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe)
-  }
-  # Step 2: first run analysis before recalibration
-  if(2 %in% steps)
-  {
-	  nProg <- 0
-	  message("msmsWorkflow: Step 2. First analysis pre recalibration")
-	  pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
-	  w@spectra <- as(lapply(w@spectra, function(spec) {
-				  #print(spec$id)
-				  s <- analyzeMsMs(spec, mode=mode, detail=TRUE, run="preliminary",
-						  filterSettings = settings$filterSettings,
-						  spectraList = settings$spectraList, method = analyzeMethod)
-				  # Progress:
-				  nProg <<- nProg + 1
-				  pb <- do.call(progressbar, list(object=pb, value= nProg))
-				  
-				  return(s)
-			  }), "SimpleList")
-	## for(f in w@files)
-	## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
-	  suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
-  }
-  # Step 3: aggregate all spectra
-  if(3 %in% steps)
-  {
-	message("msmsWorkflow: Step 3. Aggregate all spectra")
-    w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
-  }
-  # Step 4: recalibrate all m/z values in raw spectra
-  if(4 %in% steps)
-  {
-	message("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra")
-	if(newRecalibration)
-	{
-		# note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra
-		recal <- makeRecalibration(w, mode,
-				recalibrateBy = settings$recalibrateBy,
-				recalibrateMS1 = settings$recalibrateMS1,
-				recalibrator = settings$recalibrator,
-				recalibrateMS1Window = settings$recalibrateMS1Window)
-		w@rc <- recal$rc
-		w@rc.ms1 <- recal$rc.ms1
-	}
-	w@parent <- w
-	w@aggregated <- data.frame()
-    spectra <- recalibrateSpectra(mode, w@spectra, w = w,
-			recalibrateBy = settings$recalibrateBy,
-			recalibrateMS1 = settings$recalibrateMS1)
-	w@spectra <- spectra
-  }
-  # Step 5: re-analysis on recalibrated spectra
-  if(5 %in% steps)
-  {
-	nProg <- 0
-	message("msmsWorkflow: Step 5. Reanalyze recalibrated spectra")
-	pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
-	
-	w@spectra <- as(lapply(w@spectra, function(spec) {
-						#print(spec$id)
-						s <- analyzeMsMs(spec, mode=mode, detail=TRUE, run="recalibrated",
-								filterSettings = settings$filterSettings,
-								spectraList = settings$spectraList, method = analyzeMethod)
-						# Progress:
-						nProg <<- nProg + 1
-						pb <- do.call(progressbar, list(object=pb, value= nProg))
-						
-						return(s)
-					}), "SimpleList")
-	## for(f in w@files)
-	## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
-	suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
-	
-  do.call(progressbar, list(object=pb, close=TRUE))
-  }
-  # Step 6: aggregate recalibrated results
-  if(6 %in% steps)
-  {
-    message("msmsWorkflow: Step 6. Aggregate recalibrated results")
-    w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
-    if(!is.na(archivename))
-      archiveResults(w, paste(archivename, ".RData", sep=''), settings)
-    w@aggregated <- cleanElnoise(w@aggregated,
-			settings$electronicNoise, settings$electronicNoiseWidth)
-  }
-  # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks
-  if(7 %in% steps)
-  {
-	message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O")
-    w@aggregated <- reanalyzeFailpeaks(
-			w@aggregated, custom_additions="N2O", mode=mode,
-				filterSettings=settings$filterSettings,
-				progressbar=progressbar)
-    if(!is.na(archivename))
-      archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
-  }
-  # Step 8: heuristic filtering based on peak multiplicity;
-  #         creation of failpeak list
-  if(8 %in% steps)
-  {
-	message("msmsWorkflow: Step 8. Peak multiplicity filtering")
-    if (is.null(settings$multiplicityFilter)) {
-      message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
-    } else {
-      # apply heuristic filter      
-      w@aggregated <- filterMultiplicity(
-			w, archivename, mode, settings$multiplicityFilter )
-	  w@aggregated <- processProblematicPeaks(w, mode, archivename)
-
-      if(!is.na(archivename))
-        archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings)   
+    # Step 1: acquire all MSMS spectra from files
+    if(1 %in% steps)
+    {
+        message("msmsWorkflow: Step 1. Acquire all MSMS spectra from files")
+        w <- msmsRead(w = w, files = w@files, readMethod=readMethod, mode=mode, confirmMode = confirmMode, useRtLimit = useRtLimit, 
+                        Args = findPeaksArgs, settings = settings, progressbar = progressbar, MSe = MSe)
     }
-  }
-  message("msmsWorkflow: Done.")
-  return(w)
+    # Step 2: first run analysis before recalibration
+    if(2 %in% steps)
+    {
+        nProg <- 0
+        message("msmsWorkflow: Step 2. First analysis pre recalibration")
+        if(allUnknown){
+            analyzeMethod <- "intensity"
+        }
+        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+        w@spectra <- as(lapply(w@spectra, function(spec) {
+                        #print(spec$id)
+                        # if(findLevel(spec@id,TRUE) == "unknown"){
+                            # analyzeMethod <- "intensity"
+                        # } else {
+                            # analyzeMethod <- "formula"
+                        # }
+                        s <- analyzeMsMs(spec, mode=mode, detail=TRUE, run="preliminary",
+                              filterSettings = settings$filterSettings,
+                              spectraList = settings$spectraList, method = analyzeMethod)
+                        # Progress:
+                        nProg <<- nProg + 1
+                        pb <- do.call(progressbar, list(object=pb, value= nProg))
+
+                        return(s)
+        }), "SimpleList")
+        ## for(f in w@files)
+        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
+        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
+    }
+    # Step 3: aggregate all spectra
+    if(3 %in% steps)
+    {
+        message("msmsWorkflow: Step 3. Aggregate all spectra")
+        w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
+    }
+    
+    if(allUnknown){
+        w@aggregated$noise <- FALSE
+        w@aggregated$noise <- FALSE
+        w@aggregated$reanalyzed.formula <- NA
+        w@aggregated$reanalyzed.mzCalc <- NA
+        w@aggregated$reanalyzed.dppm <- NA
+        w@aggregated$reanalyzed.formulaCount <- NA
+        w@aggregated$reanalyzed.dbe <- NA
+        w@aggregated$matchedReanalysis <- NA
+        w@aggregated$filterOK <- TRUE
+        w@aggregated$problematicPeak <- FALSE
+        w@aggregated$formulaMultiplicity <- unlist(sapply(table(w@aggregated$cpdID),function(x) rep(x,x)))
+        return(w)
+    }
+    
+    
+    # Step 4: recalibrate all m/z values in raw spectra
+    if(4 %in% steps)
+    {
+        message("msmsWorkflow: Step 4. Recalibrate m/z values in raw spectra")
+        if(newRecalibration)
+        {
+            # note: makeRecalibration takes w as argument now, because it needs to get the MS1 spectra from @spectra
+            recal <- makeRecalibration(w, mode,
+                    recalibrateBy = settings$recalibrateBy,
+                    recalibrateMS1 = settings$recalibrateMS1,
+                    recalibrator = settings$recalibrator,
+                    recalibrateMS1Window = settings$recalibrateMS1Window)
+            w@rc <- recal$rc
+            w@rc.ms1 <- recal$rc.ms1
+        }
+        w@parent <- w
+        w@aggregated <- data.frame()
+        spectra <- recalibrateSpectra(mode, w@spectra, w = w,
+                recalibrateBy = settings$recalibrateBy,
+                recalibrateMS1 = settings$recalibrateMS1)
+        w@spectra <- spectra
+    }
+    # Step 5: re-analysis on recalibrated spectra
+    if(5 %in% steps)
+    {
+        nProg <- 0
+        message("msmsWorkflow: Step 5. Reanalyze recalibrated spectra")
+        pb <- do.call(progressbar, list(object=NULL, value=0, min=0, max=nLen))
+        
+        w@spectra <- as(lapply(w@spectra, function(spec) {
+                            #print(spec$id)
+                            if(findLevel(spec@id,TRUE) == "unknown"){
+                                analyzeMethod <- "intensity"
+                            } else {
+                                analyzeMethod <- "formula"
+                            }
+                            s <- analyzeMsMs(spec, mode=mode, detail=TRUE, run="recalibrated",
+                                    filterSettings = settings$filterSettings,
+                                    spectraList = settings$spectraList, method = analyzeMethod)
+                            # Progress:
+                            nProg <<- nProg + 1
+                            pb <- do.call(progressbar, list(object=pb, value= nProg))
+                            
+                            return(s)
+                        }), "SimpleList")
+        ## for(f in w@files)
+        ## w@spectra[[basename(as.character(f))]]@name <- basename(as.character(f))
+        suppressWarnings(do.call(progressbar, list(object=pb, close=TRUE)))
+	
+        do.call(progressbar, list(object=pb, close=TRUE))
+    }
+    # Step 6: aggregate recalibrated results
+    if(6 %in% steps)
+    {
+        message("msmsWorkflow: Step 6. Aggregate recalibrated results")
+        w@aggregated <- aggregateSpectra(w@spectra, addIncomplete=TRUE)
+        if(!is.na(archivename))
+          archiveResults(w, paste(archivename, ".RData", sep=''), settings)
+        w@aggregated <- cleanElnoise(w@aggregated,
+                settings$electronicNoise, settings$electronicNoiseWidth)
+    }
+    # Step 7: reanalyze failpeaks for (mono)oxidation and N2 adduct peaks
+    if(7 %in% steps)
+    {
+        message("msmsWorkflow: Step 7. Reanalyze fail peaks for N2 + O")
+        w@aggregated <- reanalyzeFailpeaks(
+                    w@aggregated, custom_additions="N2O", mode=mode,
+                    filterSettings=settings$filterSettings,
+                    progressbar=progressbar)
+        if(!is.na(archivename))
+        archiveResults(w, paste(archivename, "_RA.RData", sep=''), settings)
+    }
+    # Step 8: heuristic filtering based on peak multiplicity;
+    #         creation of failpeak list
+    if(8 %in% steps)
+    {
+        message("msmsWorkflow: Step 8. Peak multiplicity filtering")
+        if (is.null(settings$multiplicityFilter)) {
+          message("msmsWorkflow: Step 8. Peak multiplicity filtering skipped because multiplicityFilter parameter is not set.")
+        } else {
+            # apply heuristic filter      
+            w@aggregated <- filterMultiplicity(
+                w, archivename, mode, settings$multiplicityFilter)
+            w@aggregated <- processProblematicPeaks(w, mode, archivename)
+
+            if(!is.na(archivename))
+            archiveResults(w, paste(archivename, "_RF.RData", sep=''), settings)   
+        }
+    }
+    message("msmsWorkflow: Done.")
+    return(w)
 }
 
 #' Analyze MSMS spectra
@@ -564,7 +605,6 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 
 	limits <- to.limits.rcdk(parent_formula)
 	
-	
 	peakmatrix <- lapply(
 			split(shot,shot$row)
 			, function(shot.row)  {
@@ -641,14 +681,13 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 	#iff_rcdk_pM_eln$maxvalence <- unlist(lapply(diff_rcdk_pM_eln$formula.rcdk, maxvalence))
 	temp.child.ok <- (childPeaks$dbe >= filterSettings$dbeMinLimit) 
 		# & dbe < dbe_parent + 3)
-	
 	# check if a peak was recognized
 	if(length(which(temp.child.ok)) == 0)
 	{
 		child@ok <- FALSE
 		return(child)
 	}
-#browser()	
+    #browser()	
 	# find the best ppm value
     bestPpm <- aggregate(as.data.frame(childPeaks[!is.na(childPeaks$dppm),"dppm"]),
 					list(childPeaks[!is.na(childPeaks$dppm),"row"]),
@@ -668,14 +707,14 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
     childPeaksFilt <- filterLowaccResults(childPeaks, filterMode, filterSettings)
     childPeaksGood <- childPeaksFilt[["TRUE"]]
     childPeaksBad <- childPeaksFilt[["FALSE"]]
-	if(is.null(childPeaksGood))
+	if(is.null(childPeaksGood)){
 		childPeaksGood <- childPeaks[c(),,drop=FALSE]
+        childPeaksGood$good <- logical(0)
+    }
 	if(is.null(childPeaksBad))
 		childPeaksBad <- childPeaks[c(),,drop=FALSE]
 	childPeaksUnassigned <- childPeaks[is.na(childPeaks$dppm),,drop=FALSE]
 	childPeaksUnassigned$good <- rep(FALSE, nrow(childPeaksUnassigned))
-	
-	
     # count formulas within new limits
     # (the results of the "old" count stay in childPeaksInt and are returned
     # in $childPeaks)
@@ -686,9 +725,7 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 	childPeaksUnassigned$formulaCount <- rep(NA, nrow(childPeaksUnassigned))
 	childPeaksBad$formulaCount <- rep(NA, nrow(childPeaksBad))
 	childPeaksBad$good <- rep(FALSE, nrow(childPeaksBad))
-	
-	
-	
+    
 	# Now: childPeaksGood (containing the new, recounted peaks with good = TRUE), and childPeaksBad (containing the 
 	# peaks with good=FALSE, i.e. outside filter criteria, with the old formula count even though it is worthless)
 	# are bound together.
@@ -698,6 +735,7 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 	
 	# Now let's cross fingers. Add a good=NA column to the unmatched peaks and reorder the columns
 	# to match order in childPeaks. After that, setData to the child slot.
+
 	childPeaksOmitted <- getData(child)
 	childPeaksOmitted <- childPeaksOmitted[child@low | child@satellite,,drop=FALSE]
 	childPeaksOmitted$rawOK <- rep(FALSE, nrow(childPeaksOmitted))
@@ -708,11 +746,10 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
 	childPeaksOmitted$dbe <- rep(NA, nrow(childPeaksOmitted))
     childPeaksOmitted$dppmBest <- rep(NA, nrow(childPeaksOmitted))
     childPeaksOmitted$formulaCount <- rep(0, nrow(childPeaksOmitted))
-    
 	childPeaks$satellite <- rep(FALSE, nrow(childPeaks))
 	childPeaks$low <- rep(FALSE, nrow(childPeaks))
 	childPeaks$rawOK <- rep(TRUE, nrow(childPeaks))
-	
+    
 	childPeaks <- childPeaks[,colnames(childPeaksOmitted), drop=FALSE]
 	
 	childPeaksTotal <- rbind(childPeaks, childPeaksOmitted)
@@ -795,8 +832,9 @@ analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preli
 		
 		# Filter out low intensity peaks:
 		child@low <- (shot$intensity < cut) | (shot$intensity < max(shot$intensity)*cut_ratio)
-		shot <- shot[!child@low,,drop=FALSE]
 		shot_full <- shot
+        shot <- shot[!child@low,,drop=FALSE]
+		
 		
 		# Is there still anything left?
 		if(length(which(!child@low))==0)
@@ -831,9 +869,11 @@ analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preli
 				formula=msmsPeaks@formula,
 				dppm=0,
 				x1=0,x2=0,x3=0)
-		
-		
-		childPeaks <- addProperty(shot, "good", "logical", FALSE)
+        
+		childPeaks <- addProperty(shot_full, "rawOK", "logical", FALSE)
+        childPeaks[!(child@low | child@satellite),"rawOK"] <- TRUE
+     
+        childPeaks <- addProperty(childPeaks, "good", "logical", FALSE)
 		childPeaks[childPeaks$rawOK,"good"] <- TRUE
 
 		childPeaks <- addProperty(childPeaks, "mzCalc", "numeric")
@@ -853,8 +893,8 @@ analyzeMsMs.intensity <- function(msmsPeaks, mode="pH", detail=FALSE, run="preli
 		
 		childPeaks <- addProperty(childPeaks, "dppmBest", "numeric")
 		childPeaks[childPeaks$rawOK,"dppmBest"] <- 0
-		
-		child <- setData(child, childPeaks)
+        
+        child <- setData(child, childPeaks)
 		child@ok <- TRUE
 		return(child)
 	}
@@ -1119,7 +1159,7 @@ problematicPeaks <- function(peaks_unmatched, peaks_matched, mode="pH")
   # find parent m/z to exclude co-isolated peaks
   #p_control$mzCenter <- numeric(nrow(p_control))
   p_control$mzCenter <- as.numeric(
-    unlist(lapply(p_control$cpdID, function(id) findMz(id, mode)$mzCenter)) )
+    unlist(lapply(p_control$cpdID, function(id) findMz(id, mode, retrieval=findLevel(id,TRUE))$mzCenter)) )
   p_control_noMH <- p_control[
 		  (p_control$mzFound < p_control$mzCenter - 1) |
 		  (p_control$mzFound > p_control$mzCenter + 1),]
@@ -1133,7 +1173,9 @@ problematicPeaks <- function(peaks_unmatched, peaks_matched, mode="pH")
 #' 
 #' @param w \code{msmsWorkspace} to analyze. 
 #' @param mode Processing mode (pH etc)
-#' @param archivename Base name of the archive to write to (for "abc" the exported failpeaks list will be "abc_Failpeaks.csv"). 
+#' @param archivename Base name of the archive to write to (for "abc" the exported failpeaks list will be "abc_Failpeaks.csv").
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
 #' @return  Returns the aggregate data.frame with added column "\code{problematic}" (logical) which marks peaks which match the problematic criteria 
 #' 
 #' @author stravsmi
@@ -1707,8 +1749,8 @@ reanalyzeFailpeak <- function(custom_additions, mass, cpdID, counter, pb = NULL,
 	# generate.formula starts from empirical mass, but dppm cal-
 	# culation of the evaluation starts from theoretical mass.
 	# So we don't miss the points on 'the border'.
-	
-	db_formula <- findFormula(cpdID)
+    
+	db_formula <- findFormula(cpdID, retrieval=findLevel(cpdID,TRUE))
 	
 	ppmlimit <- 2.25 * filterSettings$ppmFine
 	parent_formula <- add.formula(db_formula, allowed_additions)
@@ -1824,8 +1866,11 @@ filterPeaksMultiplicity <- function(peaks, formulacol, recalcBest = TRUE)
 	{
 		peaks <- cbind(peaks, data.frame(formulaMultiplicity=numeric()))
 		if(recalcBest){
-			if(formulacol == formula){
-				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point.")
+			if(formulacol == "formula"){
+				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
+			}
+			if(formulacol == "reanalyzed.formula"){
+				warning("filterPeaksMultiplicity: All peaks have been filtered. The workflow can not be continued beyond this point if this error message also shows for reanalyzed peaks.")
 			}
 			peaks$fM_factor <- as.factor(peaks$formulaMultiplicity)
 			return(peaks)
@@ -2020,7 +2065,7 @@ filterMultiplicity <- function(w, archivename=NA, mode="pH", recalcBest = TRUE,
 		
     # Kick the M+H+ satellites out of peaksReanOK:
     peaksReanOK$mzCenter <- as.numeric(
-      unlist(lapply(peaksReanOK$cpdID, function(id) findMz(id)$mzCenter)) )
+      unlist(lapply(peaksReanOK$cpdID, function(id) findMz(id, retrieval=findLevel(id,TRUE))$mzCenter)) )
     peaksReanBad <- peaksReanOK[
 			!((peaksReanOK$mzFound < peaksReanOK$mzCenter - 1) |
 			(peaksReanOK$mzFound > peaksReanOK$mzCenter + 1)),]
@@ -2084,8 +2129,8 @@ recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window =
 			mzL <- findMz.formula(cpd@formula,mode,recalibrateMS1Window,0)
 			mzCalc <- mzL$mzCenter
 			ms1 <- mz(cpd@parent)
-			mzFound <- ms1[(ms1 >= mzL$mzMin) & (ms1 <= mzL$mzMax)]
-			
+           
+			mzFound <- ms1[which.min(abs(ms1 - mzL$mzCenter))]
 			if(!length(mzFound)){
 				return(c(
 					mzFound = NA,
@@ -2097,7 +2142,8 @@ recalibrate.addMS1data <- function(spec,mode="pH", recalibrateMS1Window =
 				return(c(
 					mzFound = mzFound,
 					mzCalc = mzCalc,
-					dppm = dppmRc
+					dppm = dppmRc,
+                    id=cpd@id
 				))
 			}
 		})
diff --git a/R/leMsmsRaw.R b/R/leMsmsRaw.R
index 41a75fb0817f74818f46d38d89467ed744c25cfb..f69458c26d60f3baaec12cf4af14799b281e73bc 100755
--- a/R/leMsmsRaw.R
+++ b/R/leMsmsRaw.R
@@ -10,6 +10,7 @@
 #' @import rcdk 
 #' @import rjson 
 #' @import yaml 
+#' @import digest
 NULL # This is required so that roxygen knows where the first manpage starts
 
 
@@ -84,6 +85,9 @@ NULL # This is required so that roxygen knows where the first manpage starts
 #' @param rtMargin	The retention time tolerance to use.
 #' @param deprofile	Whether deprofiling should take place, and what method should be
 #' 			used (cf. \code{\link{deprofile}}) 
+#' @param retrieval A value that determines whether the files should be handled either as "standard",
+#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+#' if the only know thing is the m/z
 #' @return	An \code{RmbSpectraSet} (for \code{findMsMsHR}). Contains parent MS1 spectrum (\code{@@parent}), a block of dependent MS2 spectra ((\code{@@children})
 #' 			and some metadata (\code{id},\code{mz},\code{name},\code{mode} in which the spectrum was acquired.
 #' 
@@ -112,7 +116,8 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
 		rtMargin = getOption("RMassBank")$rtMargin,
 		deprofile = getOption("RMassBank")$deprofile,
 		headerCache = NULL,
-		peaksCache = NULL)
+		peaksCache = NULL,
+        retrieval="standard")
 {
 	
 	# access data directly for finding the MS/MS data. This is done using
@@ -122,7 +127,7 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
 	if(!is.null(fileName))
 		msRaw <- openMSfile(fileName)
 	
-	mzLimits <- findMz(cpdID, mode)
+	mzLimits <- findMz(cpdID, mode, retrieval=retrieval)
 	mz <- mzLimits$mzCenter
 	limit.fine <- ppm(mz, ppmFine, p=TRUE)
 	if(!useRtLimit)
@@ -143,7 +148,12 @@ findMsMsHR <- function(fileName = NULL, msRaw = NULL, cpdID, mode="pH",confirmMo
 	#sp@mz <- mzLimits
 	sp@id <- as.character(as.integer(cpdID))
 	sp@name <- findName(cpdID)
-	sp@formula <- findFormula(cpdID)
+    ENV <- environment()
+	if(retrieval == "unknown"){
+        sp@formula <- ""
+    } else{
+        sp@formula <- findFormula(cpdID, retrieval=retrieval)
+    }
 	sp@mode <- mode
 	
 	# If we had to open the file, we have to close it again
@@ -268,7 +278,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
 									tic = line["totIonCurrent"],
 									peaksCount = line["peaksCount"],
 									rt = line["retentionTime"],
-									acquisitionNum = as.integer(line["acquisitionNum"]),
+									acquisitionNum = as.integer(line["seqNum"]),
 									centroided = TRUE
 									)
 						})
@@ -283,7 +293,7 @@ findMsMsHR.mass <- function(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
 						polarity = as.integer(masterHeader$polarity),
 						peaksCount = as.integer(masterHeader$peaksCount),
 						rt = masterHeader$retentionTime,
-						acquisitionNum = as.integer(masterHeader$acquisitionNum),
+						acquisitionNum = as.integer(masterHeader$seqNum),
 						tic = masterHeader$totIonCurrent,
 						centroided = TRUE
 						)
diff --git a/R/msmsRead.R b/R/msmsRead.R
index 4a315fa87ebe0dc376b92ca3361bc9ce0bc14b81..408d8dcfd69dca0f0c2ac1bfda0560e1c2636b76 100644
--- a/R/msmsRead.R
+++ b/R/msmsRead.R
@@ -42,12 +42,13 @@
 #' @export
 msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL, 
 					readMethod, mode, confirmMode = FALSE, useRtLimit = TRUE, 
-					Args = NULL, settings = getOption("RMassBank"), progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){
+					Args = NULL, settings = getOption("RMassBank"),
+                    progressbar = "progressBarHook", MSe = FALSE, plots = FALSE){
 	.checkMbSettings()
 	##Read the files and cpdids according to the definition
 	##All cases are silently accepted, as long as they can be handled according to one definition
 	if(!any(mode %in% c("pH","pNa","pM","pNH4","mH","mFA","mM",""))) stop(paste("The ionization mode", mode, "is unknown."))
-	
+    
 	if(is.null(filetable)){
 		##If no filetable is supplied, filenames must be named explicitly
 		if(is.null(files))
@@ -83,23 +84,24 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 	if(!all(file.exists(w@files))){
 		stop("The supplied files ", paste(w@files[!file.exists(w@files)]), " don't exist")
 	}
-	
-	na.ids <- which(is.na(sapply(cpdids, findSmiles)))
-	
-	if(length(na.ids)){
-		stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list")
-	}
+
+    # na.ids <- which(is.na(sapply(cpdids, findSmiles)))
+    
+    # if(length(na.ids)){
+        # stop("The supplied compound ids ", paste(cpdids[na.ids], collapse=" "), " don't have a corresponding smiles entry. Maybe they are missing from the compound list")
+    # }
 	
 	##This should work
-	if(readMethod == "minimal"){
-		##Edit options
-		opt <- getOption("RMassBank")
-		opt$recalibrator$MS1 <- "recalibrate.identity"
-		opt$recalibrator$MS2 <- "recalibrate.identity"
-		options(RMassBank=opt)
-		##Edit analyzemethod
-		analyzeMethod <- "intensity"
-	}
+    if(readMethod == "minimal"){
+        ##Edit options
+        opt <- getOption("RMassBank")
+        opt$recalibrator$MS1 <- "recalibrate.identity"
+        opt$recalibrator$MS2 <- "recalibrate.identity"
+        opt$add_annotation==FALSE
+        options(RMassBank=opt)
+        ##Edit analyzemethod
+        analyzeMethod <- "intensity"
+    }
 	
 	if(readMethod == "mzR"){
 		##Progressbar
@@ -113,7 +115,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 							
 							# Find compound ID
 							cpdID <- cpdids[count]
-							
+							retrieval <- findLevel(cpdID,TRUE)
 							# Set counter up
 							envir$count <- envir$count + 1
 							
@@ -124,7 +126,7 @@ msmsRead <- function(w, filetable = NULL, files = NULL, cpdids = NULL,
 									mzCoarse = settings$findMsMsRawSettings$mzCoarse,
 									fillPrecursorScan = settings$findMsMsRawSettings$fillPrecursorScan,
 									rtMargin = settings$rtMargin,
-									deprofile = settings$deprofile)
+									deprofile = settings$deprofile, retrieval=retrieval)
 							gc()
 														
 							# Progress:
@@ -310,7 +312,7 @@ msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NU
 			} else{
 				psp[[i]] <- which.min( abs(getRT(anmsms[[i]]) - RT) )
 			}
-			## Now find the pspec for compound       
+			## Now find the pspec for compound
 
 			## 2nd best: Spectrum closest to MS1
 			##psp <- which.min( abs(getRT(anmsms) - actualRT))
@@ -345,7 +347,7 @@ msmsRead.RAW <- function(w, xRAW = NULL, cpdids = NULL, mode, findPeaksArgs = NU
 			spectraNum <- length(w@spectra[[which(IDindex)]]@children)
 			w@spectra[[which(IDindex)]]@children[[spectraNum+1]] <- sp@children[[1]]
 		} else {
-			w@spectra[[length(www@spectra)+1]] <- sp
+			w@spectra[[length(w@spectra)+1]] <- sp
 		}
 	} else{
 		w@spectra[[1]] <- sp
diff --git a/R/parseMassBank.R b/R/parseMassBank.R
index c881b312cbce3b3c497c7675c963e627e31f6d73..32efc6f9369fce425b5a272cdb3427715e177eb1 100644
--- a/R/parseMassBank.R
+++ b/R/parseMassBank.R
@@ -167,18 +167,18 @@ parseMassBank <- function(Files){
 			}
 		##Extract the peaks and write the data into a data.frame
 		PKStart <- grep('PK$PEAK:',record, fixed = TRUE) + 1
-		endslash <- grep('//',record, fixed = TRUE)
-			if(PKStart < endslash){
-				splitted <- strsplit(record[PKStart:(endslash-1)]," ")
-				PKPeak <- matrix(nrow = endslash - PKStart, ncol = 3)
-				for(k in 1:length(splitted)){
-					splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
-					PKPeak[k,] <- splitted[[k]]
-				}
-				PKPeak <- as.data.frame(PKPeak, stringsAsFactors = FALSE)
-				PKPeak[] <- lapply(PKPeak, type.convert)
-				colnames(PKPeak) <- c("m/z", "int", "rel.int.")
-			}
+		endslash <- tail(grep('//',record, fixed = TRUE),1)
+        if(PKStart < endslash){
+            splitted <- strsplit(record[PKStart:(endslash-1)]," ")
+            PKPeak <- matrix(nrow = endslash - PKStart, ncol = 3)
+            for(k in 1:length(splitted)){
+                splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
+                PKPeak[k,] <- splitted[[k]]
+            }
+            PKPeak <- as.data.frame(PKPeak, stringsAsFactors = FALSE)
+            PKPeak[] <- lapply(PKPeak, type.convert)
+            colnames(PKPeak) <- c("m/z", "int", "rel.int.")
+        }
 	
 		mb@compiled_ok[[i]][['PK$PEAK']] <- PKPeak	
 	
diff --git a/R/settings_example.R b/R/settings_example.R
index fe7d81e0ce0e394d42f0b41d8b79478f46ccf840..b8fb266f7db516a7818382561dea238305ee4fa4 100755
--- a/R/settings_example.R
+++ b/R/settings_example.R
@@ -187,7 +187,7 @@ NULL
     authors = "Nomen Nescio, The Unseen University",
     copyright = "Copyright (C) XXX",
     publication = "",
-    license = "CC BY-SA",
+    license = "CC BY",
     instrument = "LTQ Orbitrap XL Thermo Scientific",
     instrument_type = "LC-ESI-ITFT",
     confidence_comment = "standard compound",
diff --git a/R/sysData.rda b/R/sysData.rda
deleted file mode 100644
index 7b22ce3cc76d4eb12ad0aadb20665a4f755212a4..0000000000000000000000000000000000000000
Binary files a/R/sysData.rda and /dev/null differ
diff --git a/R/tools.R b/R/tools.R
index cba9f109c208778851db33ba198e86f4ce51381f..030a9a943b1f7a3b03aeeb61a1ad4b6de41f9e4a 100644
--- a/R/tools.R
+++ b/R/tools.R
@@ -116,15 +116,15 @@ findProgress <- function(workspace)
 #' 
 updateSettings <- function(settings, warn=TRUE)
 {
-  settings.new <- .settingsList
-  settings.old <- settings
-  renew <- setdiff(names(settings.new), names(settings.old))
-  if(length(renew) > 0 && warn==TRUE){
+    settings.new <- .settingsList
+    settings.old <- settings
+    renew <- setdiff(names(settings.new), names(settings.old))
+    if(length(renew) > 0 && warn==TRUE){
     warning(paste0("Your settings are outdated! The following fields were taken from default values: ", 
             paste(renew,collapse=", ")))
     if("filterSettings" %in% renew)
       warning("The default values of filterSettings could change the processing behaviour if you have negative-mode spectra. Check ?updateSettings for details.")
-  }
-  settings.old[renew] <- settings.new[renew]
-  return(settings.old)
+    }
+    settings.old[renew] <- settings.new[renew]
+    return(settings.old)
 }
diff --git a/R/validateMassBank.R b/R/validateMassBank.R
index a9eeb4f42f0c3ca59806ac5584a068680e0b1ecc..2590b3d0af43fd142f1d5ce9d5a50a19668b9f98 100644
--- a/R/validateMassBank.R
+++ b/R/validateMassBank.R
@@ -175,7 +175,7 @@ smiles2mass <- function(SMILES){
 	return(mass)
 }
 
-.unitTestRMB <- function(WD=getwd()){
+.unitTestmzR <- function(WD=getwd()){
 	requireNamespace("RUnit",quietly=TRUE)
 	requireNamespace("RMassBankData",quietly=TRUE)
 	oldwd <- getwd()
@@ -217,19 +217,12 @@ smiles2mass <- function(SMILES){
 					#testFuncRegexp = "^test.+",
 					rngKind = "Marsaglia-Multicarry",
 					rngNormalKind = "Kinderman-Ramage")
-	testSuite3 <- RUnit::defineTestSuite("Evaluation of correct handling if no peaks are found", dirs = system.file("unitTests", 
-		package="RMassBank"), testFileRegexp = "runit.NOPEAKS.R",
-					#testFuncRegexp = "^test.+",
-					rngKind = "Marsaglia-Multicarry",
-					rngNormalKind = "Kinderman-Ramage")	
 	testData <- suppressWarnings(RUnit::runTestSuite(testSuite))
 	testData2 <- suppressWarnings(RUnit::runTestSuite(testSuite2))
-	testData3 <- suppressWarnings(RUnit::runTestSuite(testSuite3))
 	
 	file.remove(c("pH_narcotics_Failpeaks.csv","pH_narcotics.RData","pH_narcotics_RA.RData","pH_narcotics_RF.RData"))
 	RUnit::printTextProtocol(testData)
 	RUnit::printTextProtocol(testData2)
-	RUnit::printTextProtocol(testData3)
 	setwd(oldwd)
 	return(testData)
 }
diff --git a/R/webAccess.R b/R/webAccess.R
index 1f9147b574b1366b5f7c86619be151785b99bf8b..204b34504ed6ad2866ed1212ac4efc94b50fcba0 100755
--- a/R/webAccess.R
+++ b/R/webAccess.R
@@ -41,7 +41,7 @@ getCactus <- function(identifier, representation)
 
   ret <- tryCatch(
     getURLContent(paste(
-      "http://cactus.nci.nih.gov/chemical/structure/",
+      "https://cactus.nci.nih.gov/chemical/structure/",
       URLencode(identifier), "/", representation, sep='')),
     error = function(e) NA)
   if(is.na(ret))
diff --git a/inst/RMB_options.ini b/inst/RMB_options.ini
index 2b2aa5bb11205ec22bd428fe2f3db4b9c05a57c2..10574b185029cb954cad133964428b7334330bfd 100755
--- a/inst/RMB_options.ini
+++ b/inst/RMB_options.ini
@@ -13,7 +13,7 @@ deprofile:
 # Deviation (in minutes) allowed the for retention time
 rtMargin: 0.4
 # Systematic retention time shift
-rtShift: -0.3
+rtShift: 0.0
 
 # Directory to OpenBabel. Required for creating molfiles for MassBank export.
 # If no OpenBabel directory is given, RMassBank will attempt to use the CACTUS webservice
@@ -39,7 +39,7 @@ annotations:
     authors: Nomen Nescio, The Unseen University
     copyright: Copyright (C) XXX
     publication: 
-    license: CC BY-SA
+    license: CC BY
     instrument: LTQ Orbitrap XL Thermo Scientific
     instrument_type: LC-ESI-ITFT
     confidence_comment: standard compound
diff --git a/inst/unitTests/runTests.R b/inst/unitTests/runTests.R
new file mode 100644
index 0000000000000000000000000000000000000000..03dd13fd56b28370b8f319b9c648e2200f817446
--- /dev/null
+++ b/inst/unitTests/runTests.R
@@ -0,0 +1,37 @@
+w <- newMsmsWorkspace()
+RmbDefaultSettings()
+files <- list.files(system.file("spectra", package="RMassBankData"), ".mzML", full.names = TRUE)
+
+w@files <- files
+loadList(system.file("list/NarcoticsDataset.csv", package="RMassBankData"))
+w <- msmsWorkflow(w, mode="pH", steps=c(1:4), archivename="pH_narcotics")
+w <- msmsWorkflow(w, mode="pH", steps=c(5:8), archivename="pH_narcotics")	
+mb <- newMbWorkspace(w)
+mb <- loadInfolists(mb, system.file("infolists", package="RMassBankData"))
+mb <- mbWorkflow(mb)
+
+
+### Error message code structure borrowed from rcppgls:
+### https://github.com/eddelbuettel/rcppgsl/blob/master/inst/unitTests/runTests.R
+
+testElec <- RUnit::defineTestSuite("Electronic noise and formula calculation Test", dirs = system.file("unitTests", 
+									package="RMassBank"), testFileRegexp = "test_El.*")
+testAcqu <- RUnit::defineTestSuite("Evaluation of data acquisition process", dirs = system.file("unitTests", 
+									package="RMassBank"), testFileRegexp = "test_mzR.R")
+testNope <- RUnit::defineTestSuite("Evaluation of correct handling if no peaks are found", dirs = system.file("unitTests", 
+									package="RMassBank"), testFileRegexp = "test_NOPEAKS.R")
+
+errEval <- function(testSuite){
+	Res <- RUnit::runTestSuite(testSuite)
+	err <- getErrors(Res)
+	if((err$nFail + err$nErr) > 0){
+            stop(sprintf('unit test problems: %d failures, %d errors in testfunction: "%s"', err$nFail, err$nErr, testSuite$name) )
+        } else{
+            success <- err$nTestFunc - err$nFail - err$nErr - err$nDeactivated
+            cat( sprintf( "%d / %d\n", success, err$nTestFunc ) )
+    }
+}
+
+errEval(testElec)
+errEval(testAcqu)
+errEval(testNope)
\ No newline at end of file
diff --git a/inst/unitTests/runit.NOPEAKS.R b/inst/unitTests/runit.NOPEAKS.R
deleted file mode 100644
index 8c9b014980ff92bd9b3478041fb300c804fc98d3..0000000000000000000000000000000000000000
--- a/inst/unitTests/runit.NOPEAKS.R
+++ /dev/null
@@ -1,9 +0,0 @@
-test.nopeaks <- function(){
-	w <- newMsmsWorkspace()
-	w@aggregated <- data.frame(mzFound = numeric(0), intensity = numeric(0), good = logical(0), mzCalc = numeric(0), formula = character(0), dbe = numeric(0),
-	formulaCount = integer(0), dppm = numeric(0), dppmBest = numeric(0), scan = integer(0), cpdID = character(0), parentScan = integer(0), dppmRc = numeric(0),
-	index = integer(0), noise = logical(0), reanalyzed.formula = character(0), reanalyzed.mzCalc = numeric(0), reanalyzed.dppm = numeric(0),reanalyzed.formulaCount = numeric(0),
-	reanalyzed.dbe = numeric(0),matchedReanalysis = logical(0))
-	filterMultiplicity(w, mode="pH")
-	RUnit::checkTrue(TRUE)
-}
\ No newline at end of file
diff --git a/inst/unitTests/runit.EN_FC.R b/inst/unitTests/test_ElecNoise_FormulaCalc.R
similarity index 97%
rename from inst/unitTests/runit.EN_FC.R
rename to inst/unitTests/test_ElecNoise_FormulaCalc.R
index fec0c3f8064523a10d6c927621f9d6f8955837e0..4f08fa210671e201c40391115d423041bf9eeb53 100644
--- a/inst/unitTests/runit.EN_FC.R
+++ b/inst/unitTests/test_ElecNoise_FormulaCalc.R
@@ -5,6 +5,5 @@ test.eletronicnoise <- function(){
 
 test.formulacalculation <- function(){
 	failpeaks <- read.csv("pH_narcotics_Failpeaks.csv")
-	
 	RUnit::checkTrue(!any(apply(failpeaks,1,function(x) all(x[2:5] == c(70,2758,321,56.04933)))))
 }
\ No newline at end of file
diff --git a/inst/unitTests/test_NOPEAKS.R b/inst/unitTests/test_NOPEAKS.R
new file mode 100644
index 0000000000000000000000000000000000000000..1f5ec72d6f27d0fcd4b2d22f150dacb10ff8a11a
--- /dev/null
+++ b/inst/unitTests/test_NOPEAKS.R
@@ -0,0 +1,9 @@
+test.nopeaks <- function(){
+	w <- newMsmsWorkspace()
+	w@aggregated <- data.frame(mzFound = numeric(0), intensity = numeric(0), good = logical(0), mzCalc = numeric(0), formula = character(0), dbe = numeric(0),
+		formulaCount = integer(0), dppm = numeric(0), dppmBest = numeric(0), scan = integer(0), cpdID = character(0), parentScan = integer(0), dppmRc = numeric(0),
+		index = integer(0), noise = logical(0), reanalyzed.formula = character(0), reanalyzed.mzCalc = numeric(0), reanalyzed.dppm = numeric(0),reanalyzed.formulaCount = numeric(0),
+		reanalyzed.dbe = numeric(0),matchedReanalysis = logical(0))
+	filterMultiplicity(w, mode="pH")
+	checkTrue(TRUE)
+}
\ No newline at end of file
diff --git a/inst/unitTests/runit.DA.R b/inst/unitTests/test_mzR.R
similarity index 97%
rename from inst/unitTests/runit.DA.R
rename to inst/unitTests/test_mzR.R
index 4d25af6a1b29fdbdc9103985ad9938ccddac708e..9fa790a8daa7c63e5780ed921cf729c769758517 100644
--- a/inst/unitTests/runit.DA.R
+++ b/inst/unitTests/test_mzR.R
@@ -2,7 +2,6 @@ test.mzRRead <- function(){
 	allOK <- TRUE
 	records <- list.files("XX/recdata",full.names=TRUE)
 	rightrecords <- list.files(system.file("records/XX/recdata", package="RMassBankData"), full.names=TRUE)
-	
 	for(i in 1:length(records)){
 		gen <- file(description = records[i], open = "r", blocking = TRUE,
 		encoding = getOption("encoding"), raw = FALSE)
diff --git a/man/checkIsotopes.Rd b/man/checkIsotopes.Rd
index 3e6bc8f9c71bfcec749f3db90052377d70301aad..e88e38366be042c7b6cfd738a6ea80220981962c 100644
--- a/man/checkIsotopes.Rd
+++ b/man/checkIsotopes.Rd
@@ -4,8 +4,8 @@
 \alias{checkIsotopes}
 \title{Checks for isotopes in a \code{msmsWorkspace}}
 \usage{
-checkIsotopes(w, mode = "pH", intensity_cutoff = 1000,
-  intensity_precision = "low", conflict = "dppm", isolationWindow = 4,
+checkIsotopes(w, mode = "pH", intensity_cutoff = 0,
+  intensity_precision = "none", conflict = "strict", isolationWindow = 2,
   evalMode = "complete", plotSpectrum = TRUE,
   settings = getOption("RMassBank"))
 }
@@ -15,7 +15,8 @@ checkIsotopes(w, mode = "pH", intensity_cutoff = 1000,
 \item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
 ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).}
 
-\item{intensity_cutoff}{The cutoff (as an absolute intensity value) under which isotopic peaks shouldn't be checked for or accepted as valid.}
+\item{intensity_cutoff}{The cutoff (as an absolute intensity value) under which isotopic peaks shouldn't be checked for or accepted as valid.
+Please note: The cutoff is not hard in the sense that it interacts with the intensity_precision parameter.}
 
 \item{intensity_precision}{The difference that is accepted between the calculated and observed intensity of a possible isotopic peak. Further details down below.}
 
diff --git a/man/compileRecord.Rd b/man/compileRecord.Rd
index d954f74eecee566834b6ebc79277d2bf4ec4567a..c80c137526d8e2b7ebf56542a2a17de2c6571ca7 100755
--- a/man/compileRecord.Rd
+++ b/man/compileRecord.Rd
@@ -4,7 +4,7 @@
 \alias{compileRecord}
 \title{Compile MassBank records}
 \usage{
-compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL)
+compileRecord(spec, mbdata, aggregated, additionalPeaks = NULL, retrieval="standard")
 }
 \arguments{
 \item{spec}{A \code{RmbSpectraSet} for a compound, after analysis (\code{\link{analyzeMsMs}}).
@@ -20,6 +20,10 @@ usage information.)}
 
 \item{additionalPeaks}{If present, a table with additional peaks to add into the spectra.
 As loaded with \code{\link{addPeaks}}.}
+
+\item{retrieval}{A value that determines whether the files should be handled either as "standard",
+if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+if the only know thing is the m/z}
 }
 \value{
 Returns a MassBank record in list format: e.g.
diff --git a/man/findMass.Rd b/man/findMass.Rd
index 79cd02c0419330f06b0b7f21a21f03ffed48f43c..01236caee5ba723e4f12eb70179fd60f6461f936 100755
--- a/man/findMass.Rd
+++ b/man/findMass.Rd
@@ -4,11 +4,19 @@
 \alias{findMass}
 \title{Calculate exact mass}
 \usage{
-findMass(cpdID_or_smiles)
+findMass(cpdID_or_smiles, retrieval = "standard", mode = "pH")
 }
 \arguments{
 \item{cpdID_or_smiles}{SMILES code or compound ID of the molecule. (Numerics
 are treated as compound ID).}
+
+\item{retrieval}{A value that determines whether the files should be handled either as "standard",
+if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+if the only know thing is the m/z}
+
+\item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+            ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
+Only needed for retrieval="unknown"}
 }
 \value{
 Returns the exact mass of the uncharged molecule.
diff --git a/man/findMsMsHR.Rd b/man/findMsMsHR.Rd
index e8c05df2e9e893bd750225f58746dc7bb7ed1328..03b34146884f6df8c44471d460880d8c57c08c53 100755
--- a/man/findMsMsHR.Rd
+++ b/man/findMsMsHR.Rd
@@ -12,7 +12,7 @@ findMsMsHR(fileName = NULL, msRaw = NULL, cpdID, mode = "pH",
   fillPrecursorScan = getOption("RMassBank")$findMsMsRawSettings$fillPrecursorScan,
   rtMargin = getOption("RMassBank")$rtMargin,
   deprofile = getOption("RMassBank")$deprofile, headerCache = NULL,
-  peaksCache = NULL)
+  peaksCache = NULL, retrieval = "standard")
 
 findMsMsHR.mass(msRaw, mz, limit.coarse, limit.fine, rtLimits = NA,
   maxCount = NA, headerCache = NULL, fillPrecursorScan = FALSE,
@@ -60,6 +60,10 @@ freshly from \code{msRaw} for every compound.}
 \item{peaksCache}{If present, the complete output of \code{mzR::peaks(msRaw)}. This speeds up the lookup
 if multiple compounds should be searched in the same file.}
 
+\item{retrieval}{A value that determines whether the files should be handled either as "standard",
+if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+if the only know thing is the m/z}
+
 \item{mz}{The mass to use for spectrum search.}
 
 \item{limit.coarse}{Parameter in \code{findMsMsHR.mass} corresponding to \code{mzCoarse}.
diff --git a/man/findMz.Rd b/man/findMz.Rd
index 8e971d5ee7127715fde147b36901ba936156d9da..3c0e675ae1b6e7cb5018d64535784b1f58ecc3ca 100755
--- a/man/findMz.Rd
+++ b/man/findMz.Rd
@@ -3,23 +3,26 @@
 \name{findMz}
 \alias{findCAS}
 \alias{findFormula}
+\alias{findLevel}
 \alias{findMz}
 \alias{findName}
 \alias{findRt}
 \alias{findSmiles}
 \title{Find compound information}
 \usage{
-findMz(cpdID, mode = "pH", ppm = 10, deltaMz = 0)
+findMz(cpdID, mode = "pH", ppm = 10, deltaMz = 0, retrieval="standard")
 
 findRt(cpdID) 
 
 findSmiles(cpdID) 
 
-findFormula(cpdID) 
+findFormula(cpdID, retrieval="standard") 
 
 findCAS(cpdID)
 
 findName(cpdID)
+
+findLevel(cpdID, compact=FALSE)
 }
 \arguments{
 \item{cpdID}{The compound ID in the compound list.}
@@ -37,6 +40,13 @@ molecular mass + and - 10 ppm).}
 \item{deltaMz}{Specifies additional m/z window to add to the range (deltaMz
 = 0.02 will return the range of the molecular mass +- 0.02 (and additionally
 +- the set ppm value).}
+
+\item{retrieval}{A value that determines whether the files should be handled either as "standard",
+if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+if the only know thing is the m/z}
+
+\item{compact}{Only for \code{findLevel}, returns the "retrieval" parameter used for many functions 
+within RMassBank if TRUE}
 }
 \value{
 \code{findMz} will return a \code{list(mzCenter=, mzMin=, mzMax=)}
diff --git a/man/gatherCompound.Rd b/man/gatherCompound.Rd
index fc91507446dfe7977a329b0588b94996b654bd63..91631d0a8ceb5fa6e1eedc810382e52a89781d53 100755
--- a/man/gatherCompound.Rd
+++ b/man/gatherCompound.Rd
@@ -5,10 +5,10 @@
 \alias{gatherSpectrum}
 \title{Compose data block of MassBank record}
 \usage{
-gatherCompound(spec, aggregated, additionalPeaks = NULL)
+gatherCompound(spec, aggregated, additionalPeaks = NULL, retrieval="standard")
 
 		gatherSpectrum(spec, msmsdata, ac_ms, ac_lc, aggregated, 
- 		additionalPeaks = NULL)
+ 		additionalPeaks = NULL, retrieval="standard")
 }
 \arguments{
 \item{spec}{A \code{RmbSpectraSet} object, representing a compound with multiple spectra.}
@@ -18,6 +18,10 @@ gatherCompound(spec, aggregated, additionalPeaks = NULL)
 \item{additionalPeaks}{If present, a table with additional peaks to add into the spectra.
 As loaded with \code{\link{addPeaks}}.}
 
+\item{retrieval}{A value that determines whether the files should be handled either as "standard",
+if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+if the only know thing is the m/z}
+
 \item{msmsdata}{A \code{RmbSpectrum2} object from the \code{spec} spectra set, representing a single spectrum to give a record.}
 
 \item{ac_ms, ac_lc}{Information for the AC\$MASS_SPECTROMETRY and
diff --git a/man/gatherDataUnknown.Rd b/man/gatherDataUnknown.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..cef099cc9ffa05ae8808f3b62253863975aa70b2
--- /dev/null
+++ b/man/gatherDataUnknown.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/createMassBank.R
+\name{gatherDataUnknown}
+\alias{gatherDataUnknown}
+\title{Retrieve annotation data}
+\usage{
+gatherDataUnknown(id, mode, retrieval)
+}
+\arguments{
+\item{id}{The compound ID.}
+
+\item{mode}{\code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions 
+([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).}
+
+\item{retrieval}{A value that determines whether the files should be handled either as "standard",
+if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+if the only know thing is the m/z}
+}
+\value{
+Returns a list of type \code{list(id= \var{compoundID}, ...,
+'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
+}
+\description{
+Retrieves annotation data for an unknown compound by using basic information present
+}
+\details{
+Composes the "upper part" of a MassBank record filled with chemical data
+about the compound: name, exact mass, structure, CAS no..  
+The instrument type is also written into this block (even
+if not strictly part of the chemical information). Additionally, index
+fields are added at the start of the record, which will be removed later:
+\code{id, dbcas, dbname} from the compound list.
+
+Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
+inserted empty and will be filled later on.
+
+This function is used to generate the data in case a substance is unknown,
+i.e. not enough information is present to derive anything about formulas or links
+}
+\examples{
+
+# Gather data for compound ID 131
+\dontrun{gatherDataUnknown(131,"pH")}
+
+}
+\author{
+Michael Stravs, Erik Mueller
+}
+\references{
+MassBank record format:
+\url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
+}
+\seealso{
+\code{\link{mbWorkflow}}
+}
+
diff --git a/man/mbWorkflow.Rd b/man/mbWorkflow.Rd
index 2e75e80b6bf03a609048b99ec61ad107f222b1c0..b2e1421bbfa39893743ee361b431ca2f88113cba 100755
--- a/man/mbWorkflow.Rd
+++ b/man/mbWorkflow.Rd
@@ -17,7 +17,7 @@ which should then be manually inspected.}
 
 \item{gatherData}{A variable denoting whether to retrieve information using several online databases \code{gatherData= "online"}
 or to use the local babel installation \code{gatherData= "babel"}. Note that babel is used either way, if a directory is given 
-in the settings}
+in the settings. This setting will be ignored if retrieval is set to "standard"}
 }
 \value{
 The processed \code{mbWorkspace}.
diff --git a/man/processProblematicPeaks.Rd b/man/processProblematicPeaks.Rd
index 7326cde3c530c29662709ec5a0024e8e99963904..99b0f82f9fff74ee7fb3470572ef53481c7ad00b 100644
--- a/man/processProblematicPeaks.Rd
+++ b/man/processProblematicPeaks.Rd
@@ -11,7 +11,9 @@ processProblematicPeaks(w, mode, archivename = NA)
 
 \item{mode}{Processing mode (pH etc)}
 
-\item{archivename}{Base name of the archive to write to (for "abc" the exported failpeaks list will be "abc_Failpeaks.csv").}
+\item{archivename}{Base name of the archive to write to (for "abc" the exported failpeaks list will be "abc_Failpeaks.csv").
+if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
+if the only know thing is the m/z}
 }
 \value{
 Returns the aggregate data.frame with added column "\code{problematic}" (logical) which marks peaks which match the problematic criteria
diff --git a/tests/doRUnit.R b/tests/doRUnit.R
new file mode 100644
index 0000000000000000000000000000000000000000..d481b74b95480dd931e0a56c3e7a0d548bf44993
--- /dev/null
+++ b/tests/doRUnit.R
@@ -0,0 +1,24 @@
+#### doRUnit.R --- Run RUnit tests
+####------------------------------------------------------------------------
+
+### Structure borrowed from rcppgls:
+### https://github.com/eddelbuettel/rcppgsl/blob/master/tests/doRUnit.R
+
+if(require("RUnit", quietly = TRUE)) {
+	if(require("RMassBankData", quietly = TRUE) && !(compareVersion(installed.packages()["RMassBankData","Version"],"1.99.0") == -1)) {
+		pkg <- "RMassBank"
+		print("Starting tests")
+		require(pkg, character.only=TRUE)
+
+		path <- system.file("unitTests", package = pkg)
+
+		stopifnot(file.exists(path), file.info(path.expand(path))$isdir)
+
+		source(file.path(path, "runTests.R"), echo = TRUE)
+	} else {
+		## Taking this message out until the new RMassBankData is on bioc, just to avoid confusion.
+        # message("Package RMassBankData with version > 1.99 not available, cannot run unit tests")
+	}
+} else {
+	message("Package RUnit not available, cannot run unit tests")
+}