Skip to content
Snippets Groups Projects
Unverified Commit 6f4f25bb authored by Steffen Neumann's avatar Steffen Neumann Committed by GitHub
Browse files

Merge pull request #230 from MassBank/feature/merge_229_228

Feature/merge 229 228
parents c8b00e9c fa995a71
No related branches found
No related tags found
No related merge requests found
Package: RMassBank
Type: Package
Title: Workflow to process tandem MS files and build MassBank records
Version: 2.15.1
Version: 2.15.2
Authors@R: c(
person(given = "RMassBank at Eawag", email = "massbank@eawag.ch",
role=c("cre")),
......
......@@ -116,17 +116,17 @@ resetInfolists <- function(mb)
structure(list(X = integer(0), id = integer(0), dbcas = character(0),
dbname = character(0), dataused = character(0), COMMENT.CONFIDENCE = character(0),
COMMENT.ID = integer(0), CH.NAME1 = character(0),
CH.NAME2 = character(0), CH.NAME3 = character(0), CH.COMPOUND_CLASS = character(0),
CH.NAME2 = character(0), CH.NAME3 = character(0), CH.NAME4 = character(0), CH.NAME5 = character(0), CH.COMPOUND_CLASS = character(0),
CH.FORMULA = character(0), CH.EXACT_MASS = numeric(0), CH.SMILES = character(0),
CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0),
CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0),
CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0),
CH.LINK.CHEMSPIDER = integer(0)), .Names = c("X", "id", "dbcas",
CH.LINK.CHEMSPIDER = integer(0), CH.LINK.COMPTOX = character(0)), .Names = c("X", "id", "dbcas",
"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID",
"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.COMPOUND_CLASS", "CH.FORMULA",
"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.NAME4", "CH.NAME5", "CH.COMPOUND_CLASS", "CH.FORMULA",
"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI",
"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM",
"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER"), row.names = integer(0), class = "data.frame")
"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM",
"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER", "CH.LINK.COMPTOX"), row.names = integer(0), class = "data.frame")
if(getOption("RMassBank")$include_sp_tags)
{
mb@mbdata_archive["SP.SAMPLE"] <- character(0)
......@@ -135,7 +135,7 @@ resetInfolists <- function(mb)
}
# The workflow function, i.e. (almost) the only thing you actually need to call.
# The workflow function, i.e. (almost) the only thing you actually need to call.
# See below for explanation of steps.
#' MassBank record creation workflow
#'
......@@ -587,6 +587,13 @@ gatherData <- function(id)
csid <- getCactus(inchikey_split, 'chemspider_id')
}
##Get CompTox
comptox <- getCompTox(inchikey_split)
if(is.null(comptox)){
comptox <- NA
}
##Use CTS to retrieve information
CTSinfo <- getCtsRecord(inchikey_split)
......@@ -781,6 +788,7 @@ gatherData <- function(id)
}
link[["INCHIKEY"]] <- inchikey_split
link[["COMPTOX"]] <- comptox
if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid[!is.na(csid)])))
mbdata[['CH$LINK']] <- link
......@@ -1133,6 +1141,8 @@ flatten <- function(mbdata)
"CH$NAME1",
"CH$NAME2",
"CH$NAME3",
"CH$NAME4",
"CH$NAME5",
"CH$COMPOUND_CLASS",
"CH$FORMULA",
"CH$EXACT_MASS",
......@@ -1145,7 +1155,9 @@ flatten <- function(mbdata)
"CH$LINK.LIPIDMAPS",
"CH$LINK.PUBCHEM",
"CH$LINK.INCHIKEY",
"CH$LINK.CHEMSPIDER")
"CH$LINK.CHEMSPIDER",
"CH$LINK.COMPTOX"
)
# make an empty data frame with the right length
rows <- length(mbdata)
cols <- length(colList)
......@@ -1203,6 +1215,8 @@ readMbdata <- function(row)
"CH$NAME1",
"CH$NAME2",
"CH$NAME3",
"CH$NAME4",
"CH$NAME5",
"CH$COMPOUND_CLASS",
"CH$FORMULA",
"CH$EXACT_MASS",
......@@ -1215,14 +1229,15 @@ readMbdata <- function(row)
"CH$LINK.LIPIDMAPS",
"CH$LINK.PUBCHEM",
"CH$LINK.INCHIKEY",
"CH$LINK.CHEMSPIDER")
"CH$LINK.CHEMSPIDER",
"CH$LINK.COMPTOX")
mbdata[["COMMENT"]] = list()
#mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
# Again, our ID field.
#mbdata[["COMMENT"]][["ID"]] <- row[["COMMENT.ID"]]
mbdata[["COMMENT"]][gsub(x = commentNames, pattern = "^COMMENT\\.", replacement = "")] <- row[commentNames]
names = c(row[["CH.NAME1"]], row[["CH.NAME2"]], row[["CH.NAME3"]])
names = c(row[["CH.NAME1"]], row[["CH.NAME2"]], row[["CH.NAME3"]], row[["CH.NAME4"]], row[["CH.NAME5"]])
names = names[which(!is.na(names))]
names <- gsub("'", "`", names)
......@@ -1242,6 +1257,7 @@ readMbdata <- function(row)
link[["PUBCHEM"]] = row[["CH.LINK.PUBCHEM"]]
link[["INCHIKEY"]] = row[["CH.LINK.INCHIKEY"]]
link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
link[["COMPTOX"]] = row[["CH.LINK.COMPTOX"]]
link[which(is.na(link))] <- NULL
mbdata[["CH$LINK"]] <- link
## SP$SAMPLE
......
......@@ -175,6 +175,7 @@ dbe <- function(formula)
"Cl"= -0.5,
"Br" = -0.5,
"S" = 2,
"Se" = 2,
"P" = 1.5,
"I" = -0.5,
"As" = 2.5,
......
......@@ -386,13 +386,26 @@ getAdductInformation <- function(formula){
c(mode = "pH_mC4H7", addition = "C-4H-6", charge = 1, adductString = "[M-C4H7+H]+"),
c(mode = "pH_mC6H10O4", addition = "C-6H-9O-4", charge = 1, adductString = "[M-C6H10O4+H]+"),
c(mode = "pH_mC5H8O3", addition = "C-5H-7O-3", charge = 1, adductString = "[M-C5H8O3+H]+"),
c(mode = "pH_mCO", addition = "H-1C-1O-1", charge = 1, adductString = "[M-CO+H]+"),
c(mode = "pH_mCO", addition = "H1C-1O-1", charge = 1, adductString = "[M-CO+H]+"),
c(mode = "p_mCO", addition = "C-1O-1", charge = 1, adductString = "[M-CO]+"),
c(mode = "pH_mO3", addition = "H-1O-3", charge = 1, adductString = "[M-O3+H]+"),
c(mode = "pH_mC3H6", addition = "C-3H-5", charge = 1, adductString = "[M-C3H6+H]+"),
c(mode = "pH_mC4H3O5", addition = "C-4H-2O-5", charge = 1, adductString = "[M-C4H3O5+H]+"),
c(mode = "pH_mC6H11O6", addition = "C-6H-10O-6", charge = 1, adductString = "[M-C6H11O6+H]+"),
c(mode = "pH_mCH4S", addition = "C-1H-3S-1", charge = 1, adductString = "[M-CH4S+H]+"),
c(mode = "pH_mC7H12O6", addition = "C-7H-11O-6", charge = 1, adductString = "[M-C7H12O6+H]+"),
c(mode = "pH_mCH4O", addition = "C-1H-3O-1", charge = 1, adductString = "[M-CH4O+H]+"),
c(mode = "pH_mCH2O2", addition = "C-1H-1O-2", charge = 1, adductString = "[M-CH2O2+H]+"),
c(mode = "pH_mC4H8", addition = "C-4H-7", charge = 1, adductString = "[M-C4H8+H]+"),
c(mode = "pH_mC3H6O", addition = "C-3H-5O-1", charge = 1, adductString = "[M-C3H6O+H]+"),
c(mode = "pH_mC8H18O2", addition = "C-8H-17O-2", charge = 1, adductString = "[M-C8H18O2+H]+"),
c(mode = "pH_mC6H14O2", addition = "C-6H-13O-2", charge = 1, adductString = "[M-C6H14O2+H]+"),
c(mode = "pH_mC4H12O2", addition = "C-4H-11O-2", charge = 1, adductString = "[M-C4H12O2+H]+"),
c(mode = "pH_mH2O", addition = "H-1O-1", charge = 2, adductString = "[M-H2O+H]+"),
c(mode = "pNa_mH2O", addition = "H-2O-1Na1", charge = 2, adductString = "[M-H2O+Na]+"),
c(mode = "pH_mCO2", addition = "C-1O-2H1", charge = 1, adductString = "[M-CO2+H]+"),
c(mode = "pH_mO", addition = "O-1H1", charge = 1, adductString = "[M-O+H]+"),
c(mode = "p_mO", addition = "O-1", charge = 1, adductString = "[M-O]+"),
c(mode = "p2H", addition = "H2", charge = 2, adductString = "[M+2H]2+"),
c(mode = "pACN_p2H", addition = "C2H5N1", charge = 2, adductString = "[M+ACN+2H]2+"),
## positive: 2M+X
......@@ -426,13 +439,17 @@ getAdductInformation <- function(formula){
c(mode = "pM_pH_m3O", addition = add.formula(formula, "O-3H1"), charge = 1, adductString = "[2M-3O+H]+"),
c(mode = "pM_pNa_m3O", addition = add.formula(formula, "O-3Na1"), charge = 1, adductString = "[2M-3O+Na]+"),
c(mode = "pM_mH_c1", addition = add.formula(formula, "H-1"), charge = 1, adductString = "[2M-H]+"),
c(mode = "p_p2M_m3H", addition = add.formula(formula, add.formula(formula, "H-3")), charge = 1, adductString = "[3M-3H]+"),
c(mode = "pH_p2M_m2H2O", addition = add.formula(formula, add.formula(formula, "H-3O-2")), charge = 1, adductString = "[3M-2H2O+H]+"),
c(mode = "pNa_p2M_m2H2O", addition = add.formula(formula, add.formula(formula, "H-4O-2Na1")), charge = 1, adductString = "[3M-2H2O+Na]+"),
c(mode = "p_p2M_m2H2O", addition = add.formula(formula, add.formula(formula, "H-4O-2")), charge = 1, adductString = "[3M-2H2O]+"),
c(mode = "pM_mH_pH", addition = formula, charge = 1, adductString = "[2M-H+H]+"),
c(mode = "pH_c2", addition = "H1", charge = 2, adductString = "[M+H]2+"),
## negative: M-X
c(mode = "mH", addition = "H-1", charge = -1, adductString = "[M-H]-"),
c(mode = "mCl", addition = "Cl-1", charge = -1, adductString = "[M+Cl]-"),
c(mode = "mCl", addition = "Cl1", charge = -1, adductString = "[M+Cl]-"),
c(mode = "mFA", addition = "C1O2H", charge = -1, adductString = "[M+HCOOH-H]-"),
c(mode = "mH_pTFA", addition = "C2F3O2", charge = -1, adductString = "[M+CF3CO2H-H]-"),
......
......@@ -669,9 +669,10 @@ analyzeMsMs.formula <- function(msmsPeaks, mode="pH", detail=FALSE, run="prelimi
childPeaks <- as.data.frame(do.call(rbind, peakmatrix))
presentElements <- unique(unlist(lapply(X = lapply(X = childPeaks$formula, FUN = formulastring.to.list), FUN = names)))
## function dbe from FormulaCalculator.R
atomDBEs <- sapply(X = presentElements, FUN = dbe)
unknownElements <- names(atomDBEs)[sapply(X = atomDBEs, FUN = function(atomDBE){length(atomDBE)==0})]
if(length(unknownElements) > 0) stop(paste("Element(s)", paste(unknownElements), "cannot be assigned a DBE"))
if(length(unknownElements) > 0) stop(paste("Element(s)", paste(unknownElements, collapse = "; "), "cannot be assigned a DBE"))
# Reformat the deformatted output correctly (why doesn't R have a better way to do this, e.g. avoid deformatting?)
......
......@@ -2,6 +2,7 @@
NULL
## library(XML)
## library(RCurl)
## library(jsonlite)
retrieveDataWithRetry <- function(url, timeout, maximumNumberOfRetries = 5, retryDelayInSeconds = 3){
......@@ -384,6 +385,45 @@ getPcCHEBI <- function(query, from = "inchikey")
}
}
#' Retrieves DTXSID (if it exists) from EPA Comptox Dashboard
#'
#' Sends the given InChIKey to the EPA ACToR "chemIdentifier" web service and
#' extracts the \code{dtxsid} entry from the \code{DataRow} element of the
#' JSON reply.
#'
#' @usage getCompTox(query)
#' @param query The InChIKey of the compound (a single character string).
#' @return Returns the DTXSID. \code{NA} is returned if \code{query} is
#' 		missing, empty or not a single value, if the web service cannot be
#' 		reached, or if its reply cannot be parsed as JSON (the latter two cases
#' 		also raise a warning). \code{NULL} is returned if the reply contains no
#' 		DTXSID entry.
#'
#'
#' @examples
#'
#' \dontrun{
#' # getCompTox("MKXZASYAUGDDCJ-NJAFHUGGSA-N")
#' }
#'
#' @author Adelene Lai <adelene.lai@uni.lu>
#' @export
getCompTox <- function(query)
{
	# Nothing sensible can be looked up for a missing, empty or non-scalar
	# identifier, so do not bother the web service with it.
	if(length(query) != 1 || is.na(query) || !nzchar(query)){
		return(NA)
	}

	baseURL <- "https://actorws.epa.gov/actorws/chemIdentifier/v01/resolve.json?identifier="
	# Only the identifier is escaped (including reserved characters), so that it
	# cannot alter the structure of the request URL.
	url <- paste0(baseURL, URLencode(as.character(query), reserved = TRUE))

	# Use the value of tryCatch directly: NULL marks a failed request.
	data <- tryCatch(
		getURL(url, timeout=5),
		error=function(e){
			NULL
		}
	)

	if(is.null(data)){
		warning("EPA web service is currently offline")
		return(NA)
	}

	# The service may answer with something that is not JSON (e.g. an HTML
	# error page); treat that like an unavailable service instead of aborting
	# the calling workflow with a parser error.
	r <- tryCatch(
		fromJSON(data), #returns list
		error=function(e){
			NULL
		}
	)

	if(is.null(r)){
		warning("EPA web service returned a response that could not be parsed")
		return(NA)
	}

	# `$` fails on atomic vectors, so only descend into list-like results.
	if(!is.list(r) || !is.list(r$DataRow)){
		return(NULL)
	}

	return(r$DataRow$dtxsid)
}
#' Retrieve the Chemspider ID for a given compound
#'
#' Given an InChIKey, this function queries the chemspider web API to retrieve
......
Changes in version 2.15.2
- Merge PR by Adelene Lai to add CompTox information
- Merge PR by Hendrik Treutler to 1) add more CH$NAME fields, 2) add more adducts, and 3) add support for selenium
Changes in version 2.11.2
- Avoid writing out empty PUBLICATIONS
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment