Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
RMassBank_by_MaliRemorker
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Environmental Cheminformatics
RMassBank_by_MaliRemorker
Commits
97087e63
Commit
97087e63
authored
6 years ago
by
Treutler
Browse files
Options
Downloads
Patches
Plain Diff
added export of invalid records, decomposed the export of records and molfiles
parent
93ba17c6
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
R/RmbWorkspace.R
+2
-0
2 additions, 0 deletions
R/RmbWorkspace.R
R/createMassBank.R
+101
-44
101 additions, 44 deletions
R/createMassBank.R
R/zzz.R
+4
-1
4 additions, 1 deletion
R/zzz.R
with
107 additions
and
45 deletions
R/RmbWorkspace.R
+
2
−
0
View file @
97087e63
...
...
@@ -144,7 +144,9 @@ setClass("mbWorkspace",
# output data:
compiled
=
"list"
,
compiled_ok
=
"list"
,
compiled_notOk
=
"list"
,
mbfiles
=
"list"
,
mbfiles_notOk
=
"list"
,
molfile
=
"list"
,
ok
=
"integer"
,
problems
=
"integer"
...
...
This diff is collapsed.
Click to expand it.
R/createMassBank.R
+
101
−
44
View file @
97087e63
...
...
@@ -75,26 +75,32 @@ loadInfolist <- function(mb, fileName)
"COMMENT.ID"
# Clear from padding spaces and NAs
mbdata_new
<-
as.data.frame
(
t
(
apply
(
mbdata_new
,
1
,
function
(
r
)
mbdata_new
<-
as.data.frame
(
x
=
t
(
apply
(
mbdata_new
,
1
,
function
(
r
)
{
# Substitute empty spaces by real NA values
r
[
which
(
r
==
""
)]
<-
NA
# Trim spaces (in all non-NA fields)
r
[
which
(
!
is.na
(
r
))]
<-
sub
(
"^ *([^ ]+) *$"
,
"\\1"
,
r
[
which
(
!
is.na
(
r
))])
return
(
r
)
})))
}))
,
stringsAsFactors
=
FALSE
)
# use only the columns present in mbdata_archive, no other columns added in excel
mbdata_new
<-
mbdata_new
[,
colnames
(
mb
@
mbdata_archive
)]
# substitute the old entires with the ones from our files
# then find the new (previously inexistent) entries, and rbind them to the table
new_entries
<-
setdiff
(
mbdata_new
$
id
,
mb
@
mbdata_archive
$
id
)
old_entries
<-
intersect
(
mbdata_new
$
id
,
mb
@
mbdata_archive
$
id
)
for
(
colname
in
colnames
(
mb
@
mbdata_archive
))
mb
@
mbdata_archive
[,
colname
]
<-
as.character
(
mb
@
mbdata_archive
[,
colname
])
for
(
entry
in
old_entries
)
mb
@
mbdata_archive
[
mb
@
mbdata_archive
$
id
==
entry
,]
<-
mbdata_new
[
mbdata_new
$
id
==
entry
,]
mb
@
mbdata_archive
<-
rbind
(
mb
@
mbdata_archive
,
mbdata_new
[
mbdata_new
$
id
==
new_entries
,])
mb
@
mbdata_archive
<-
rbind
(
mb
@
mbdata_archive
,
mbdata_new
[
mbdata_new
$
id
==
new_entries
,])
for
(
colname
in
colnames
(
mb
@
mbdata_archive
))
mb
@
mbdata_archive
[,
colname
]
<-
as.factor
(
mb
@
mbdata_archive
[,
colname
])
return
(
mb
)
}
...
...
@@ -229,9 +235,7 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
if
(
4
%in%
steps
)
{
message
(
"mbWorkflow: Step 4. Spectra compilation"
)
mb
@
compiled
<-
lapply
(
selectSpectra
(
mb
@
spectra
,
"found"
,
"object"
),
function
(
r
)
{
mb
@
compiled
<-
lapply
(
X
=
selectSpectra
(
mb
@
spectra
,
"found"
,
"object"
),
FUN
=
function
(
r
)
{
message
(
paste
(
"Compiling: "
,
r
@
name
,
sep
=
""
))
mbdata
<-
mb
@
mbdata_relisted
[[
which
(
mb
@
mbdata_archive
$
id
==
as.numeric
(
r
@
id
))]]
if
(
nrow
(
mb
@
additionalPeaks
)
>
0
)
...
...
@@ -241,41 +245,86 @@ mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.c
return
(
res
)
})
# check which compounds have useful spectra
mb
@
ok
<-
which
(
!
is.na
(
mb
@
compiled
)
&
!
(
lapply
(
mb
@
compiled
,
length
)
==
0
))
ok
<-
unlist
(
lapply
(
X
=
selectSpectra
(
mb
@
spectra
,
"found"
,
"object"
),
FUN
=
function
(
spec
){
unlist
(
lapply
(
X
=
spec
@
children
,
FUN
=
function
(
child
){
child
@
ok
}))}))
mb
@
ok
<-
which
(
ok
)
#mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
mb
@
problems
<-
which
(
is.na
(
mb
@
compiled
))
mb
@
compiled_ok
<-
mb
@
compiled
[
mb
@
ok
]
mb
@
compiled_ok
<-
mb
@
compiled
[
mb
@
ok
]
mb
@
compiled_notOk
<-
mb
@
compiled
[
!
ok
]
}
# Step 5: Convert the internal tree-like representation of the MassBank data into
# flat-text string arrays (basically, into text-file style, but still in memory)
if
(
5
%in%
steps
)
{
message
(
"mbWorkflow: Step 5. Flattening records"
)
mb
@
mbfiles
<-
lapply
(
mb
@
compiled_ok
,
function
(
c
)
lapply
(
c
,
toMassbank
))
mb
@
mbfiles
<-
lapply
(
mb
@
compiled_ok
,
function
(
c
)
lapply
(
c
,
toMassbank
))
mb
@
mbfiles_notOk
<-
lapply
(
mb
@
compiled_notOk
,
function
(
c
)
lapply
(
c
,
toMassbank
))
}
# Step 6: For all OK records, generate a corresponding molfile with the structure
# of the compound, based on the SMILES entry from the MassBank record. (This molfile
# is still in memory only, not yet a physical file)
if
(
6
%in%
steps
)
{
message
(
"mbWorkflow: Step 6. Generate molfiles"
)
mb
@
molfile
<-
lapply
(
mb
@
compiled_ok
,
function
(
c
)
createMolfile
(
as.numeric
(
c
[[
1
]][[
'COMMENT'
]][[
getOption
(
"RMassBank"
)
$
annotations
$
internal_id_fieldname
]])))
if
(
RMassBank.env
$
export.molfiles
){
message
(
"mbWorkflow: Step 6. Generate molfiles"
)
mb
@
molfile
<-
lapply
(
mb
@
compiled_ok
,
function
(
c
)
createMolfile
(
as.numeric
(
c
[[
1
]][[
'COMMENT'
]][[
getOption
(
"RMassBank"
)
$
annotations
$
internal_id_fieldname
]])))
}
else
warning
(
"RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 6 is therefore ignored."
)
}
# Step 7: If necessary, generate the appropriate subdirectories, and actually write
# the files to disk.
if
(
7
%in%
steps
)
{
message
(
"mbWorkflow: Step 7. Generate subdirs and export"
)
dir.create
(
paste
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"moldata"
,
sep
=
'/'
),
recursive
=
TRUE
)
dir.create
(
paste
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"recdata"
,
sep
=
'/'
),
recursive
=
TRUE
)
for
(
cnt
in
seq_along
(
mb
@
compiled_ok
))
exportMassbank
(
mb
@
compiled_ok
[[
cnt
]],
mb
@
mbfiles
[[
cnt
]],
mb
@
molfile
[[
cnt
]])
## create folder
filePath_recData_valid
<-
file.path
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"recdata"
)
filePath_recData_invalid
<-
file.path
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"recdata_invalid"
)
filePath_molData
<-
file.path
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"moldata"
)
dir.create
(
filePath_recData_valid
,
recursive
=
TRUE
)
if
(
RMassBank.env
$
export.molfiles
)
dir.create
(
filePath_molData
,
recursive
=
TRUE
)
if
(
RMassBank.env
$
export.invalid
)
dir.create
(
filePath_recData_invalid
,
recursive
=
TRUE
)
if
(
length
(
mb
@
molfile
)
==
0
)
mb
@
molfile
<-
as.list
(
rep
(
x
=
NA
,
times
=
length
(
mb
@
compiled_ok
)))
## export valid spectra
for
(
cnt
in
seq_along
(
mb
@
compiled_ok
)){
exportMassbank_recdata
(
accessions
=
unlist
(
lapply
(
X
=
mb
@
compiled_ok
[[
cnt
]],
FUN
=
"["
,
"ACCESSION"
)),
files
=
mb
@
mbfiles
[[
cnt
]],
recDataFolder
=
filePath_recData_valid
)
if
(
findLevel
(
mb
@
compiled_ok
[[
cnt
]][[
1
]][[
"COMMENT"
]][[
getOption
(
"RMassBank"
)
$
annotations
$
internal_id_fieldname
]][[
1
]],
TRUE
)
==
"standard"
&
RMassBank.env
$
export.molfiles
)
exportMassbank_moldata
(
cpdID
=
as.numeric
(
mb
@
compiled_ok
[[
cnt
]][[
1
]][[
"COMMENT"
]][[
getOption
(
"RMassBank"
)
$
annotations
$
internal_id_fieldname
]][[
1
]]),
molfile
=
mb
@
molfile
[[
cnt
]],
molDataFolder
=
filePath_molData
)
}
## export invalid spectra
if
(
RMassBank.env
$
export.invalid
)
for
(
cnt
in
seq_along
(
mb
@
compiled_notOk
))
exportMassbank_recdata
(
accessions
=
unlist
(
lapply
(
X
=
mb
@
compiled_notOk
[[
cnt
]],
FUN
=
"["
,
"ACCESSION"
)),
files
=
mb
@
mbfiles_notOk
[[
cnt
]],
recDataFolder
=
filePath_recData_invalid
)
}
# Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
# to attribute substances to their corresponding structure molfiles.
if
(
8
%in%
steps
)
{
message
(
"mbWorkflow: Step 8. Create list.tsv"
)
makeMollist
(
mb
@
compiled_ok
)
if
(
RMassBank.env
$
export.molfiles
){
message
(
"mbWorkflow: Step 8. Create list.tsv"
)
makeMollist
(
compiled
=
mb
@
compiled_ok
)
}
else
warning
(
"RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 8 is therefore ignored."
)
}
return
(
mb
)
}
...
...
@@ -1289,20 +1338,21 @@ gatherSpectrum <- function(spec, msmsdata, ac_ms, ac_lc, aggregated, additionalP
{
# If the spectrum is not filled, return right now. All "NA" spectra will
# not be treated further.
if
(
msmsdata
@
ok
==
FALSE
)
if
(
msmsdata
@
ok
==
FALSE
&
!
RMassBank.env
$
export.invalid
)
return
(
NA
)
# get data
scan
<-
msmsdata
@
acquisitionNum
id
<-
spec
@
id
# Further fill the ac_ms datasets, and add the ms$focused_ion with spectrum-specific data:
precursor_types
<-
list
(
"pH"
=
"[M+H]+"
,
"pNa"
=
"[M+Na]+"
,
"mH"
=
"[M-H]-"
,
"mFA"
=
"[M+HCOO-]-"
,
"pM"
=
"[M]+"
,
"mM"
=
"[M]-"
,
"pNH4"
=
"[M+NH4]+"
)
"pH"
=
"[M+H]+"
,
"pNa"
=
"[M+Na]+"
,
"mH"
=
"[M-H]-"
,
"mFA"
=
"[M+HCOO-]-"
,
"pM"
=
"[M]+"
,
"mM"
=
"[M]-"
,
"pNH4"
=
"[M+NH4]+"
)
ac_ms
[[
'FRAGMENTATION_MODE'
]]
<-
msmsdata
@
info
$
mode
#ac_ms['PRECURSOR_TYPE'] <- precursor_types[spec$mode]
ac_ms
[[
'COLLISION_ENERGY'
]]
<-
msmsdata
@
info
$
ce
...
...
@@ -1902,28 +1952,35 @@ toMassbank <- function (mbdata)
#' }
#'
#' @export
exportMassbank
<-
function
(
compiled
,
files
,
molfile
)
exportMassbank
<-
function
(
compiled
,
files
,
molfile
){
exportMassbank_recdata
(
accessions
=
unlist
(
lapply
(
X
=
compiled
,
FUN
=
"["
,
"ACCESSION"
)),
files
,
recDataFolder
=
file.path
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"recdata"
)
)
exportMassbank_moldata
(
cpdID
=
as.numeric
(
compiled
[[
1
]][[
"COMMENT"
]][[
getOption
(
"RMassBank"
)
$
annotations
$
internal_id_fieldname
]][[
1
]]),
molfile
,
molDataFolder
=
file.path
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"moldata"
)
)
}
exportMassbank_recdata
<-
function
(
accessions
,
files
,
recDataFolder
)
{
molnames
<-
c
()
for
(
file
in
1
:
length
(
compiled
))
for
(
fileIdx
in
1
:
length
(
accessions
))
{
# Read the accession no. from the corresponding "compiled" entry
filename
<-
compiled
[[
file
]][
"ACCESSION"
]
# use this accession no. as filename
filename
<-
paste
(
filename
,
".txt"
,
sep
=
""
)
write
(
files
[[
file
]],
file.path
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"recdata"
,
filename
)
)
filename
<-
paste
(
accessions
[[
fileIdx
]],
".txt"
,
sep
=
""
)
filePath
<-
file.path
(
recDataFolder
,
filename
)
write
(
files
[[
fileIdx
]],
filePath
)
}
}
exportMassbank_moldata
<-
function
(
cpdID
,
molfile
,
molDataFolder
)
{
# Use internal ID for naming the molfiles
if
(
findLevel
(
compiled
[[
1
]][[
"COMMENT"
]][[
getOption
(
"RMassBank"
)
$
annotations
$
internal_id_fieldname
]][[
1
]],
TRUE
)
==
"standard"
){
molname
<-
sprintf
(
"%04d"
,
as.numeric
(
compiled
[[
1
]][[
"COMMENT"
]][[
getOption
(
"RMassBank"
)
$
annotations
$
internal_id_fieldname
]][[
1
]]))
molname
<-
paste
(
molname
,
".mol"
,
sep
=
""
)
write
(
molfile
,
file.path
(
getOption
(
"RMassBank"
)
$
annotations
$
entry_prefix
,
"moldata"
,
molname
)
)
}
molname
<-
sprintf
(
"%04d"
,
cpdID
)
molname
<-
paste
(
molname
,
".mol"
,
sep
=
""
)
write
(
molfile
,
file.path
(
molDataFolder
,
molname
))
}
# Makes a list.tsv with molfile -> massbank ch$name attribution.
...
...
This diff is collapsed.
Click to expand it.
R/zzz.R
+
4
−
1
View file @
97087e63
...
...
@@ -2,7 +2,10 @@
RMassBank.env
<<-
new.env
()
RMassBank.env
$
ReadAnnotation
<-
FALSE
RMassBank.env
$
testnumber
<-
1
RMassBank.env
$
verbose.output
<-
TRUE
## new variables
RMassBank.env
$
verbose.output
<-
FALSE
RMassBank.env
$
export.invalid
<-
FALSE
RMassBank.env
$
export.molfiles
<-
TRUE
mb
<-
list
()
attach
(
RMassBank.env
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment