Created
September 19, 2022 14:22
-
-
Save zachcp/ac410b900d67264ffb8061d2b8fa45bf to your computer and use it in GitHub Desktop.
Metabolomics benchmarks update
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install.packages("microbenchmark") | |
# install.packages("plyr") | |
# | |
# install.packages("rcdk") | |
# install.packages("OrgMassSpecR") | |
# install.packages("CHNOSZ") | |
# | |
# BiocManager::install("Rdisop") | |
# BiocManager::install("MetaboCoreUtils") | |
# | |
# BiocManager::install("ChemmineR") | |
# BiocManager::install("ChemmineOB") | |
# | |
# BiocManager::install("enviPat") | |
## These require loading the packages explicitly | |
library(plyr) | |
library(CHNOSZ) | |
library(enviPat) | |
library(MetaboCoreUtils) | |
library(rcdk) | |
library(ChemmineR) | |
#library(ChemmineOB) | |
library(OrgMassSpecR) | |
library(Rdisop) | |
data(isotopes) | |
# original | |
# https://github.com/CDK-R/cdkr/blob/master/rcdk/R/formula.R | |
# get.formula <- function(mf, charge=0) { | |
# | |
# manipulator <- get("mfManipulator", envir = .rcdk.GlobalEnv) | |
# if(!is.character(mf)) { | |
# stop("Must supply a Formula string"); | |
# }else{ | |
# dcob <- .cdkFormula.createChemObject() | |
# molecularformula <- .cdkFormula.createFormulaObject() | |
# molecularFormula <- .jcall(manipulator, | |
# "Lorg/openscience/cdk/interfaces/IMolecularFormula;", | |
# "getMolecularFormula", | |
# mf, | |
# .jcast(molecularformula,.IMolecularFormula), | |
# TRUE); | |
# } | |
# | |
# D <- new(J("java/lang/Integer"), as.integer(charge)) | |
# .jcall(molecularFormula,"V","setCharge",D); | |
# object <- .cdkFormula.createObject(.jcast(molecularFormula,.IMolecularFormula)); | |
# return(object); | |
# } | |
mfManipulator <- J("org/openscience/cdk/tools/manipulator/MolecularFormulaManipulator") | |
silentchemobject <- J("org.openscience.cdk.silent.SilentChemObjectBuilder") | |
#' Rewrite the formual object and directly access Java | |
#' | |
get.formula2 <- function(mf) { | |
formula <- mfManipulator$getMolecularFormula( | |
"C2H3", | |
silentchemobject$getInstance()) | |
mfManipulator$getMass(formula) | |
} | |
#' Add type hints | |
#' | |
get.formula3 <- function(mf) { | |
builderinstance <- .jcall(silentchemobject, | |
"Lorg/openscience/cdk/interfaces/IChemObjectBuilder;", | |
"getInstance") | |
formula <- .jcall(mfManipulator, | |
"Lorg/openscience/cdk/interfaces/IMolecularFormula;", | |
"getMolecularFormula", | |
mf, | |
builderinstance); | |
mfManipulator$getMass(formula) | |
} | |
#' Add type hints | |
#' | |
get.formula4 <- function(mf) { | |
builderinstance <- .jcall(silentchemobject, | |
"Lorg/openscience/cdk/interfaces/IChemObjectBuilder;", | |
"getInstance") | |
formula <- .jcall(mfManipulator, | |
"Lorg/openscience/cdk/interfaces/IMolecularFormula;", | |
"getMolecularFormula", | |
mf, | |
builderinstance); | |
.jcall(mfManipulator, | |
"D", | |
"getMass", | |
formula) | |
} | |
benchmark <- microbenchmark::microbenchmark( | |
MetaboCoreUtils = MetaboCoreUtils::calculateMass("C2H6O"), | |
rcdk = rcdk::get.formula("C2H6O", charge = 0)@mass, | |
rcdk2 = get.formula2("C2H6O"), | |
rcdk3 = get.formula3("C2H6O"), | |
rcdk4 = get.formula4("C2H6O"), | |
Rdisop = Rdisop::getMolecule("C2H6O")$exactmass, | |
ChemmineR = ChemmineR::exactMassOB(ChemmineR::smiles2sdf("CCO")), | |
OrgMassSpecR = OrgMassSpecR::MonoisotopicMass(formula = OrgMassSpecR::ListFormula("C2H6O)"), charge = 0), | |
CHNOSZ = CHNOSZ::mass("C2H6O"), | |
enviPat = enviPat::isopattern(isotopes, "C2H6O", charge=FALSE, verbose=FALSE)[[1]][1,1] | |
, times=1000L) | |
masses <- c( | |
MetaboCoreUtils=MetaboCoreUtils::calculateMass("C2H6O"), | |
rcdk=rcdk::get.formula("C2H6O", charge = 0)@mass, | |
Rdisop=Rdisop::getMolecule("C2H6O")$exactmass, | |
#ChemmineR=ChemmineR::exactMassOB(ChemmineR::smiles2sdf("CCO")), | |
OrgMassSpecR=OrgMassSpecR::MonoisotopicMass(formula = OrgMassSpecR::ListFormula("C2H6O)"), charge = 0), | |
CHNOSZ=CHNOSZ::mass("C2H6O"), | |
enviPat=enviPat::isopattern(isotopes, "C2H6O", charge=FALSE, verbose=FALSE)[[1]][1,1] | |
) | |
options(digits=10) | |
t(t(sort(masses))) | |
summary(benchmark)[order(summary(benchmark)[,"median"]) , ] | |
clipr::write_clip(as.data.frame(summary(benchmark)[order(summary(benchmark)[,"median"]) , ] )) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
expr min lq mean median uq | |
1 MetaboCoreUtils 69.479 122.8465 154.049427 139.6495 156.2700 | |
10 enviPat 83.250 143.0935 170.429197 160.5360 179.6570 | |
5 rcdk4 175.889 228.8605 324.182735 271.2955 327.7135 | |
8 OrgMassSpecR 249.287 333.3135 392.479869 357.6665 401.5585 | |
6 Rdisop 382.417 459.8790 538.068697 490.1505 557.9975 | |
9 CHNOSZ 355.145 510.2910 588.186951 555.9165 632.2060 | |
4 rcdk3 781.987 1004.7160 1294.507318 1133.3415 1339.4695 | |
3 rcdk2 2078.465 2392.4950 2920.601088 2612.8025 2931.5465 | |
7 ChemmineR 3227.320 3790.0455 4808.783873 4044.1410 4465.1000 | |
2 rcdk 14823.815 16456.7715 19088.569430 17485.0800 19468.7195 |
@sneumann its not that simple. the formula object that is built might be used for other things. A straight substitution is not backwards compatible.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Cool, waaay faster. Should that become a PR for
https://github.com/CDK-R/cdkr/blob/7ebaa8d389f0987fcf261cb7fcaf4a5699b0cf50/rcdk/R/formula.R#L43
?
Yours, Steffen