Created
August 8, 2012 20:14
-
-
Save haxney/3298219 to your computer and use it in GitHub Desktop.
Hack xcms to read from a database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##' Select spectra based on the path of their associated raw file. | |
RAW_FILE_NAME_QUERY_TEMPLATE <- 'SELECT raw_file_path, | |
run_index, spectra.start_time, array_length, mz_array, intensity_array, | |
polarity, total_ion_current, lowest_observed_mz, highest_observed_mz | |
FROM runs LEFT JOIN spectra ON spectra.run_id = runs.id | |
WHERE runs.raw_file_path = \'%s\' AND ms_level = %d | |
%s /* Will be filled with TIME_RANGE_QUERY_TEMPLATE */ | |
ORDER BY spectra.start_time ASC' | |
##' Builds and executes a query for a run based on a raw file name. | |
ExecRawFileQuery <- function(conn, | |
raw_file_name, | |
timeLower=NULL, | |
timeUpper=NULL, | |
msLevel=1) { | |
timeConstraints <- GetTimeRangeQuery(timeLower, timeUpper) | |
query <- sprintf(RAW_FILE_NAME_QUERY_TEMPLATE, raw_file_name, msLevel, timeConstraints) | |
odbcQuery(conn, query) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##' Seriously, this function is bad news. | |
HackRawReader <- function() { | |
xEnv <- environment(xcms:::xcmsSet) | |
unlockBinding("xcmsRaw", xEnv) | |
assign("xcmsRaw", xcmsRaw.db, envir=xEnv) | |
assign("xcmsRaw", xcmsRaw.db, envir=parent.frame()) | |
#lockBinding("xcmsRaw", xEnv) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##' Trick xcms into using database values for its raw data. | |
##' | |
##' Must be called from an environment which contains a variable `conn` bound to | |
##' a RODBC database connection. Optionally, if a variable `timeRange` is | |
##' defined within the environment, it should be in the form of | |
##' `list(lower=LOWER_TIME, upper=UPPER_TIME)` where the elements specify the | |
##' lower and upper bounds of the time window (respectively), specified in | |
##' minutes. | |
##' | |
##' On its own, this function is just fine; it's when it is hacked to replace | |
##' the "default" `xcmsRaw` that things become dicey. | |
xcmsRaw.db <- function(filename, profstep = 1, profmethod = "bin", | |
profparam = list(), | |
includeMSn = FALSE, mslevel=NULL, | |
scanrange=NULL) { | |
object <- new("xcmsRaw") | |
object@env <- new.env(parent=.GlobalEnv) | |
if (exists("timeRange")) { | |
ExecRawFileQuery(conn, filename, timeRange$lower, timeRange$upper) | |
} else { | |
ExecRawFileQuery(conn, filename) | |
} | |
cb <- list() | |
attr(cb, "class") <- c("decode") | |
spectra <- ProcessAllSegments(conn, cb) | |
if (length(spectra) == 0) { | |
stop(paste("No spectra found for run:", filename)) | |
} | |
object@filepath <- filename | |
## Database start time is in minutes; must convert to seconds. | |
object@scantime <- sapply(spectra, getElement, "start_time") * 60 | |
object@tic <- sapply(spectra, getElement, "total_ion_current") | |
## This is a bit tricky. The goal is to populate `object@env` with two | |
## variables: `mz` and `intensity` which are the concatenated vectors of the | |
## mz and intensity vectors of each spectrum, respectively. Setting the | |
## environment for `addSpectrum` allows its body to access the variables | |
## `mz` and `intensity` because of environment inheritance. For | |
## `addSpectrum` to modify the variables, it has to assign them in the | |
## _parent_ environment because its local environment is created for each | |
## invocation of the function. | |
## | |
## The length of the `mz` vector before appending the new decoded arrays is | |
## the index of the element immediately _before_ the first element added by | |
## the `spectrum` argument. It is returned so that `sapply` can append it to | |
## the `object@scanindex` vector. | |
assign("mz", double(), envir=object@env) | |
assign("intensity", double(), envir=object@env) | |
addSpectrum <- function(spectrum) { | |
idx <- length(mz) | |
assign("mz", append(mz, spectrum$decoded[,1]), envir=parent.env(environment())) | |
assign("intensity", append(intensity, spectrum$decoded[,2]), envir=parent.env(environment())) | |
idx | |
} | |
environment(addSpectrum) <- object@env | |
object@scanindex <- sapply(spectra, addSpectrum) | |
object@profmethod <- profmethod | |
object@profparam <- profparam | |
if (profstep) | |
profStep(object) <- profstep | |
## run_index from database is 0-based, xcms expects 1-based spectra. | |
object@acquisitionNum <- as.integer(sapply(spectra, getElement, "run_index") + 1) | |
object@polarity <- sapply(spectra, getElement, "polarity") | |
levels(object@polarity) <- list(negative="MS:1000129", | |
positive="MS:1000130", | |
unknown="unknown") | |
## MS2 spectra aren't dealt with right now. | |
if(exists("rawdataMSn")) { | |
object@env$msnMz <- rawdataMSn$mz | |
object@env$msnIntensity <- rawdataMSn$intensity | |
object@msnScanindex <- rawdataMSn$scanindex | |
object@msnAcquisitionNum <- rawdataMSn$acquisitionNum | |
object@msnLevel <- rawdataMSn$msLevel | |
object@msnRt <- rawdataMSn$rt | |
object@msnPrecursorScan <- match(rawdataMSn$precursorNum, object@acquisitionNum) | |
object@msnPrecursorMz <- rawdataMSn$precursorMZ | |
object@msnPrecursorIntensity <- rawdataMSn$precursorIntensity | |
object@msnPrecursorCharge <- rawdataMSn$precursorCharge | |
object@msnCollisionEnergy <- rawdataMSn$collisionEnergy | |
} | |
if (!missing(mslevel) & !is.null(mslevel)) { | |
object <- msn2ms(object) | |
object <- split(object, f=object@msnLevel==mslevel)$"TRUE" | |
## fix xcmsRaw metadata, or always calculate later than here ? | |
} | |
object | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment