Created
October 9, 2025 09:06
-
-
Save vjcitn/aa7ec6b6571a08770608349309b3d8c2 to your computer and use it in GitHub Desktop.
use GDSArray to interact with bigmelon serialization as SummarizedExperiment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used below: DataFrame/SimpleList (S4Vectors), the
# SummarizedExperiment container, GDSArray for array access to GDS nodes,
# and bigmelon for the example data and the GDS writer.
library(S4Vectors)
library(SummarizedExperiment)
library(GDSArray)
library(bigmelon)

# Load the `melon` example dataset shipped with bigmelon.
data(melon)

# Serialize the example data to a GDS file at a temporary path; `tf` is the
# path handed to bm2SE() later in this script.
tf <- tempfile()
es2gds(melon, tf)
#' Present a bigmelon GDS file as a SummarizedExperiment
#'
#' Reads one assay node of a GDS file through GDSArray and combines it with
#' feature annotation (nodes under `fData/`) and sample annotation (every
#' node whose path matches "pData") into a SummarizedExperiment.
#'
#' @param gdsf Path to the GDS file, as accepted by `gdsnodes()` and
#'   `GDSArray::GDSArray()`.
#' @param elem Name of the single GDS node to use as the assay; must be one
#'   of the nodes reported by `gdsnodes(gdsf)`.
#' @param fdkeep Character vector of node names under `fData/` to keep as
#'   `rowData` columns.
#' @param rnvbl Name of the `fdkeep` column whose values become the row
#'   names of the result.
#' @param sampnvbl Name of the pData column (basename of the node path)
#'   whose values become the column names of the result.
#' @return A SummarizedExperiment with one assay named `elem`, `rowData`
#'   holding the `fdkeep` columns and `colData` holding the pData columns.
#'   All annotation columns are character vectors.
bm2SE <- function(gdsf, elem = "betas",
                  fdkeep = c("ProbeID_A", "ProbeID_B", "ILMNID", "NAME",
                             "GENOME_BUILD", "CHROMOSOME_36", "COORDINATE_36"),
                  rnvbl = "ILMNID",
                  sampnvbl = "sampleID") {
  nodes <- gdsnodes(gdsf)
  pnodes <- grep("pData", nodes, value = TRUE)
  pnames <- basename(pnodes)

  # Validate up front so a bad argument produces a readable message instead
  # of a subscript error after the annotation has already been read.
  if (!is.character(elem) || length(elem) != 1L || !(elem %in% nodes)) {
    stop("'elem' must be a single node name present in the GDS file",
         call. = FALSE)
  }
  if (!is.character(fdkeep) || length(fdkeep) == 0L) {
    stop("'fdkeep' must be a non-empty character vector", call. = FALSE)
  }
  if (length(rnvbl) != 1L || !(rnvbl %in% fdkeep)) {
    stop("'rnvbl' must name exactly one element of 'fdkeep'", call. = FALSE)
  }
  if (length(sampnvbl) != 1L || !(sampnvbl %in% pnames)) {
    stop("'sampnvbl' must name exactly one pData node of the GDS file",
         call. = FALSE)
  }

  # Read a set of GDS nodes as character columns of a DataFrame. Building
  # the frame from a list (rather than via rbind + transpose) makes columns
  # of unequal length an error instead of being silently recycled.
  read_annotation <- function(paths, col_names) {
    cols <- lapply(paths, function(p) {
      as.character(GDSArray::GDSArray(gdsf, p))
    })
    names(cols) <- col_names
    out <- DataFrame(
      data.frame(cols, check.names = FALSE, stringsAsFactors = FALSE),
      check.names = FALSE
    )
    names(out) <- col_names
    out
  }

  el <- GDSArray::GDSArray(gdsf, elem)
  fd <- read_annotation(paste0("fData/", fdkeep), fdkeep)
  pd <- read_annotation(pnodes, pnames)

  ans <- SummarizedExperiment(assays = SimpleList(el), rowData = fd,
                              colData = pd)
  assayNames(ans) <- elem
  rownames(ans) <- fd[[rnvbl]]
  colnames(ans) <- pd[[sampnvbl]]
  ans
}
# Example: build the SummarizedExperiment from the GDS file written above.
x <- bm2SE(tf)  # will close and message

# Console transcript from an earlier session run against "test.gds":
#
# > x = bm2SE("test.gds")
#                                                        FileName ReadOnly  State
# 1 /Users/vincentcarey/BIOC_SOURCES/bigmelon/vignettes/test.gds    FALSE closed
# > x
# class: SummarizedExperiment
# dim: 3363 12
# metadata(0):
# assays(1): betas
# rownames(3363): cg00000029 cg00000108 ... rs966367 rs9839873
# rowData names(7): ProbeID_A ProbeID_B ... CHROMOSOME_36 COORDINATE_36
# colnames(12): 6057825008_R01C01 6057825008_R01C02 ... 6057825008_R06C01
#   6057825008_R06C02
# colData names(3): sampleID label sex
# > assay(x["rs966367",])
# <1 x 12> DelayedMatrix object of type "double":
#          6057825008_R01C01 ... 6057825008_R06C02
# rs966367           0.86588   .           0.17073
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment