Skip to content

Instantly share code, notes, and snippets.

View vjcitn's full-sized avatar

Vince Carey vjcitn

View GitHub Profile
@vjcitn
vjcitn / ebe.R
Created August 13, 2024 10:53
use EBImage to explore spatial transcriptomics data
library(EBImage)
library(shiny)
ui = fluidPage(
sidebarLayout(
sidebarPanel(
helpText("EBImage explorer"),
fileInput("inimg", "file"),
numericInput("scalefactor", "multfac", min=1, max=50, value=10,step=1),
sliderInput("blursig", "sigma for blur", min=1, max=100, value=50)
#tabix Lung.tsv.gz 1:1-300000000 > lung.chr1.tsv
#tabix Lung.tsv.gz 2:1-300000000 > lung.chr2.tsv
#tabix Lung.tsv.gz 3:1-300000000 > lung.chr3.tsv
#tabix Lung.tsv.gz 4:1-300000000 > lung.chr4.tsv
#tabix Lung.tsv.gz 5:1-300000000 > lung.chr5.tsv
#tabix Lung.tsv.gz 6:1-300000000 > lung.chr6.tsv
#tabix Lung.tsv.gz 7:1-300000000 > lung.chr7.tsv
#tabix Lung.tsv.gz 8:1-300000000 > lung.chr8.tsv
#tabix Lung.tsv.gz 9:1-300000000 > lung.chr9.tsv
#tabix Lung.tsv.gz 10:1-300000000 > lung.chr10.tsv
@vjcitn
vjcitn / XenSCE.R
Created August 6, 2024 16:56
simple collection of matrix.mtx and parquet assets from Xenium exemplary dataset
# retrieve these from https://mghp.osn.xsede.org/bir190004-bucket01/BiocXenData/
#-rw-r--r-- 1 exouser exouser 3300521324 Mar 20 21:17 transcripts.parquet
#-rw-r--r-- 1 exouser exouser 68454210 Mar 20 21:16 nucleus_boundaries.parquet
#-rw-r--r-- 1 exouser exouser 73791358 Mar 20 21:15 cell_boundaries.parquet
#-rw-r--r-- 1 exouser exouser 242459483 Apr 10 03:46 cell_feature_matrix.tar.gz
#-rw-r--r-- 1 exouser exouser 44907408 Mar 20 21:15 cells.csv.gz
# tar zxf cell_feature_matrix.tar.gz to obtain folder cell_feature_matrix
library(Matrix)
@vjcitn
vjcitn / explore_bert.R
Last active July 17, 2024 13:35
look at 'safetensors' component of bert (use after usebert.R succeeds)
# probe R user cache file system for bert resources
explore_bert = function() {
if (!requireNamespace("tibble")) stop("please install tibble to use this function")
res = dir(tools::R_user_dir("../huggingface/hub/models--google-bert--bert-base-uncased/snapshots/", "cache"), full=TRUE, recursive=TRUE)
if (length(res)<1) stop("not finding huggingface/hub or bert components in cache")
safetpath = grep("model.safetensors", res, value=TRUE)
st = try(reticulate::import("safetensors"))
if (inherits(st, "try-error")) stop("can't import safetensors with reticulate")
oo = st$safe_open(filename=safetpath, framework="pt")
kk = oo$keys()
@vjcitn
vjcitn / usebert.R
Last active July 20, 2024 23:28
demonstration of bert-base-uncased in huggingface
use_bert = function(phrase) {
# use reticulate::py_install(c("torch", "transformers"), pip=TRUE) to set up
# devtools::source_gist() may produce some warnings related to GPU
# note that first run will populate .cache/huggingface/hub with model components
my_bert_template = "
# ensure accessible python has transformers installed
from transformers import AutoTokenizer, BertForMaskedLM, logging
from transformers import pipeline
logging.set_verbosity_error()
@vjcitn
vjcitn / get68proc.R
Last active June 16, 2024 15:38
function that will create a SingleCellExperiment by retrieving a serialized version of 'processed' PBMC 68k from Open Storage Network
get68proc = function(cache=BiocFileCache::BiocFileCache(),
targetfolder=tempdir()) {
zippath = "https://mghp.osn.xsede.org/bir190004-bucket01/BiocMatrixGenerics/pbmc68kproc.zip"
ent = BiocFileCache::bfcquery(cache, "pbmc68kproc.zip")
if (nrow(ent)==0) {
ent = BiocFileCache::bfcadd(cache, rname=zippath, action="copy")
}
refresh = BiocFileCache::bfcquery(cache, "pbmc68kproc.zip")
nzip = nrow(refresh)
ind = 1
# Load the "pbmc68k" dataset through TENxPBMCData and replace its row names
# with unique, syntactically valid names built from the Symbol_TENx column
# of its rowData.
library(TENxPBMCData)
p68 = TENxPBMCData("pbmc68k")
rownames(p68) = make.names(rowData(p68)$Symbol_TENx, unique=TRUE)
# Attach scater and scran, then overwrite p68 with the result of
# logNormCounts().
# NOTE(review): logNormCounts is presumably made available by attaching
# scater/scran -- confirm which package supplies it.
library(scater)
library(scran)
p68 = logNormCounts(p68)
# Retrieve the Human Primary Cell Atlas data from celldex.
# NOTE(review): presumably used as the annotation reference for SingleR in
# steps not visible in this excerpt -- confirm.
library(celldex)
hpca = HumanPrimaryCellAtlasData()
# Attached for steps that are not visible in this excerpt.
library(SingleR)
library(BiocParallel)
@vjcitn
vjcitn / lang.R
Created May 18, 2024 14:42
make silly sentences
# Word pools from which the silly sentences are assembled.

# things that can act or be acted upon
nouns = c(
  "cat", "mat", "cow", "chair", "table", "person", "food"
)

# verbs in the past tense
pastverbs = c(
  "sat", "walked", "rode", "spent",
  "moved", "wrote", "managed", "went"
)

# prepositions
preps = c(
  "on", "over", "beyond", "above", "beneath"
)

# articles / determiners
indic = c(
  "the", "a", "any", "some"
)

# adjectives
adjectives = c(
  "cute", "big", "small", "tiny", "huge", "green"
)
# Draw one element at random from the vector `x`.
#
# The element is chosen by sampling a position and indexing, rather than by
# calling sample(x, size=1) directly: sample() treats a single number n >= 1
# as the range 1:n, so sample(10, size=1) yields a random value from 1..10
# instead of 10.  Indexing by position always returns an element of `x`.
# For every other input the result is the same as sample(x, size=1).
#
# x: vector to draw from; must contain at least one element (an empty
#    vector raises an error from sample.int()).
# returns: a length-one vector of the same type as `x`.
get1 = function (x) x[sample.int(length(x), size=1L)]
sentence = function() {
sprintf("%s %s %s %s %s %s %s\n", get1(indic), get1(nouns), get1(pastverbs),
@vjcitn
vjcitn / inst.R
Created March 25, 2024 10:51
"instrumented" do_SingleR
do_SingleRi = function(sce=NULL, path="/home/vincent/tenx3k.h5ad",
ref=celldex::HumanPrimaryCellAtlasData(),
ref.type = "label.main",
min.common = 1000, assay.type.test=1L, instrument=TRUE,
clprocid=NULL, ...) {
if (instrument == TRUE && is.null(clprocid)) stop("clprocid not set")
cl_timestamp(clprocid, "init")
stopifnot(ref.type %in% c("label.main", "label.fine"))
if (is.null(sce)) {
is_h5ad = length(grep("h5ad$", path)==1)
@vjcitn
vjcitn / testi
Last active March 25, 2024 10:51
use 'instrumented' do_SingleR(i)
# Driver script: evaluate inst.R, start a collectl session and load the
# pbmc3k data, recording timestamps around the load.
# echo=TRUE prints each expression of inst.R as it is evaluated.
source("inst.R", echo=TRUE)
library(Rcollectl)
library(AnVILBestPractices)
library(SingleR)
library(BiocParallel)
# NOTE(review): cl_start() presumably launches collectl resource monitoring
# and returns an identifier for it -- confirm against Rcollectl docs.
clid = cl_start()
# Timestamps labelled "pre-data" and "3k loaded" bracket the data retrieval.
Rcollectl::cl_timestamp(clid, "pre-data")
p3k = TENxPBMCData::TENxPBMCData("pbmc3k")
Rcollectl::cl_timestamp(clid, "3k loaded")
# Use unique, syntactically valid names built from the Symbol column of
# rowData as row names.
rownames(p3k) = make.names(rowData(p3k)$Symbol, unique=TRUE)