Last active
December 23, 2022 22:00
-
-
Save mtmorgan/f6607493caaed612e174d350f2d93275 to your computer and use it in GitHub Desktop.
Tabula muris liver discovery & visualization
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## | |
## setup | |
## | |
if (!"BiocManager" %in% rownames(installed.packages())) | |
install.packages("BiocManager", repository = "https://cran.r-project.org") | |
BiocManager::install( | |
## only reinstall if newer version available | |
c("cellxgenedp", "plotly") | |
) | |
BiocManager::install( | |
## github package; experimental! | |
"mtmorgan/h5ad" | |
) | |
## | |
## quick start | |
## | |
library(cellxgenedp) | |
library(h5ad) | |
## filter on, e.g., Tissue == 'liver' and Organism = 'Mus', select | |
## first row, and click 'Quit and download | |
h5ad_file = cxg() | |
## navigate the data in R | |
h5ad <- h5ad(h5ad_file$local_path) | |
## row (gene) or column (cell) annotations are easy... | |
## read and visualize the 'n_genes" column (cells) | |
h5ad |> | |
column_data(j = "n_genes") |> | |
unlist() |> | |
hist(breaks = 40) | |
## expression values take more work, and is not currently efficient... | |
## which columns (cells) do we want? | |
is_age_30m_and_kupffer <- | |
h5ad |> | |
column_data(j = c("age", "cell_type")) |> | |
with(age == "30m" & cell_type == "Kupffer cell") | |
## which rows (genes) do want? | |
is_Cd36 <- | |
h5ad |> | |
row_data(j = "feature_name") |> | |
with(feature_name == "Cd36") | |
selected_cells <- | |
## this step is slow and could use a lot of memory at the moment... | |
h5ad |> | |
## retrieve just the cells of interest... | |
layer(j = is_age_30m_and_kupffer) |> | |
## ...then filter for rows of interest | |
filter(row %in% which(is_Cd36)) | |
## histogram | |
selected_cells |> | |
pull(data) |> | |
hist() | |
## | |
## A longer tour | |
## | |
## 'discover' the Tabula muris collection, liver dataset, and H5AD | |
## file in CELLxGENE | |
## | |
library(cellxgenedp) | |
## find the Tabula muris 'collection_id' | |
tabula_muris_collection_id <- | |
collections() |> | |
filter(grepl("tabula muris", name, ignore.case = TRUE)) |> | |
pull(collection_id) | |
## find the 10x liver dataset | |
liver_10x <- | |
datasets() |> | |
filter( | |
collection_id == tabula_muris_collection_id, | |
facets_filter(tissue, "label", "liver"), | |
facets_filter(assay, "label", "10x 3' v2"), | |
is_primary_data == "SECONDARY" | |
) | |
liver_10x_dataset_id <- liver_10x |> pull(dataset_id) | |
## find the 'h5ad' files | |
liver_h5ad <- | |
files() |> | |
filter( | |
dataset_id == liver_10x_dataset_id, | |
filetype == "H5AD" | |
) | |
## download the file to a local cache for easy reference | |
liver_h5ad_file <- | |
liver_h5ad |> | |
files_download(dry.run = FALSE) | |
## | |
## manipulate the file in R | |
## | |
library(h5ad) | |
h5ad <- h5ad(liver_h5ad_file) | |
## > h5ad | |
## class: H5AD | |
## dim: 17985 x 7294 | |
## layer names: X raw | |
## column_data names: age cell free_annotation method donor_id n_genes | |
## subtissue n_counts louvain leiden assay_ontology_term_id | |
## disease_ontology_term_id cell_type_ontology_term_id | |
## tissue_ontology_term_id development_stage_ontology_term_id | |
## self_reported_ethnicity_ontology_term_id sex_ontology_term_id | |
## is_primary_data organism_ontology_term_id suspension_type cell_type | |
## assay disease organism sex tissue self_reported_ethnicity | |
## development_stage | |
## row_data names: n_cells means dispersions dispersions_norm | |
## highly_variable feature_is_filtered feature_name feature_reference | |
## feature_biotype | |
## column_embedding: X_pca X_tsne X_umap | |
## row_embedding: PCs | |
## look at the 'n_gene' column | |
h5ad |> | |
column_data(j = "n_genes") |> | |
summarize( | |
min = min(n_genes, na.rm = TRUE), | |
max = max(n_genes, na.rm = TRUE) | |
) | |
## a basic histogram | |
h5ad |> | |
column_data(j = "n_genes") |> | |
unlist() |> | |
hist(breaks = 40) | |
## | |
## interactive umap visualization (requires 'plotly' package | |
## | |
if (!"plotly" %in% rownames(installed.packages())) | |
BiocManager::install("plotly") | |
## extract the 'umap' | |
umap <- | |
h5ad |> | |
column_embedding("X_umap", with = "cell_type") | |
## visualize | |
umap |> | |
## randomize points to avoid possible false sense of 'groups' due | |
## to overplotting | |
slice(sample(n())) |> | |
plotly::plot_ly( | |
x = ~ X_umap_1, y = ~ X_umap_2, | |
color = ~ cell_type, | |
colors = "Paired", | |
type = "scattergl", # greatly speed display | |
mode = "markers", opacity = 1, marker = list(size = 5L) | |
) |> | |
plotly::layout( | |
legend = list(itemsizing = 'constant', layers = list(opacity = 1)) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment