Last active
September 20, 2023 20:54
-
-
Save gokceneraslan/f953b697ea09b9a8ff890a8707775498 to your computer and use it in GitHub Desktop.
Running SingleR from Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scanpy as sc | |
# numpy et al. | |
import numpy as np | |
import scipy.sparse as sp | |
import pandas as pd | |
import gc | |
# R integration | |
from rpy2.robjects.packages import importr | |
from rpy2.robjects import pandas2ri, numpy2ri, r | |
from rpy2.robjects.vectors import StrVector, FloatVector, ListVector | |
import rpy2.robjects as ro | |
import anndata2ri | |
def predict_cell_types(adata, | |
use_raw=True, | |
species='mouse', | |
ref_name=None, | |
ref_label_column='label.fine', | |
cluster_key='leiden', | |
obs_out_key='predicted_cell_types', | |
uns_out_key='cell_type_prediction', | |
**kwds): | |
r('BiocParallel::register(BiocParallel::SerialParam())') | |
s = importr('SingleR') | |
if ref_name is None: | |
if species == 'mouse': | |
ref_names = ['MouseRNAseqData', 'ImmGenData'] | |
else: | |
ref_names = ['HumanPrimaryCellAtlasData', 'MonacoImmuneData'] | |
refs = [s.__dict__[ref_name]() for ref_name in ref_names] | |
ref_genes = StrVector(set.intersection(*[set(r['rownames'](d)) for d in refs])) | |
ref = r('cbind')(*[r('`[`')(d, ref_genes) for d in refs]) # merge references | |
else: | |
if isinstance(ref_name, str): | |
ref = r(f'scRNAseq::{ref_name}')() | |
ref = r('scater::logNormCounts')(ref) | |
else: | |
ref = ref_name | |
ref_genes = r['rownames'](ref) | |
ad = adata.raw if use_raw else adata | |
obs = adata.obs | |
common_genes = sorted(list(set(ad.var_names) & set(ref_genes))) | |
assert len(common_genes) > 1000, 'Not enough genes overlapping with ref SingleR datasets...' | |
ref = r('`[`')(ref, ro.vectors.StrVector(common_genes)) | |
mat = ad[:, common_genes].X.T.copy() | |
if sp.issparse(mat): | |
mat = anndata2ri.scipy2ri.py2rpy(sp.csr_matrix(mat)) | |
else: | |
mat = numpy2ri.py2rpy(mat) | |
mat = r("`rownames<-`")(mat, ro.vectors.StrVector(ad[:, common_genes].var_names)) | |
mat = r("`colnames<-`")(mat, ro.vectors.StrVector(obs.index)) # TODO: really needed? | |
clusters = ro.vectors.StrVector(obs[cluster_key].values.tolist()) | |
par = r('BiocParallel::SerialParam')() | |
labels = s.SingleR(test=mat, | |
ref=ref, | |
labels=r('`$`')(ref, ref_label_column), # use label.main too | |
method='cluster', | |
clusters=clusters, BPPARAM=par, **kwds) | |
labels = pandas2ri.rpy2py(r('as.data.frame')(labels)) | |
adata.obs[obs_out_key] = '' | |
for cluster in adata.obs[cluster_key].cat.categories: | |
adata.obs.loc[adata.obs[cluster_key] == cluster, obs_out_key] = labels.loc[cluster]['pruned.labels'] | |
adata.uns[uns_out_key] = labels['pruned.labels'].to_dict() |
I think you need instal R packeage 'BiocParallel' first
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
When I try running this code, I get an error at this line: r('BiocParallel::register(BiocParallel::SerialParam())') and the error is: Error in loadNamespace(name) : there is no package called ‘BiocParallel’, have you ever encountered this error and/or know how to fix it? Thanks!