Created
May 12, 2021 14:51
-
-
Save wflynny/5cb18436e83ab455ccb3d82647f3cc3a to your computer and use it in GitHub Desktop.
Convert a GSE microarray to AnnData
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scanpy as sc | |
import pandas as pd | |
import GEOparse | |
def gse_to_adata(gse): | |
X = gse.pivot_samples("VALUE") | |
key = list(gse.gpls.keys())[0] | |
annotated = X.index.to_frame(index=False).merge( | |
gse.gpls[key].table[["ID", "Gene Symbol"]], | |
left_on="ID_REF", right_on="ID" | |
) | |
annotated.set_index("ID_REF", inplace=True) | |
annotated.dropna(subset=["Gene Symbol"], inplace=True) | |
annotated = annotated[~annotated["Gene Symbol"].str.contains("///")] | |
X = X.loc[annotated.index] | |
X.index = annotated["Gene Symbol"] | |
avg = X.median(axis=1) | |
expressed_probes = avg >= avg.quantile(0.25) | |
X = X.loc[expressed_probes] | |
X = X.groupby("Gene Symbol").median() | |
adata = sc.AnnData( | |
X.T.values, | |
obs=gse.phenotype_data, | |
var=X.index.to_frame() | |
) | |
return adata | |
if __name__ == "__main__": | |
g = GEOparser.get_GEO(geo="GSE51981", silent=True) | |
adata = gse_to_adata(g) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment