Last active
March 19, 2024 10:02
-
-
Save afrendeiro/d975b994cd88ce8cfb9965e062296e83 to your computer and use it in GitHub Desktop.
Re-analyze GeoMx data from Rendeiro et al. 2021 doi:s41586-021-03475-6
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Re-analyze GeoMx data from Rendeiro et al. 2021 doi:s41586-021-03475-6 | |
Requirements (Python 3.10+): | |
pip install \ | |
"anndata" \ | |
"matplotlib>=3.8.3" \ | |
"numpy>=1.26.4" \ | |
"pandas>=2.1.0" \ | |
"scanpy" \ | |
"seaborn_extensions" \ | |
"tqdm>=4.64.1" \ | |
"urlpath>=1.2.0" | |
""" | |
from urlpath import URL | |
import numpy as np | |
import pandas as pd | |
import pingouin as pg | |
from tqdm import tqdm | |
from anndata import AnnData | |
import scanpy as sc | |
import matplotlib.pyplot as plt | |
from seaborn_extensions import clustermap | |
base_url = URL("https://zenodo.org/records/4635286/files/data/geomx/") | |
figkws = dict(bbox_inches="tight", dpi=200) | |
prefix = "rendeiro_Nature_2021_covid19_paper.geomx" | |
# Load metadata | |
meta = pd.read_parquet(base_url / "metadata_matrix.pq").dropna(subset="phenotypes") | |
meta["patient"] = pd.Categorical(meta["sample_id"].str.split("_").str[0]) | |
meta["location"] = pd.Categorical(meta["location"]) | |
meta["batch"] = pd.Categorical("B" + meta["Batch"].astype(str)) | |
voi = ["phenotypes", "location", "patient", "batch"] | |
# Load data | |
df = pd.read_parquet(base_url / "expression_matrix.pq").T.reindex(meta.index) | |
df = np.log1p(df) | |
# Visualize | |
fig, ax = plt.subplots(1, 1, figsize=(4, 4)) | |
ax.scatter(df.mean(), df.std(), alpha=0.25, s=5) | |
ax.set(xscale="log", yscale="log", xlabel="Mean", ylabel="Standard deviation") | |
fig.savefig(f"{prefix}.mean_vs_std.png", **figkws) | |
g = clustermap( | |
df, | |
config="z", | |
row_colors=meta.reindex(df.index)[voi], | |
square=False, | |
) | |
g.ax_heatmap.set(rasterized=True) | |
g.fig.savefig(f"{prefix}.all_data_heatmap.png", **figkws) | |
a = AnnData(df.values, obs=meta) | |
a.var.index = df.columns | |
sc.pp.scale(a) | |
sc.pp.pca(a) | |
sc.pp.neighbors(a) | |
sc.tl.umap(a) | |
fig = sc.pl.umap(a, color=voi, alpha=0.75, s=25, show=False, return_fig=True) | |
fig.savefig(f"{prefix}.umap.svg", **figkws) | |
# Differential expression (Mann-Whitney U) | |
_res = list() | |
for location in meta["location"].cat.categories: | |
ctrl = meta.query("location == @location & phenotypes == 'Normal'").index | |
for phenotypes in meta["phenotypes"].cat.categories[1:]: | |
test = meta.query("location == @location & phenotypes == @phenotypes").index | |
for gene in tqdm(df.columns): | |
r = pg.mwu(df.loc[ctrl, gene], df.loc[test, gene]).assign( | |
gene=gene, phenotypes=phenotypes, location=location | |
) | |
_res.append(r) | |
diff = pd.concat(_res, ignore_index=True) | |
diff.to_csv(f"{prefix}.differential_expression_results.csv", index=False) | |
sel = ( | |
diff.set_index("gene") | |
.groupby(voi[:2])["p-val"] | |
.nsmallest(10) | |
.unstack(2) | |
.columns.unique() | |
) | |
g = clustermap( | |
df.loc[:, sel], | |
config="z", | |
row_colors=meta.reindex(df.index)[voi], | |
square=False, | |
figsize=(12, 10), | |
) | |
g.ax_heatmap.set(rasterized=True) | |
g.fig.savefig( | |
f"{prefix}.diff_expression.diff_top10_p-val.png", | |
**figkws, | |
) | |
sel = ( | |
diff.set_index("gene") | |
.groupby(voi[:2])["RBC"] | |
.nlargest(5) | |
.unstack(2) | |
.columns.unique() | |
.tolist() | |
) | |
sel += ( | |
diff.set_index("gene") | |
.groupby(voi[:2])["RBC"] | |
.nsmallest(5) | |
.unstack(2) | |
.columns.unique() | |
.tolist() | |
) | |
g = clustermap( | |
df.loc[:, sel], | |
config="z", | |
row_colors=meta.reindex(df.index)[voi], | |
square=False, | |
figsize=(12, 10), | |
) | |
g.ax_heatmap.set(rasterized=True) | |
g.fig.savefig( | |
f"{prefix}.diff_expression.diff_top10_effectsize.png", | |
**figkws, | |
) |
Author
afrendeiro
commented
Mar 19, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment