Created
August 22, 2024 07:31
-
-
Save Stfort52/a67a6d4f887b6704f5058ec6cdc88320 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from io import StringIO
from typing import Literal

import pandas as pd
def get_ensembl_mappings(
    genes: list[str],
    organism: str = "hsa",
    server: Literal["www", "eu", "uswest", "asia", "gprofiler"] = "asia",
) -> pd.DataFrame:
    """Look up gene names for a list of gene identifiers.

    Dispatches to one of two backends depending on ``server``: an Ensembl
    BioMart host, or the g:Profiler conversion API.

    Args:
        genes: Gene identifiers to look up.
        organism: Organism key understood by the backends ("hsa" or "mmu").
        server: Which backend to query. "www", "uswest" and "asia" select the
            Ensembl BioMart host of that name; "eu" is routed to the "www"
            host; "gprofiler" selects the g:Profiler API.

    Returns:
        The DataFrame produced by the selected backend.

    Raises:
        ValueError: If ``server`` is not one of the accepted values.
    """
    if server == "gprofiler":
        return _query_gprofiler(genes, organism)

    # "eu" is deliberately sent to the "www" host rather than to an
    # "eu.ensembl.org" host.
    # NOTE(review): presumably the eu mirror is not usable here -- confirm.
    if server == "eu":
        return _query_ensembl(genes, organism, "www")

    if server in ("www", "uswest", "asia"):
        return _query_ensembl(genes, organism, server)

    raise ValueError(f"Invalid server: {server}")
def _query_ensembl(genes: list[str], organism: str, server: str) -> pd.DataFrame:
    """Query an Ensembl BioMart host for the gene names of Ensembl gene IDs.

    Args:
        genes: Ensembl gene IDs, used as the ``ensembl_gene_id`` filter.
        organism: Organism key, "mmu" or "hsa".
        server: Sub-domain of ``ensembl.org`` to contact (e.g. "www").

    Returns:
        A DataFrame indexed by ``ensembl_gene_id`` with a single
        ``external_gene_name`` column. It is empty when the response body
        contains no rows.

    Raises:
        KeyError: If ``organism`` is not a supported key.
    """
    # Imported lazily so the module can be imported without biomart installed.
    import biomart

    ORGANISM_KV: dict[Literal["mmu", "hsa"], str] = {
        "mmu": "mmusculus_gene_ensembl",
        "hsa": "hsapiens_gene_ensembl",
    }
    column_names = ["ensembl_gene_id", "external_gene_name"]

    # Resolve the dataset name first so an unsupported organism fails before
    # any network connection is opened.
    dataset_name = ORGANISM_KV[organism]

    mart = biomart.BiomartServer(f"http://{server}.ensembl.org/biomart")
    dataset = mart.datasets[dataset_name]
    response = dataset.search(
        {
            "filters": {"ensembl_gene_id": genes},
            "attributes": column_names,
        }
    )

    body = response.text
    if not body.strip():
        # read_csv raises EmptyDataError on an empty body; return an empty
        # frame with the same layout as the normal result instead.
        return pd.DataFrame(
            columns=["external_gene_name"],
            index=pd.Index([], name="ensembl_gene_id"),
        )

    # The response is parsed as headerless tab-separated text, one row per
    # match, in the order of the requested attributes.
    df = pd.read_csv(StringIO(body), sep="\t", header=None)
    df.columns = column_names
    return df.set_index("ensembl_gene_id")
def _query_gprofiler(genes: list[str], organism: str) -> pd.DataFrame:
    """Convert gene identifiers through the g:Profiler conversion API.

    Args:
        genes: Gene identifiers, sent as the ``query`` field.
        organism: Organism key, "mmu" or "hsa".

    Returns:
        A DataFrame indexed by ``incoming`` (the identifier as submitted)
        with a single ``name`` column, with duplicate rows removed. It is
        empty when the API returns no results.

    Raises:
        KeyError: If ``organism`` is not a supported key, or if the response
            JSON has no ``"result"`` entry.
        requests.HTTPError: If the API answers with an error status code.
    """
    # Imported lazily so the module can be imported without requests installed.
    import requests

    ORGANISM_KV: dict[Literal["mmu", "hsa"], str] = {
        "mmu": "mmusculus",
        "hsa": "hsapiens",
    }
    url = "https://biit.cs.ut.ee/gprofiler/api/convert/convert/"
    response = requests.post(
        url,
        json={
            "query": genes,
            "target": "ENTREZGENE",
            "organism": ORGANISM_KV[organism],
        },
    )
    # Surface HTTP failures directly instead of letting an error payload
    # turn into an opaque KeyError below.
    response.raise_for_status()

    # Passing ``columns`` selects just these two fields and also yields a
    # well-formed empty frame when the result list is empty.
    df = pd.DataFrame(response.json()["result"], columns=["incoming", "name"])
    df = df.drop_duplicates()
    return df.set_index("incoming")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment