y1zhou · November 9, 2020 21:17
diff --git a/yeast2human.py b/yeast2human.py
 import numpy as np
 import pandas as pd
 import requests

 # Interested genes: load the input spreadsheet and collect, for each yeast
 # species column, its non-null entries as a plain Python list.
 df = pd.read_excel("yeast_genes.xlsx")
 pom_genes, cer_genes = (
     df[species][df[species].notnull()].tolist()
     for species in ("Schizosaccharomyces pombe", "Saccharomyces cerevisiae")
 )

 # SGD data: download the headerless feature table, keep columns 0 and 4
 # (labelled here as the SGD ID and the gene symbol), and retain only the
 # rows whose symbol is one of the S. cerevisiae genes of interest.
 sgd_features = pd.read_table(
     "http://sgd-archive.yeastgenome.org/curation/chromosomal_feature/SGD_features.tab",
     header=None,
 )
 id_and_symbol = sgd_features.iloc[:, [0, 4]]
 id_and_symbol.columns = ["SGDID", "Symbol"]
 is_gene_of_interest = id_and_symbol["Symbol"].isin(cer_genes)
 # Renumber rows from 0 so the frame has a clean positional index.
 cer_map = id_and_symbol[is_gene_of_interest].reset_index(drop=True)

 # Get human homologs from Alliance of Genome Resources.
 # One request is issued per gene kept in cer_map, all through a single session.
 ss = requests.session()
 payload = {
     "filter.stringency": "stringent",
     "taxonID": "NCBITaxon:9606",
 }
 res = []
 # Only the SGD ID is needed, so iterate that column directly instead of
 # building a full row object with iterrows().
 for sgd_id in cer_map["SGDID"]:
     r = ss.get(
         f"https://www.alliancegenome.org/api/gene/SGD:{sgd_id}/homologs",
         params=payload,
         timeout=60,  # do not hang forever on a stalled connection
     )
     # Surface HTTP failures as a clear error instead of a KeyError or JSON
     # decoding error on the lines below.
     r.raise_for_status()
     # Keep only entries whose homolog species is human (NCBITaxon:9606),
     # even though the same taxon is already requested through `payload`.
     homologs = [
         x["homologGene"]["symbol"]
         for x in r.json()["results"]
         if x["homologGene"]["species"]["taxonId"] == "NCBITaxon:9606"
     ]
     res.append(homologs)

 # `res` was built in cer_map row order, so it is attached positionally.
 cer_map["HumanSymbol"] = res
 cer_map = cer_map[["Symbol", "HumanSymbol"]]

 # Fission yeast (Schizosaccharomyces pombe) data from PomBase.
 # The headerless table is read as two columns: the systematic gene ID and a
 # "|"-separated string of human symbols.
 pom_ortho = pd.read_table(
     "ftp://ftp.pombase.org/pombe/orthologs/human-orthologs.txt.gz", header=None
 )
 pom_ortho.columns = ["Systematic", "HumanSymbol"]
 # Split the symbol string into a list, then give every symbol its own row.
 pom_ortho["HumanSymbol"] = pom_ortho["HumanSymbol"].str.split("|")
 pom_ortho = pom_ortho.explode("HumanSymbol")
 # Discard rows with no symbol and rows carrying the 'NONE' placeholder.
 pom_ortho = pom_ortho.query("(HumanSymbol.notnull()) & (HumanSymbol != 'NONE')")

 # PomBase gene names: read the ID/name table (its first line is skipped),
 # then build, for each S. pombe gene of interest, the list of human symbols.
 pom_map = pd.read_table(
     "ftp://ftp.pombase.org/pombe/names_and_identifiers/gene_IDs_names.tsv",
     skiprows=1,
     header=None,
 )
 pom_map.columns = ["Systematic", "Symbol", "Synonyms"]
 pom_map = pom_map.drop(["Synonyms"], axis=1)
 # Symbols are upper-cased before being matched against pom_genes.
 # NOTE(review): this presumes the spreadsheet lists S. pombe genes in upper
 # case - confirm against the input file.
 pom_map["Symbol"] = pom_map["Symbol"].str.upper()
 pom_map = pom_map.query("Symbol.isin(@pom_genes)")
 # Inner join: genes without an entry in pom_ortho are dropped here.
 pom_map = pom_map.merge(pom_ortho, on="Systematic")
 # Collapse the one-row-per-human-symbol layout into one list per gene.
 human_lists = pom_map.groupby(["Systematic", "Symbol"])["HumanSymbol"].apply(list)
 pom_map = human_lists.reset_index(name="HumanSymbol")[["Symbol", "HumanSymbol"]]

 # Merge results: attach the human homolog lists of both yeast species to the
 # input table. Both lookup tables carry a "HumanSymbol" column, so after the
 # second merge they appear as HumanSymbol_x (S. cerevisiae) and
 # HumanSymbol_y (S. pombe).
 res = (
     df.merge(cer_map, how="left", left_on="Saccharomyces cerevisiae", right_on="Symbol")
     .drop("Symbol", axis=1)
     .merge(pom_map, how="left", left_on="Schizosaccharomyces pombe", right_on="Symbol")
     .drop("Symbol", axis=1)
 )

 # Rows without a match hold a missing value instead of a list. Test for
 # "is a list" rather than `x is np.NaN`: the `np.NaN` alias no longer exists
 # in NumPy 2.0, and an identity check only recognises one specific NaN object.
 res = res.assign(
     HumanSymbol_x=res["HumanSymbol_x"].apply(
         lambda x: x if isinstance(x, list) else []
     ),
     HumanSymbol_y=res["HumanSymbol_y"].apply(
         lambda x: x if isinstance(x, list) else []
     ),
 )

 # Element-wise concatenation of the two per-species lists.
 res = res.assign(HumanSymbol=res["HumanSymbol_x"] + res["HumanSymbol_y"]).drop(
     ["HumanSymbol_x", "HumanSymbol_y"], axis=1
 )

 # De-duplicate and sort each combined list, then write the table out.
 res["HumanSymbol"] = res["HumanSymbol"].apply(lambda x: sorted(set(x)))
 res.rename(columns={"HumanSymbol": "Homo sapiens"}).to_excel(
     "yeast_table_1.xlsx", index=False
 )
	import numpy as np
	import pandas as pd
	import requests

	# Interested genes: load the input spreadsheet and collect, for each yeast
	# species column, its non-null entries as a plain Python list.
	df = pd.read_excel("yeast_genes.xlsx")
	pom_genes, cer_genes = (
	    df[species][df[species].notnull()].tolist()
	    for species in ("Schizosaccharomyces pombe", "Saccharomyces cerevisiae")
	)

	# SGD data: download the headerless feature table, keep columns 0 and 4
	# (labelled here as the SGD ID and the gene symbol), and retain only the
	# rows whose symbol is one of the S. cerevisiae genes of interest.
	sgd_features = pd.read_table(
	    "http://sgd-archive.yeastgenome.org/curation/chromosomal_feature/SGD_features.tab",
	    header=None,
	)
	id_and_symbol = sgd_features.iloc[:, [0, 4]]
	id_and_symbol.columns = ["SGDID", "Symbol"]
	is_gene_of_interest = id_and_symbol["Symbol"].isin(cer_genes)
	# Renumber rows from 0 so the frame has a clean positional index.
	cer_map = id_and_symbol[is_gene_of_interest].reset_index(drop=True)

	# Get human homologs from Alliance of Genome Resources.
	# One request is issued per gene kept in cer_map, all through a single session.
	ss = requests.session()
	payload = {
	    "filter.stringency": "stringent",
	    "taxonID": "NCBITaxon:9606",
	}
	res = []
	# Only the SGD ID is needed, so iterate that column directly instead of
	# building a full row object with iterrows(). The loop body is indented
	# here; without indentation the `for` statement is a syntax error.
	for sgd_id in cer_map["SGDID"]:
	    r = ss.get(
	        f"https://www.alliancegenome.org/api/gene/SGD:{sgd_id}/homologs",
	        params=payload,
	        timeout=60,  # do not hang forever on a stalled connection
	    )
	    # Surface HTTP failures as a clear error instead of a KeyError or JSON
	    # decoding error on the lines below.
	    r.raise_for_status()
	    # Keep only entries whose homolog species is human (NCBITaxon:9606),
	    # even though the same taxon is already requested through `payload`.
	    homologs = [
	        x["homologGene"]["symbol"]
	        for x in r.json()["results"]
	        if x["homologGene"]["species"]["taxonId"] == "NCBITaxon:9606"
	    ]
	    res.append(homologs)

	# `res` was built in cer_map row order, so it is attached positionally.
	cer_map["HumanSymbol"] = res
	cer_map = cer_map[["Symbol", "HumanSymbol"]]

	# Fission yeast (Schizosaccharomyces pombe) data from PomBase.
	# The headerless table is read as two columns: the systematic gene ID and a
	# "|"-separated string of human symbols.
	pom_ortho = pd.read_table(
	    "ftp://ftp.pombase.org/pombe/orthologs/human-orthologs.txt.gz", header=None,
	)
	pom_ortho.columns = ["Systematic", "HumanSymbol"]
	pom_ortho = (
	    # Raw string: "\|" in a normal string literal is an invalid escape
	    # sequence. The pattern itself (an escaped pipe) is unchanged.
	    pom_ortho.assign(HumanSymbol=pom_ortho["HumanSymbol"].str.split(r"\|"))
	    # One row per human symbol.
	    .explode("HumanSymbol")
	    # Discard rows with no symbol and rows carrying the 'NONE' placeholder.
	    .query("(HumanSymbol.notnull()) & (HumanSymbol != 'NONE')")
	)

	# PomBase gene names: read the ID/name table (its first line is skipped),
	# then build, for each S. pombe gene of interest, the list of human symbols.
	pom_map = pd.read_table(
	    "ftp://ftp.pombase.org/pombe/names_and_identifiers/gene_IDs_names.tsv",
	    skiprows=1,
	    header=None,
	)
	pom_map.columns = ["Systematic", "Symbol", "Synonyms"]
	pom_map = pom_map.drop(["Synonyms"], axis=1)
	# Symbols are upper-cased before being matched against pom_genes.
	# NOTE(review): this presumes the spreadsheet lists S. pombe genes in upper
	# case - confirm against the input file.
	pom_map["Symbol"] = pom_map["Symbol"].str.upper()
	pom_map = pom_map.query("Symbol.isin(@pom_genes)")
	# Inner join: genes without an entry in pom_ortho are dropped here.
	pom_map = pom_map.merge(pom_ortho, on="Systematic")
	# Collapse the one-row-per-human-symbol layout into one list per gene.
	human_lists = pom_map.groupby(["Systematic", "Symbol"])["HumanSymbol"].apply(list)
	pom_map = human_lists.reset_index(name="HumanSymbol")[["Symbol", "HumanSymbol"]]

	# Merge results: attach the human homolog lists of both yeast species to the
	# input table. Both lookup tables carry a "HumanSymbol" column, so after the
	# second merge they appear as HumanSymbol_x (S. cerevisiae) and
	# HumanSymbol_y (S. pombe).
	res = (
	    df.merge(cer_map, how="left", left_on="Saccharomyces cerevisiae", right_on="Symbol")
	    .drop("Symbol", axis=1)
	    .merge(pom_map, how="left", left_on="Schizosaccharomyces pombe", right_on="Symbol")
	    .drop("Symbol", axis=1)
	)

	# Rows without a match hold a missing value instead of a list. Test for
	# "is a list" rather than `x is np.NaN`: the `np.NaN` alias no longer exists
	# in NumPy 2.0, and an identity check only recognises one specific NaN object.
	res = res.assign(
	    HumanSymbol_x=res["HumanSymbol_x"].apply(
	        lambda x: x if isinstance(x, list) else []
	    ),
	    HumanSymbol_y=res["HumanSymbol_y"].apply(
	        lambda x: x if isinstance(x, list) else []
	    ),
	)

	# Element-wise concatenation of the two per-species lists.
	res = res.assign(HumanSymbol=res["HumanSymbol_x"] + res["HumanSymbol_y"]).drop(
	    ["HumanSymbol_x", "HumanSymbol_y"], axis=1
	)

	# De-duplicate and sort each combined list, then write the table out.
	res["HumanSymbol"] = res["HumanSymbol"].apply(lambda x: sorted(set(x)))
	res.rename(columns={"HumanSymbol": "Homo sapiens"}).to_excel(
	    "yeast_table_1.xlsx", index=False
	)