Skip to content

Instantly share code, notes, and snippets.

@omsai
Last active September 10, 2015 02:10
Show Gist options
  • Save omsai/52a4a3bb6c44aeb990a3 to your computer and use it in GitHub Desktop.
Save omsai/52a4a3bb6c44aeb990a3 to your computer and use it in GitHub Desktop.
Print graphviz ontology in tabbed plain text using Python pandas
# License: Public Domain
"""Determine ontology object levels and pretty print them.
Pariksheet Nanda <[email protected]> 2015-08-26
"""
import pandas as pd
import numpy as np
# Load the ontology as an edge table: one tab-separated "parent<TAB>child"
# pair per line, with no header row.
df = pd.read_csv(
    "TS13_lineage_prune_0.txt",
    sep="\t",
    header=None,
    names=["parent", "child"],
)
# Create a new levels column and initialize it with NaNs; NaN marks an
# edge whose level has not been assigned yet.
df["child_level"] = np.nan
# Find the root nodes; namely nodes that are present in the parent list,
# but not the child list
parents = list(set(pd.unique(df["parent"])) - set(pd.unique(df["child"])))
parent_level = -1
# Assign the levels one depth at a time, starting from the roots.
# NOTE: `Series.hasnans` is a property in current pandas, so it must not
# be called; `isna().any()` is used instead.
while df["child_level"].isna().any():
    # Select all the edges whose parent is in the current set of parents
    mask = df["parent"].isin(parents)
    if not mask.any():
        # No edge leaves the current set of parents, so the remaining
        # unassigned edges can never be selected.  Stop instead of
        # looping forever; those rows keep a NaN level.
        break
    # Assign the child level
    df.loc[mask, "child_level"] = parent_level + 1
    # Now select the new parents for the subsequent step
    parents = df.loc[mask, "child"].values
    # Increment our level counter
    parent_level += 1
# Number of distinct levels; 0 when no edge received a level (e.g. the
# input file is empty), where int(NaN) would otherwise raise.
deepest_level = df["child_level"].max()
max_level = 0 if pd.isna(deepest_level) else int(deepest_level) + 1
# Convert each level into a DataFrame, for merging later
def iterlevels(df):
    """Yield one two-column parent/child DataFrame per level.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge table with "parent", "child" and "child_level" columns.
        It is not modified.

    Yields
    ------
    pandas.DataFrame
        For each level ``n`` from 0 up to the largest "child_level"
        value, the rows of `df` at that level.  The "parent" column is
        named ``str(n)`` and the "child" column ``str(n + 1)``, so that
        consecutive frames share a column name to merge on.  The index
        of the selected rows is preserved.  Nothing is yielded when no
        row has a level.
    """
    deepest_level = df["child_level"].max()
    if pd.isna(deepest_level):
        # Empty table or no levels assigned: int(NaN) would raise.
        return
    for level in range(int(deepest_level) + 1):
        # Rows with a NaN level never compare equal, so they are skipped.
        level_mask = df["child_level"] == level
        # Take an explicit copy so that renaming the columns neither
        # touches `df` nor triggers SettingWithCopyWarning.
        dfi = df.loc[level_mask, ["parent", "child"]].copy()
        dfi.columns = [str(level), str(level + 1)]
        yield dfi
dfs = list(iterlevels(df))
# Merge all the DataFrames.  Frame i has columns (str(i), str(i + 1)), so
# joining on str(i) chains each level's children to their own children.
# The outer join keeps branches that end before the deepest level; their
# missing deeper columns are filled with NaN.
dfj = dfs[0] if dfs else pd.DataFrame()
for i, dfi in enumerate(dfs[1:], start=1):
    dfj = pd.merge(dfj, dfi, on=str(i), how="outer")
if not dfj.empty:
    # `sort_index(by=...)` was removed from pandas; `sort_values` is the
    # replacement.  Sorting groups rows that share a path prefix.
    dfj = dfj.sort_values(by=list(dfj.columns))
# Print out the merged DataFrame as a tab-indented tree.  Each row is a
# root-to-leaf path; a node is printed only when it is at or below the
# level printed last, or differs from the previous row at that level.
printed_level = 0
prev_row = None
for row in dfj.itertuples(index=False, name=None):
    # Walk every column of the merged frame.  There is one more column
    # than there are levels, so iterating over range(max_level) would
    # silently drop the deepest nodes.
    for level, val in enumerate(row):
        # NaN marks the end of this path.  Use pd.isna rather than an
        # identity test against np.nan, which is not guaranteed to hold.
        if pd.isna(val):
            break
        # On the first row printed_level is 0, so the first test is
        # always true and prev_row is never indexed while it is None.
        if level >= printed_level or val != prev_row[level]:
            print("\t" * level + str(val))
            printed_level = level
    prev_row = row
embryo
blood vessel
branchial arch
1st branchial arch
1st branchial arch ectoderm
branchial arch ectoderm
dorsal aorta
eye
optic pit
foregut diverticulum
foregut diverticulum endoderm
gut endoderm
head
head mesenchyme
head mesenchyme derived from neural crest
head paraxial mesenchyme
heart
cardiac muscle
common atrial chamber
endocardial tube
primitive ventricle
sinus venosus
hindgut diverticulum
hindgut diverticulum endoderm
neural ectoderm
future brain
future brain neural fold
future midbrain
future prosencephalon
future rhombencephalon
rhombomere 01
rhombomere 02
rhombomere 03
rhombomere 04
rhombomere 05
midbrain-hindbrain junction
future spinal cord
neural plate
future spinal cord neural fold
neural fold
notochord
oral region
otic placode
paraxial mesenchyme
primitive streak
surface ectoderm
trunk mesenchyme
lateral plate mesenchyme
somatopleure
splanchnopleure
trunk paraxial mesenchyme
unsegmented mesenchyme
embryo eye
embryo trunk mesenchyme
embryo branchial arch
embryo hindgut diverticulum
embryo heart
embryo gut endoderm
embryo head
embryo surface ectoderm
embryo neural ectoderm
embryo otic placode
embryo notochord
embryo dorsal aorta
embryo oral region
embryo primitive streak
embryo foregut diverticulum
embryo head mesenchyme
embryo paraxial mesenchyme
embryo blood vessel
eye optic pit
trunk mesenchyme lateral plate mesenchyme
trunk mesenchyme trunk paraxial mesenchyme
branchial arch branchial arch ectoderm
branchial arch 1st branchial arch
hindgut diverticulum hindgut diverticulum endoderm
heart primitive ventricle
heart common atrial chamber
heart cardiac muscle
heart sinus venosus
heart endocardial tube
neural ectoderm future brain
neural ectoderm neural fold
neural ectoderm future spinal cord
foregut diverticulum foregut diverticulum endoderm
head mesenchyme head paraxial mesenchyme
head mesenchyme head mesenchyme derived from neural crest
lateral plate mesenchyme somatopleure
lateral plate mesenchyme splanchnopleure
trunk paraxial mesenchyme unsegmented mesenchyme
1st branchial arch 1st branchial arch ectoderm
future brain future rhombencephalon
future brain future midbrain
future brain future brain neural fold
future brain midbrain-hindbrain junction
future brain future prosencephalon
future spinal cord neural plate
future rhombencephalon rhombomere 01
future rhombencephalon rhombomere 05
future rhombencephalon rhombomere 03
future rhombencephalon rhombomere 04
future rhombencephalon rhombomere 02
neural plate future spinal cord neural fold
future spinal cord neural fold future spinal cord neural crest
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment