Last active
May 5, 2022 13:45
-
-
Save cthoyt/d3a33b19b5fd889da3c10fc5d5285ba6 to your computer and use it in GitHub Desktop.
Ground ODiseA
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "ee895d12", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Disease_ID</th>\n", | |
" <th>Disease_name</th>\n", | |
" <th>Gene_symbol</th>\n", | |
" <th>Tissue</th>\n", | |
" <th>Inflicted</th>\n", | |
" <th>Pathogenic</th>\n", | |
" <th>Phenotypic_series_ID</th>\n", | |
" <th>Phenotypic_series_name</th>\n", | |
" <th>Gene_expression_TPM</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>617140</td>\n", | |
" <td>ZTTK syndrome</td>\n", | |
" <td>SON</td>\n", | |
" <td>Kidney</td>\n", | |
" <td>Yes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>35.4364\\r</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>216340</td>\n", | |
" <td>Yunis-Varon syndrome</td>\n", | |
" <td>FIG4</td>\n", | |
" <td>Blood and bone marrow</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Yes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>10.2865\\r</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>615829</td>\n", | |
" <td>Xia-Gibbs syndrome</td>\n", | |
" <td>AHDC1</td>\n", | |
" <td>Brain</td>\n", | |
" <td>Yes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>\\r</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>278300</td>\n", | |
" <td>Xanthinuria, type I</td>\n", | |
" <td>XDH</td>\n", | |
" <td>Kidney</td>\n", | |
" <td>Yes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>PS278300</td>\n", | |
" <td>Xanthinuria</td>\n", | |
" <td>0.200846\\r</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>278000</td>\n", | |
" <td>Wolman disease</td>\n", | |
" <td>LIPA</td>\n", | |
" <td>Blood and bone marrow</td>\n", | |
" <td>Yes</td>\n", | |
" <td>Yes</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>15.211\\r</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Disease_ID Disease_name Gene_symbol Tissue \\\n", | |
"0 617140 ZTTK syndrome SON Kidney \n", | |
"1 216340 Yunis-Varon syndrome FIG4 Blood and bone marrow \n", | |
"2 615829 Xia-Gibbs syndrome AHDC1 Brain \n", | |
"3 278300 Xanthinuria, type I XDH Kidney \n", | |
"4 278000 Wolman disease LIPA Blood and bone marrow \n", | |
"\n", | |
" Inflicted Pathogenic Phenotypic_series_ID Phenotypic_series_name \\\n", | |
"0 Yes NaN NaN NaN \n", | |
"1 Yes Yes NaN NaN \n", | |
"2 Yes NaN NaN NaN \n", | |
"3 Yes NaN PS278300 Xanthinuria \n", | |
"4 Yes Yes NaN NaN \n", | |
"\n", | |
" Gene_expression_TPM \n", | |
"0 35.4364\\r \n", | |
"1 10.2865\\r \n", | |
"2 \\r \n", | |
"3 0.200846\\r \n", | |
"4 15.211\\r " | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import gilda\n", | |
"\n", | |
"# Load the ODiseA table from a local copy of the CSV (the path is relative to\n", | |
"# the working directory). The author was unsure whether the file can be fetched\n", | |
"# straight from the URL below, so that line is left commented out.\n", | |
"# url = \"https://netbio.bgu.ac.il/8dfcaffc-94d2-4aa4-a687-11d9962c6d51\"\n", | |
"df = pd.read_csv(\"ODiseA_data.csv\")\n", | |
"# NOTE(review): in the saved output every Gene_expression_TPM value ends in a\n", | |
"# trailing \\r (row 2 is only \\r), so the column is text rather than numeric --\n", | |
"# presumably a line-ending quirk of the CSV; clean it before any numeric use.\n", | |
"# Preview the first rows of the table.\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "a7baa9c1", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"45" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Number of distinct tissue labels (missing values are not counted).\n", | |
"df[\"Tissue\"].nunique()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "2f91d167", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Ground each distinct tissue label with Gilda. A label may be a \"-\"-separated\n", | |
"# path such as \"Brain-Basal ganglia-Putamen\"; every part is grounded on its own\n", | |
"# and contributes four cells to the row: (part, db, id, entry_name).\n", | |
"rows = []\n", | |
"# dropna(): unique() would also yield NaN, and a float NaN has no .split().\n", | |
"for text in df.Tissue.dropna().unique():\n", | |
"    row = [text]\n", | |
"    for part in text.split(\"-\"):\n", | |
"        results = gilda.ground(part, namespaces=[\"MESH\", \"UBERON\", \"BTO\"])\n", | |
"        if results:\n", | |
"            # Keep only the first match returned by Gilda\n", | |
"            # (presumably the top-scoring one -- confirm against the Gilda docs).\n", | |
"            term = results[0].term\n", | |
"            row.extend((part, term.db, term.id, term.entry_name))\n", | |
"        else:\n", | |
"            # No match: pad with None so every part still occupies four cells.\n", | |
"            row.extend((part, None, None, None))\n", | |
"    rows.append(row)\n", | |
"\n", | |
"# Rows vary in length (one 4-cell group per part); the frame keeps pandas'\n", | |
"# default integer column labels, which become the header of the TSV.\n", | |
"results_df = pd.DataFrame(rows)\n", | |
"results_df.to_csv(\"results.tsv\", sep=\"\\t\", index=False)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Kidney | Kidney | MESH | D007668 | Kidney | |||||||||
Blood and bone marrow | Blood and bone marrow | ||||||||||||
Brain | Brain | MESH | D001921 | Brain | |||||||||
Respiratory system-Lung | Respiratory system | MESH | D012137 | Respiratory System | Lung | MESH | D008168 | Lung | |||||
Respiratory system | Respiratory system | MESH | D012137 | Respiratory System | |||||||||
Testis | Testis | MESH | D013737 | Testis | |||||||||
Brain-Cortex | Brain | MESH | D001921 | Brain | Cortex | ||||||||
Ovary | Ovary | MESH | D010053 | Ovary | |||||||||
Skin | Skin | MESH | D012867 | Skin | |||||||||
Liver | Liver | MESH | D008099 | Liver | |||||||||
Respiratory system-Trachea | Respiratory system | MESH | D012137 | Respiratory System | Trachea | MESH | D014132 | Trachea | |||||
Heart | Heart | MESH | D006321 | Heart | |||||||||
Heart-Ventricle | Heart | MESH | D006321 | Heart | Ventricle | ||||||||
Heart-Atrium | Heart | MESH | D006321 | Heart | Atrium | ||||||||
Thyroid | Thyroid | MESH | D013961 | Thyroid Gland | |||||||||
Peripheral nerve | Peripheral nerve | MESH | D010525 | Peripheral Nerves | |||||||||
Skeletal muscle | Skeletal muscle | MESH | D018482 | Muscle, Skeletal | |||||||||
Artery-Aorta | Artery | MESH | D001158 | Arteries | Aorta | MESH | D001011 | Aorta | |||||
Artery | Artery | MESH | D001158 | Arteries | |||||||||
Brain-Basal ganglia | Brain | MESH | D001921 | Brain | Basal ganglia | MESH | D001479 | Basal Ganglia | |||||
Brain-Basal ganglia-Caudate nucleus | Brain | MESH | D001921 | Brain | Basal ganglia | MESH | D001479 | Basal Ganglia | Caudate nucleus | MESH | D002421 | Caudate Nucleus | |
Artery-Coronary | Artery | MESH | D001158 | Arteries | Coronary | ||||||||
Brain-Basal ganglia-Substantia nigra | Brain | MESH | D001921 | Brain | Basal ganglia | MESH | D001479 | Basal Ganglia | Substantia nigra | MESH | D013378 | Substantia Nigra | |
Brain-Basal ganglia-Putamen | Brain | MESH | D001921 | Brain | Basal ganglia | MESH | D001479 | Basal Ganglia | Putamen | MESH | D011699 | Putamen | |
Brain-Basal ganglia-Nucleus accumbens | Brain | MESH | D001921 | Brain | Basal ganglia | MESH | D001479 | Basal Ganglia | Nucleus accumbens | MESH | D009714 | Nucleus Accumbens | |
Brain-Cerebellum | Brain | MESH | D001921 | Brain | Cerebellum | MESH | D002531 | Cerebellum | |||||
Spinal cord | Spinal cord | MESH | D013116 | Spinal Cord | |||||||||
Adipose-Subcutaneous | Adipose | Subcutaneous | |||||||||||
Adipose | Adipose | ||||||||||||
Adipose-Visceral | Adipose | Visceral | |||||||||||
Pituitary | Pituitary | ||||||||||||
Digestive system-Esophagus | Digestive system | MESH | D004064 | Digestive System | Esophagus | MESH | D004947 | Esophagus | |||||
Digestive system | Digestive system | MESH | D004064 | Digestive System | |||||||||
Digestive system-Esophagus-Mucosa | Digestive system | MESH | D004064 | Digestive System | Esophagus | MESH | D004947 | Esophagus | Mucosa | MESH | D009092 | Mucous Membrane | |
Eye | Eye | MESH | D005123 | Eye | |||||||||
Brain-Hypothalamus | Brain | MESH | D001921 | Brain | Hypothalamus | MESH | D007031 | Hypothalamus | |||||
Bone | Bone | ||||||||||||
Digestive system-Esophagus-Gastroesophageal junction | Digestive system | MESH | D004064 | Digestive System | Esophagus | MESH | D004947 | Esophagus | Gastroesophageal junction | MESH | D004943 | Esophagogastric Junction | |
Cervix | Cervix | MESH | D002584 | Cervix Uteri | |||||||||
Eye-Retina | Eye | MESH | D005123 | Eye | Retina | MESH | D012160 | Retina | |||||
Digestive system-Colon | Digestive system | MESH | D004064 | Digestive System | Colon | MESH | D003106 | Colon | |||||
Uterus | Uterus | MESH | D014599 | Uterus | |||||||||
Mammary gland | Mammary gland | MESH | D042361 | Mammary Glands, Human | |||||||||
Brain-Cortex-Frontal | Brain | MESH | D001921 | Brain | Cortex | Frontal | |||||||
Pancreas | Pancreas | MESH | D010179 | Pancreas |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment