Last active
May 31, 2020 02:13
-
-
Save PatWalters/2cf6e95072e588244cf8eda624cc15db to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"from tqdm.notebook import tqdm_notebook\n", | |
"from tqdm.notebook import tqdm\n", | |
"from rdkit import Chem\n", | |
"from rdkit.Chem.Descriptors import MolWt, MolLogP, NumHDonors, NumHAcceptors, NumRotatableBonds\n", | |
"import seaborn as sns\n", | |
"from ipyfilechooser import FileChooser" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Select a file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "b8b26dcdd9d9426a9c1e37724d5d1a12", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"FileChooser(path='.', filename='', show_hidden='False')" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"fc = FileChooser('.')\n", | |
"display(fc)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Read the data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv(fc.selected_filename)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"How many molecules are in the database?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(6264, 6)" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>molregno</th>\n", | |
" <th>SMILES</th>\n", | |
" <th>pref_name</th>\n", | |
" <th>max_phase</th>\n", | |
" <th>clean_smiles</th>\n", | |
" <th>mw</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>97</td>\n", | |
" <td>COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC</td>\n", | |
" <td>PRAZOSIN</td>\n", | |
" <td>4</td>\n", | |
" <td>COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC</td>\n", | |
" <td>383.408</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>115</td>\n", | |
" <td>CN1CCC[C@H]1c1cccnc1</td>\n", | |
" <td>NICOTINE</td>\n", | |
" <td>4</td>\n", | |
" <td>CN1CCC[C@H]1c1cccnc1</td>\n", | |
" <td>162.236</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>146</td>\n", | |
" <td>CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23</td>\n", | |
" <td>OFLOXACIN</td>\n", | |
" <td>4</td>\n", | |
" <td>CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23</td>\n", | |
" <td>361.373</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>147</td>\n", | |
" <td>CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21</td>\n", | |
" <td>NALIDIXIC ACID</td>\n", | |
" <td>4</td>\n", | |
" <td>CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21</td>\n", | |
" <td>232.239</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>148</td>\n", | |
" <td>O=c1oc2c(O)c(O)cc3c(=O)oc4c(O)c(O)cc1c4c23</td>\n", | |
" <td>ELLAGIC ACID</td>\n", | |
" <td>2</td>\n", | |
" <td>O=c1oc2c(O)c(O)cc3c(=O)oc4c(O)c(O)cc1c4c23</td>\n", | |
" <td>302.194</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" molregno SMILES pref_name \\\n", | |
"0 97 COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC PRAZOSIN \n", | |
"1 115 CN1CCC[C@H]1c1cccnc1 NICOTINE \n", | |
"2 146 CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23 OFLOXACIN \n", | |
"3 147 CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21 NALIDIXIC ACID \n", | |
"4 148 O=c1oc2c(O)c(O)cc3c(=O)oc4c(O)c(O)cc1c4c23 ELLAGIC ACID \n", | |
"\n", | |
" max_phase clean_smiles mw \n", | |
"0 4 COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC 383.408 \n", | |
"1 4 CN1CCC[C@H]1c1cccnc1 162.236 \n", | |
"2 4 CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23 361.373 \n", | |
"3 4 CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21 232.239 \n", | |
"4 2 O=c1oc2c(O)c(O)cc3c(=O)oc4c(O)c(O)cc1c4c23 302.194 " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "5ba2549d59684ca19ff125b5b86a9e9d", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=4647.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"RDKit WARNING: [20:52:20] WARNING: not removing hydrogen atom without neighbors\n" | |
] | |
} | |
], | |
"source": [ | |
"df['mol'] = [Chem.MolFromSmiles(x) for x in tqdm(df.SMILES)]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Remove rows with null values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.dropna(inplace=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(6264, 7)" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.shape" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Calculate properties" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/pwalters/opt/anaconda3/envs/rdkit_2020_02/lib/python3.7/site-packages/tqdm/std.py:668: FutureWarning: The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version\n", | |
" from pandas import Panel\n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "14e4859018e0455cba4c904b5029e2a0", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=4647.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "15ff593042994eb1882a3072e79f89ef", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=4647.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "cd32e98257e14e4f85c9ca3693807068", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=4647.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "8f3d4431bbe84af39afccb7dc076d2de", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=4647.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
}, | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "8c47955a0d254e538cfa675f9408c25c", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, max=4647.0), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"tqdm_notebook.pandas()\n", | |
"df['MW'] = df.mol.progress_apply(MolWt)\n", | |
"df['LogP'] = df.mol.progress_apply(MolLogP)\n", | |
"df['HBD'] = df.mol.progress_apply(NumHDonors)\n", | |
"df['HBA'] = df.mol.progress_apply(NumHAcceptors)\n", | |
"df['Rotors'] = df.mol.progress_apply(NumRotatableBonds)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Remove molecules with MW > 500 - **you may want to comment out the line below**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = df.query(\"MW <= 500\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>molregno</th>\n", | |
" <th>SMILES</th>\n", | |
" <th>pref_name</th>\n", | |
" <th>max_phase</th>\n", | |
" <th>clean_smiles</th>\n", | |
" <th>mw</th>\n", | |
" <th>mol</th>\n", | |
" <th>MW</th>\n", | |
" <th>LogP</th>\n", | |
" <th>HBD</th>\n", | |
" <th>HBA</th>\n", | |
" <th>Rotors</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>97</td>\n", | |
" <td>COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC</td>\n", | |
" <td>PRAZOSIN</td>\n", | |
" <td>4</td>\n", | |
" <td>COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC</td>\n", | |
" <td>383.4</td>\n", | |
" <td><rdkit.Chem.rdchem.Mol object at 0x1a238800d0></td>\n", | |
" <td>383.4</td>\n", | |
" <td>1.8</td>\n", | |
" <td>1</td>\n", | |
" <td>8</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>115</td>\n", | |
" <td>CN1CCC[C@H]1c1cccnc1</td>\n", | |
" <td>NICOTINE</td>\n", | |
" <td>4</td>\n", | |
" <td>CN1CCC[C@H]1c1cccnc1</td>\n", | |
" <td>162.2</td>\n", | |
" <td><rdkit.Chem.rdchem.Mol object at 0x1a23880cb0></td>\n", | |
" <td>162.2</td>\n", | |
" <td>1.8</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>146</td>\n", | |
" <td>CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23</td>\n", | |
" <td>OFLOXACIN</td>\n", | |
" <td>4</td>\n", | |
" <td>CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23</td>\n", | |
" <td>361.4</td>\n", | |
" <td><rdkit.Chem.rdchem.Mol object at 0x1a23880030></td>\n", | |
" <td>361.4</td>\n", | |
" <td>1.5</td>\n", | |
" <td>1</td>\n", | |
" <td>6</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>147</td>\n", | |
" <td>CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21</td>\n", | |
" <td>NALIDIXIC ACID</td>\n", | |
" <td>4</td>\n", | |
" <td>CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21</td>\n", | |
" <td>232.2</td>\n", | |
" <td><rdkit.Chem.rdchem.Mol object at 0x1a23880440></td>\n", | |
" <td>232.2</td>\n", | |
" <td>1.4</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>148</td>\n", | |
" <td>O=c1oc2c(O)c(O)cc3c(=O)oc4c(O)c(O)cc1c4c23</td>\n", | |
" <td>ELLAGIC ACID</td>\n", | |
" <td>2</td>\n", | |
" <td>O=c1oc2c(O)c(O)cc3c(=O)oc4c(O)c(O)cc1c4c23</td>\n", | |
" <td>302.2</td>\n", | |
" <td><rdkit.Chem.rdchem.Mol object at 0x1a238801c0></td>\n", | |
" <td>302.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>4</td>\n", | |
" <td>8</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" molregno SMILES pref_name \\\n", | |
"0 97 COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC PRAZOSIN \n", | |
"1 115 CN1CCC[C@H]1c1cccnc1 NICOTINE \n", | |
"2 146 CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23 OFLOXACIN \n", | |
"3 147 CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21 NALIDIXIC ACID \n", | |
"4 148 O=c1oc2c(O)c(O)cc3c(=O)oc4c(O)c(O)cc1c4c23 ELLAGIC ACID \n", | |
"\n", | |
" max_phase clean_smiles mw \\\n", | |
"0 4 COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC 383.4 \n", | |
"1 4 CN1CCC[C@H]1c1cccnc1 162.2 \n", | |
"2 4 CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23 361.4 \n", | |
"3 4 CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21 232.2 \n", | |
"4 2 O=c1oc2c(O)c(O)cc3c(=O)oc4c(O)c(O)cc1c4c23 302.2 \n", | |
"\n", | |
" mol MW LogP HBD HBA \\\n", | |
"0 <rdkit.Chem.rdchem.Mol object at 0x1a238800d0> 383.4 1.8 1 8 \n", | |
"1 <rdkit.Chem.rdchem.Mol object at 0x1a23880cb0> 162.2 1.8 0 2 \n", | |
"2 <rdkit.Chem.rdchem.Mol object at 0x1a23880030> 361.4 1.5 1 6 \n", | |
"3 <rdkit.Chem.rdchem.Mol object at 0x1a23880440> 232.2 1.4 1 4 \n", | |
"4 <rdkit.Chem.rdchem.Mol object at 0x1a238801c0> 302.2 1.3 4 8 \n", | |
"\n", | |
" Rotors \n", | |
"0 4 \n", | |
"1 1 \n", | |
"2 2 \n", | |
"3 2 \n", | |
"4 0 " | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cols = ['MW','LogP','HBD','HBA','Rotors']\n", | |
"xlab_list = [\"Molecular Weight\",\"RDKit LogP\",\"# HB Donors\",\"# HB Acceptors\",\"# Rotatable Bonds\"]\n", | |
"df_melt = df[cols].melt()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 1080x216 with 5 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"g = sns.FacetGrid(df_melt,col=\"variable\",sharex=False)\n", | |
"g.map(sns.violinplot,\"value\",order=cols)\n", | |
"for i,label in enumerate(cols):\n", | |
" g.axes[0,i].set_xlabel(xlab_list[i])\n", | |
" g.axes[0,i].set_title(label)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>MW</th>\n", | |
" <th>LogP</th>\n", | |
" <th>HBD</th>\n", | |
" <th>HBA</th>\n", | |
" <th>Rotors</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>4647.0</td>\n", | |
" <td>4647.0</td>\n", | |
" <td>4647.0</td>\n", | |
" <td>4647.0</td>\n", | |
" <td>4647.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>329.7</td>\n", | |
" <td>2.0</td>\n", | |
" <td>1.8</td>\n", | |
" <td>4.5</td>\n", | |
" <td>4.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>105.1</td>\n", | |
" <td>2.8</td>\n", | |
" <td>1.6</td>\n", | |
" <td>2.3</td>\n", | |
" <td>3.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>4.0</td>\n", | |
" <td>-18.7</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>259.3</td>\n", | |
" <td>0.7</td>\n", | |
" <td>1.0</td>\n", | |
" <td>3.0</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>342.2</td>\n", | |
" <td>2.5</td>\n", | |
" <td>1.0</td>\n", | |
" <td>4.0</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>413.5</td>\n", | |
" <td>3.8</td>\n", | |
" <td>2.0</td>\n", | |
" <td>6.0</td>\n", | |
" <td>6.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>499.7</td>\n", | |
" <td>9.2</td>\n", | |
" <td>12.0</td>\n", | |
" <td>16.0</td>\n", | |
" <td>23.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" MW LogP HBD HBA Rotors\n", | |
"count 4647.0 4647.0 4647.0 4647.0 4647.0\n", | |
"mean 329.7 2.0 1.8 4.5 4.5\n", | |
"std 105.1 2.8 1.6 2.3 3.1\n", | |
"min 4.0 -18.7 0.0 0.0 0.0\n", | |
"25% 259.3 0.7 1.0 3.0 2.0\n", | |
"50% 342.2 2.5 1.0 4.0 4.0\n", | |
"75% 413.5 3.8 2.0 6.0 6.0\n", | |
"max 499.7 9.2 12.0 16.0 23.0" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.set_option('precision', 1)\n", | |
"df[cols].describe()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment