Created
August 20, 2025 01:19
-
-
Save PatWalters/7fcb5d688df9a0a5c3034b159d01bb83 to your computer and use it in GitHub Desktop.
Evaluate fastsolv on Polaris solubility datasets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 73, | |
| "id": "21a58726-b864-476b-82f0-d3ba642e6762", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import polaris as po\n", | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "from rdkit import Chem\n", | |
| "from rdkit.Chem.Descriptors import MolWt\n", | |
| "import seaborn as sns\n", | |
| "from fastsolv import fastsolv\n", | |
| "from sklearn.metrics import r2_score, mean_absolute_error" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "992dd5e9-9831-47e7-8791-ae76b07ea88c", | |
| "metadata": {}, | |
| "source": [ | |
| "### 1. Download Datasets from Polaris" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "96176671-a3fb-4907-b259-fae8243fb899", | |
| "metadata": {}, | |
| "source": [ | |
| "Get the Antiviral ADMET dataset from Polaris " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "id": "b2f94e11-1a05-4145-b58e-7afc8bbcb929", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[2025-08-19 19:57:29] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO </span> The version of Polaris that was used to create the artifact <a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/_artifact.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">_artifact.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/_artifact.py#92\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">92</span></a>\n", | |
| "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\"> </span> <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.11</span>.<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8.</span>dev4+g40e3b2b.d20250207<span style=\"font-weight: bold\">)</span> is different from the currently <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> </span>\n", | |
| "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\"> </span> installed version of Polaris <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.11</span>.<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span><span style=\"font-weight: bold\">)</span>. <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> </span>\n", | |
| "</pre>\n" | |
| ], | |
| "text/plain": [ | |
| "\u001b[2;36m[2025-08-19 19:57:29]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m The version of Polaris that was used to create the artifact \u001b]8;id=558010;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/_artifact.py\u001b\\\u001b[2m_artifact.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=173126;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/_artifact.py#92\u001b\\\u001b[2m92\u001b[0m\u001b]8;;\u001b\\\n", | |
| "\u001b[2;36m \u001b[0m \u001b[1m(\u001b[0m\u001b[1;36m0.11\u001b[0m.\u001b[1;36m8.\u001b[0mdev4+g40e3b2b.d20250207\u001b[1m)\u001b[0m is different from the currently \u001b[2m \u001b[0m\n", | |
| "\u001b[2;36m \u001b[0m installed version of Polaris \u001b[1m(\u001b[0m\u001b[1;36m0.11\u001b[0m.\u001b[1;36m10\u001b[0m\u001b[1m)\u001b[0m. \u001b[2m \u001b[0m\n" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\"> </span><span style=\"color: #808000; text-decoration-color: #808000\">WARNING </span> You're loading data from a remote location. If the dataset is small <a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/dataset/_base.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">_base.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/dataset/_base.py#181\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">181</span></a>\n", | |
| "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\"> </span> enough, consider caching the dataset first using <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">DatasetV2.cache</span><span style=\"font-weight: bold\">()</span> for <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> </span>\n", | |
| "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\"> </span> more performant data access. <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> </span>\n", | |
| "</pre>\n" | |
| ], | |
| "text/plain": [ | |
| "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[33mWARNING \u001b[0m You're loading data from a remote location. If the dataset is small \u001b]8;id=898257;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/dataset/_base.py\u001b\\\u001b[2m_base.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=836900;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/dataset/_base.py#181\u001b\\\u001b[2m181\u001b[0m\u001b]8;;\u001b\\\n", | |
| "\u001b[2;36m \u001b[0m enough, consider caching the dataset first using \u001b[1;35mDatasetV2.cache\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m for \u001b[2m \u001b[0m\n", | |
| "\u001b[2;36m \u001b[0m more performant data access. \u001b[2m \u001b[0m\n" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[19:57:30] </span><span style=\"color: #008000; text-decoration-color: #008000\"> Success: Fetching dataset</span> <a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/utils/context.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">context.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/utils/context.py#53\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">53</span></a>\n", | |
| "</pre>\n" | |
| ], | |
| "text/plain": [ | |
| "\u001b[2;36m[19:57:30]\u001b[0m\u001b[2;36m \u001b[0m\u001b[32m Success: Fetching dataset\u001b[0m \u001b]8;id=75727;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/utils/context.py\u001b\\\u001b[2mcontext.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=729285;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/utils/context.py#53\u001b\\\u001b[2m53\u001b[0m\u001b]8;;\u001b\\\n" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n" | |
| ], | |
| "text/plain": [] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "antiviral_admet_ds = po.load_dataset(\"asap-discovery/antiviral-admet-2025-unblinded\")\n", | |
| "antiviral_admet_ds.load_to_memory()\n", | |
| "antiviral_admet_df = pd.DataFrame(antiviral_admet_ds.zarr_data).dropna(subset=\"KSOL\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "b329b7e3-311a-4275-857d-d813d8c2fdfe", | |
| "metadata": {}, | |
| "source": [ | |
| "Get the Biogen dataset from Polaris" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "id": "154c97a4-9368-44ca-b9cb-b53f8b6d0d1e", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[2025-08-19 20:00:22] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO </span> The version of Polaris that was used to create the artifact <span style=\"font-weight: bold\">(</span>dev<span style=\"font-weight: bold\">)</span> is <a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/_artifact.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">_artifact.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/_artifact.py#92\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">92</span></a>\n", | |
| "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\"> </span> different from the currently installed version of Polaris <span style=\"font-weight: bold\">(</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.11</span>.<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span><span style=\"font-weight: bold\">)</span>. <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> </span>\n", | |
| "</pre>\n" | |
| ], | |
| "text/plain": [ | |
| "\u001b[2;36m[2025-08-19 20:00:22]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m The version of Polaris that was used to create the artifact \u001b[1m(\u001b[0mdev\u001b[1m)\u001b[0m is \u001b]8;id=210923;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/_artifact.py\u001b\\\u001b[2m_artifact.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=840339;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/_artifact.py#92\u001b\\\u001b[2m92\u001b[0m\u001b]8;;\u001b\\\n", | |
| "\u001b[2;36m \u001b[0m different from the currently installed version of Polaris \u001b[1m(\u001b[0m\u001b[1;36m0.11\u001b[0m.\u001b[1;36m10\u001b[0m\u001b[1m)\u001b[0m. \u001b[2m \u001b[0m\n" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[2025-08-19 20:00:23] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO </span> To verify the checksum, we need to recompute it. This can be slow <a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/mixins/_checksum.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">_checksum.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/mixins/_checksum.py#67\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">67</span></a>\n", | |
| "<span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\"> </span> for large datasets. <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> </span>\n", | |
| "</pre>\n" | |
| ], | |
| "text/plain": [ | |
| "\u001b[2;36m[2025-08-19 20:00:23]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m To verify the checksum, we need to recompute it. This can be slow \u001b]8;id=601500;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/mixins/_checksum.py\u001b\\\u001b[2m_checksum.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=448182;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/mixins/_checksum.py#67\u001b\\\u001b[2m67\u001b[0m\u001b]8;;\u001b\\\n", | |
| "\u001b[2;36m \u001b[0m for large datasets. \u001b[2m \u001b[0m\n" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[20:00:23] </span><span style=\"color: #008000; text-decoration-color: #008000\"> Success: Fetching dataset</span> <a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/utils/context.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">context.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/utils/context.py#53\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">53</span></a>\n", | |
| "</pre>\n" | |
| ], | |
| "text/plain": [ | |
| "\u001b[2;36m[20:00:23]\u001b[0m\u001b[2;36m \u001b[0m\u001b[32m Success: Fetching dataset\u001b[0m \u001b]8;id=835257;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/utils/context.py\u001b\\\u001b[2mcontext.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=939853;file:///opt/homebrew/Caskroom/miniforge/base/envs/rdkit_2025_03/lib/python3.11/site-packages/polaris/utils/context.py#53\u001b\\\u001b[2m53\u001b[0m\u001b]8;;\u001b\\\n" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n" | |
| ], | |
| "text/plain": [] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "biogen_ds = po.load_dataset(\"biogen/adme-fang-v1\")\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "b17d4920-7b36-499b-9907-9e01c09c5e3e", | |
| "metadata": {}, | |
| "source": [ | |
| "### 2. Convert Units to LogS " | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "2a378d4f-49b6-44a2-ad32-ec416ddd8934", | |
| "metadata": {}, | |
| "source": [ | |
| "The Antiviral ADMET dataset reports kinetic solubility (KSOL) in $\\mu$M; we need to convert this to the log of molar solubility, LogS = log10(KSOL $\\times$ 10$^{-6}$)." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "28821f86-c531-45c1-b02f-dc31685533e4", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "antiviral_admet_df['LogS'] = np.log10(antiviral_admet_df.KSOL * 1e-6)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "d9956122-8bb6-473a-b7c8-fd41e4b56257", | |
| "metadata": {}, | |
| "source": [ | |
| "The Biogen dataset reports solubility as log10($\\mu$g/ml); we need to convert this to the log of molar solubility (LogS), which requires each compound's molecular weight." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "id": "a1a9e46c-1b6f-4267-816d-b3f3d423432e", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def ug_ml_to_logS(ug_ml, molar_mass):\n", | |
| " g_per_liter = ug_ml / 1000.0\n", | |
| " molar_solubility_S = g_per_liter / molar_mass\n", | |
| " log_S = np.log10(molar_solubility_S)\n", | |
| " return log_S" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 82, | |
| "id": "cb912ff2-7b73-4b7a-8290-b97dc68881fb", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "biogen_df = biogen_ds.table\n", | |
| "biogen_df.dropna(subset=\"LOG_SOLUBILITY\",inplace=True)\n", | |
| "biogen_df['mol'] = biogen_df.SMILES.apply(Chem.MolFromSmiles)\n", | |
| "biogen_df['ug_ml'] = 10**biogen_df.LOG_SOLUBILITY\n", | |
| "biogen_df['MW'] = biogen_df.mol.apply(MolWt)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 83, | |
| "id": "aa53a624-4f92-468e-a9f2-9d1c6fe4f190", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "biogen_df['LogS'] = [ug_ml_to_logS(x,y) for x,y in biogen_df[[\"ug_ml\",\"MW\"]].values]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 112, | |
| "id": "0c676e45-38ff-4a37-8ca3-ab17a8971c02", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "", | |
| "text/plain": [ | |
| "<Figure size 640x480 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "antiviral_admet_df['Dataset'] = \"Antiviral\"\n", | |
| "biogen_df['Dataset'] = \"Biogen\"\n", | |
| "cols = ['Dataset','LogS']\n", | |
| "sns.violinplot(x='Dataset',y='LogS',data=pd.concat([antiviral_admet_df[cols],biogen_df[cols]]));" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "7557c134-32c8-4b74-9362-192dc2e8ee71", | |
| "metadata": {}, | |
| "source": [ | |
| "### 3. Evaluate fastsolv on the Antiviral ADMET dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "0472a923-7da2-4142-8a3c-b4fe9ecae4e8", | |
| "metadata": {}, | |
| "source": [ | |
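| "A simple function to add, in place, the columns required by **fastsolv**: `solute_smiles`, `solvent_smiles` (set to `O`, i.e. water) and `temperature` (set to 298)." | |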
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 113, | |
| "id": "7ba8d8ee-35fe-464f-9921-25035b55f96f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def prepare_dataframe(df,smiles_col):\n", | |
| " df['solute_smiles'] = df[smiles_col]\n", | |
| " df['solvent_smiles'] = \"O\"\n", | |
| " df['temperature'] = 298" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "221b41b5-fb99-4dcf-9e8d-6a5cbe33d056", | |
| "metadata": {}, | |
| "source": [ | |
| "Run fastsolv on the Antiviral ADMET dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 114, | |
| "id": "7345cf4e-61de-4162-96e7-a37461245b3b", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|████████████████████████████████████████████████████████████████████████████████████████████████| 447/447 [00:02<00:00, 151.19it/s]\n", | |
| "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n", | |
| "GPU available: True (mps), used: True\n", | |
| "TPU available: False, using: 0 TPU cores\n", | |
| "HPU available: False, using: 0 HPUs\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "eb7bf1f4de9742799da654d4057648b0", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Predicting: | …" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "6f91d93f3632470dad3e8763eb491018", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Predicting: | …" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "52a94856f23a40dcac90e3c90445b202", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Predicting: | …" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "a42f51a709784622899139b81b29f9dd", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Predicting: | …" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "prepare_dataframe(antiviral_admet_df,\"CXSMILES\")\n", | |
| "antiviral_admet_pred = fastsolv(antiviral_admet_df)\n", | |
| "antiviral_admet_df['predicted_LogS'] = antiviral_admet_pred.predicted_logS.values" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 115, | |
| "id": "da7e79b1-59a2-44a4-9cc6-e331967611c1", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Text(-5.5, -0.75, 'MAE = 1.47')" | |
| ] | |
| }, | |
| "execution_count": 115, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "", | |
| "text/plain": [ | |
| "<Figure size 640x480 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "ax = sns.regplot(x=\"LogS\",y=\"predicted_LogS\",data=antiviral_admet_df,scatter_kws={\"s\": 5, \"alpha\" : 0.35})\n", | |
| "r2 = r2_score(antiviral_admet_df.LogS,antiviral_admet_df.predicted_LogS)\n", | |
| "mae = mean_absolute_error(antiviral_admet_df.LogS,antiviral_admet_df.predicted_LogS)\n", | |
| "ax.text(-5.5, -0.5, f\"$R^2$ = {r2:.2f}\")\n", | |
| "ax.text(-5.5, -0.75, f\"MAE = {mae:.2f}\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "c361a0ba-f0e0-457d-9c30-44381ed990fb", | |
| "metadata": {}, | |
| "source": [ | |
| "### 4. Evaluate fastsolv on the Biogen dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 116, | |
| "id": "560a280d-c431-4b20-a88f-8d953befeda9", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|██████████████████████████████████████████████████████████████████████████████████████████████| 2174/2174 [00:09<00:00, 223.02it/s]\n", | |
| "💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.\n", | |
| "GPU available: True (mps), used: True\n", | |
| "TPU available: False, using: 0 TPU cores\n", | |
| "HPU available: False, using: 0 HPUs\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "5712accf088443f8bdc43cfeee02c61c", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Predicting: | …" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "457f9c1d035241979955eb2c0b442a27", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Predicting: | …" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "9039d562884c41359e8405fea76a44f2", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Predicting: | …" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "0f951b18dc8a41fd953b3dbf3df52742", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "Predicting: | …" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "prepare_dataframe(biogen_df,\"SMILES\")\n", | |
| "biogen_pred = fastsolv(biogen_df)\n", | |
| "biogen_df['predicted_LogS'] = biogen_pred.predicted_logS.values" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 98, | |
| "id": "a9ae2248-6510-46e3-a62e-e798ecb5bc90", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "", | |
| "text/plain": [ | |
| "<Figure size 640x480 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "ax = sns.regplot(x=\"LogS\",y=\"predicted_LogS\",data=biogen_df,scatter_kws={\"s\": 5, \"alpha\" : 0.35})\n", | |
| "r2 = r2_score(biogen_df.LogS,biogen_df.predicted_LogS)\n", | |
| "mae = mean_absolute_error(biogen_df.LogS,biogen_df.predicted_LogS)\n", | |
| "ax.text(-6.5, 0.0, f\"$R^2$ = {r2:.2f}\")\n", | |
| "ax.text(-6.5, -0.25, f\"MAE = {mae:.2f}\");" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 121, | |
| "id": "a560643f-8a8f-4db8-ad9f-22a078c3030e", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>UNIQUE_ID</th>\n", | |
| " <th>MOL_smiles</th>\n", | |
| " <th>SMILES</th>\n", | |
| " <th>LOG_HLM_CLint</th>\n", | |
| " <th>LOG_RLM_CLint</th>\n", | |
| " <th>LOG_MDR1-MDCK_ER</th>\n", | |
| " <th>LOG_HPPB</th>\n", | |
| " <th>LOG_RPPB</th>\n", | |
| " <th>LOG_SOLUBILITY</th>\n", | |
| " <th>mol</th>\n", | |
| " <th>MW</th>\n", | |
| " <th>sol_ug_ml</th>\n", | |
| " <th>ug_ml</th>\n", | |
| " <th>LogS</th>\n", | |
| " <th>solute_smiles</th>\n", | |
| " <th>solvent_smiles</th>\n", | |
| " <th>temperature</th>\n", | |
| " <th>predicted_LogS</th>\n", | |
| " <th>Dataset</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>b5e1231aef0080c76b4207ca1355485432877339</td>\n", | |
| " <td>Brc1cc2c(cc1Cn1cncn1)OCCCO2</td>\n", | |
| " <td>Brc1cc2c(cc1Cn1cncn1)OCCCO2</td>\n", | |
| " <td>0.886265</td>\n", | |
| " <td>2.357933</td>\n", | |
| " <td>-0.247518</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.536432</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150117f40></td>\n", | |
| " <td>310.151</td>\n", | |
| " <td>34.390</td>\n", | |
| " <td>34.390</td>\n", | |
| " <td>-3.955141</td>\n", | |
| " <td>Brc1cc2c(cc1Cn1cncn1)OCCCO2</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-1.341322</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0604cf74e44450d5963d169e18854d96c4e5dba8</td>\n", | |
| " <td>Brc1ccc(-c2nnc(Cn3cnc4ccccc43)o2)o1</td>\n", | |
| " <td>Brc1ccc(-c2nnc(Cn3cnc4ccccc43)o2)o1</td>\n", | |
| " <td>0.675687</td>\n", | |
| " <td>1.613704</td>\n", | |
| " <td>-0.010669</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.797475</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150187f40></td>\n", | |
| " <td>345.156</td>\n", | |
| " <td>62.730</td>\n", | |
| " <td>62.730</td>\n", | |
| " <td>-3.740540</td>\n", | |
| " <td>Brc1ccc(-c2nnc(Cn3cnc4ccccc43)o2)o1</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-2.414264</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>9be43035e7b8a5078babf4345584f4dc6dcd1448</td>\n", | |
| " <td>Brc1cnc2ccccc2c1</td>\n", | |
| " <td>Brc1cnc2ccccc2c1</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.033858</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150187ed0></td>\n", | |
| " <td>208.058</td>\n", | |
| " <td>0.925</td>\n", | |
| " <td>0.925</td>\n", | |
| " <td>-5.352043</td>\n", | |
| " <td>Brc1cnc2ccccc2c1</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-1.294908</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>70ac5fbe7181b1fce36dbaedaedcd3cdeb4fdf82</td>\n", | |
| " <td>C#CCN1CCC(c2nc(Cc3noc(C)n3)no2)CC1</td>\n", | |
| " <td>C#CCN1CCC(c2nc(Cc3noc(C)n3)no2)CC1</td>\n", | |
| " <td>0.675687</td>\n", | |
| " <td>1.107108</td>\n", | |
| " <td>-0.135635</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.695044</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150187e60></td>\n", | |
| " <td>287.323</td>\n", | |
| " <td>49.550</td>\n", | |
| " <td>49.550</td>\n", | |
| " <td>-3.763327</td>\n", | |
| " <td>C#CCN1CCC(c2nc(Cc3noc(C)n3)no2)CC1</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-1.561687</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>e9cafc1522a01017509e0fda082d84676cdc1f5e</td>\n", | |
| " <td>C#CCN1CCC[C@H]1C(=O)Nc1cnn(-c2ncccn2)c1</td>\n", | |
| " <td>C#CCN1CCC[C@H]1C(=O)Nc1cnn(-c2ncccn2)c1</td>\n", | |
| " <td>0.675687</td>\n", | |
| " <td>1.921166</td>\n", | |
| " <td>0.596581</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.653984</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150187df0></td>\n", | |
| " <td>296.334</td>\n", | |
| " <td>45.080</td>\n", | |
| " <td>45.080</td>\n", | |
| " <td>-3.817798</td>\n", | |
| " <td>C#CCN1CCC[C@H]1C(=O)Nc1cnn(-c2ncccn2)c1</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-1.749381</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3511</th>\n", | |
| " <td>6b39954041ed15e815bc15457e06440183006a00</td>\n", | |
| " <td>c1cncc(-c2ccnn2CN2CCOCC2)c1</td>\n", | |
| " <td>c1cncc(-c2ccnn2CN2CCOCC2)c1</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.239049</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150131e70></td>\n", | |
| " <td>244.298</td>\n", | |
| " <td>17.340</td>\n", | |
| " <td>17.340</td>\n", | |
| " <td>-4.148871</td>\n", | |
| " <td>c1cncc(-c2ccnn2CN2CCOCC2)c1</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-1.242087</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3513</th>\n", | |
| " <td>d08dfa20c576ef45061fd2af3a1c7acc286bc6d2</td>\n", | |
| " <td>c1cnn(-c2ccc(-c3ccc(NC4CC4)nn3)cc2)c1</td>\n", | |
| " <td>c1cnn(-c2ccc(-c3ccc(NC4CC4)nn3)cc2)c1</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-0.107905</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150131ee0></td>\n", | |
| " <td>277.331</td>\n", | |
| " <td>0.780</td>\n", | |
| " <td>0.780</td>\n", | |
| " <td>-5.550904</td>\n", | |
| " <td>c1cnn(-c2ccc(-c3ccc(NC4CC4)nn3)cc2)c1</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-1.602266</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3514</th>\n", | |
| " <td>c9bf3aca6d10ff3538ce617d83b9de8c80723024</td>\n", | |
| " <td>c1csc(-c2csc3nc(CN4CCOCC4)nc(N4CCc5[nH]ncc5C4)...</td>\n", | |
| " <td>c1csc(-c2csc3nc(CN4CCOCC4)nc(N4CCc5[nH]ncc5C4)...</td>\n", | |
| " <td>2.747507</td>\n", | |
| " <td>3.118970</td>\n", | |
| " <td>1.921635</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.690107</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150131f50></td>\n", | |
| " <td>438.582</td>\n", | |
| " <td>48.990</td>\n", | |
| " <td>48.990</td>\n", | |
| " <td>-3.951943</td>\n", | |
| " <td>c1csc(-c2csc3nc(CN4CCOCC4)nc(N4CCc5[nH]ncc5C4)...</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-2.662541</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3517</th>\n", | |
| " <td>644313d1941ab4550760b7c1162c54d209a7435f</td>\n", | |
| " <td>c1nc(NC2CCN(C3CC3)CC2)c2sccc2n1</td>\n", | |
| " <td>c1nc(NC2CCN(C3CC3)CC2)c2sccc2n1</td>\n", | |
| " <td>1.240899</td>\n", | |
| " <td>1.906157</td>\n", | |
| " <td>0.094514</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.749891</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150131fc0></td>\n", | |
| " <td>274.393</td>\n", | |
| " <td>56.220</td>\n", | |
| " <td>56.220</td>\n", | |
| " <td>-3.688482</td>\n", | |
| " <td>c1nc(NC2CCN(C3CC3)CC2)c2sccc2n1</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-1.360543</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3518</th>\n", | |
| " <td>32658afab9952faf20249dbb305e1ac85ef10a1b</td>\n", | |
| " <td>c1nc(Nc2ccc3c(c2)OCCCO3)c2sccc2n1</td>\n", | |
| " <td>c1nc(Nc2ccc3c(c2)OCCCO3)c2sccc2n1</td>\n", | |
| " <td>1.964066</td>\n", | |
| " <td>3.196559</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>1.707229</td>\n", | |
| " <td><rdkit.Chem.rdchem.Mol object at 0x150132030></td>\n", | |
| " <td>299.355</td>\n", | |
| " <td>50.960</td>\n", | |
| " <td>50.960</td>\n", | |
| " <td>-3.768957</td>\n", | |
| " <td>c1nc(Nc2ccc3c(c2)OCCCO3)c2sccc2n1</td>\n", | |
| " <td>O</td>\n", | |
| " <td>298</td>\n", | |
| " <td>-1.360945</td>\n", | |
| " <td>Biogen</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>2173 rows × 19 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " UNIQUE_ID \\\n", | |
| "0 b5e1231aef0080c76b4207ca1355485432877339 \n", | |
| "1 0604cf74e44450d5963d169e18854d96c4e5dba8 \n", | |
| "3 9be43035e7b8a5078babf4345584f4dc6dcd1448 \n", | |
| "5 70ac5fbe7181b1fce36dbaedaedcd3cdeb4fdf82 \n", | |
| "6 e9cafc1522a01017509e0fda082d84676cdc1f5e \n", | |
| "... ... \n", | |
| "3511 6b39954041ed15e815bc15457e06440183006a00 \n", | |
| "3513 d08dfa20c576ef45061fd2af3a1c7acc286bc6d2 \n", | |
| "3514 c9bf3aca6d10ff3538ce617d83b9de8c80723024 \n", | |
| "3517 644313d1941ab4550760b7c1162c54d209a7435f \n", | |
| "3518 32658afab9952faf20249dbb305e1ac85ef10a1b \n", | |
| "\n", | |
| " MOL_smiles \\\n", | |
| "0 Brc1cc2c(cc1Cn1cncn1)OCCCO2 \n", | |
| "1 Brc1ccc(-c2nnc(Cn3cnc4ccccc43)o2)o1 \n", | |
| "3 Brc1cnc2ccccc2c1 \n", | |
| "5 C#CCN1CCC(c2nc(Cc3noc(C)n3)no2)CC1 \n", | |
| "6 C#CCN1CCC[C@H]1C(=O)Nc1cnn(-c2ncccn2)c1 \n", | |
| "... ... \n", | |
| "3511 c1cncc(-c2ccnn2CN2CCOCC2)c1 \n", | |
| "3513 c1cnn(-c2ccc(-c3ccc(NC4CC4)nn3)cc2)c1 \n", | |
| "3514 c1csc(-c2csc3nc(CN4CCOCC4)nc(N4CCc5[nH]ncc5C4)... \n", | |
| "3517 c1nc(NC2CCN(C3CC3)CC2)c2sccc2n1 \n", | |
| "3518 c1nc(Nc2ccc3c(c2)OCCCO3)c2sccc2n1 \n", | |
| "\n", | |
| " SMILES LOG_HLM_CLint \\\n", | |
| "0 Brc1cc2c(cc1Cn1cncn1)OCCCO2 0.886265 \n", | |
| "1 Brc1ccc(-c2nnc(Cn3cnc4ccccc43)o2)o1 0.675687 \n", | |
| "3 Brc1cnc2ccccc2c1 NaN \n", | |
| "5 C#CCN1CCC(c2nc(Cc3noc(C)n3)no2)CC1 0.675687 \n", | |
| "6 C#CCN1CCC[C@H]1C(=O)Nc1cnn(-c2ncccn2)c1 0.675687 \n", | |
| "... ... ... \n", | |
| "3511 c1cncc(-c2ccnn2CN2CCOCC2)c1 NaN \n", | |
| "3513 c1cnn(-c2ccc(-c3ccc(NC4CC4)nn3)cc2)c1 NaN \n", | |
| "3514 c1csc(-c2csc3nc(CN4CCOCC4)nc(N4CCc5[nH]ncc5C4)... 2.747507 \n", | |
| "3517 c1nc(NC2CCN(C3CC3)CC2)c2sccc2n1 1.240899 \n", | |
| "3518 c1nc(Nc2ccc3c(c2)OCCCO3)c2sccc2n1 1.964066 \n", | |
| "\n", | |
| " LOG_RLM_CLint LOG_MDR1-MDCK_ER LOG_HPPB LOG_RPPB LOG_SOLUBILITY \\\n", | |
| "0 2.357933 -0.247518 NaN NaN 1.536432 \n", | |
| "1 1.613704 -0.010669 NaN NaN 1.797475 \n", | |
| "3 NaN NaN NaN NaN -0.033858 \n", | |
| "5 1.107108 -0.135635 NaN NaN 1.695044 \n", | |
| "6 1.921166 0.596581 NaN NaN 1.653984 \n", | |
| "... ... ... ... ... ... \n", | |
| "3511 NaN NaN NaN NaN 1.239049 \n", | |
| "3513 NaN NaN NaN NaN -0.107905 \n", | |
| "3514 3.118970 1.921635 NaN NaN 1.690107 \n", | |
| "3517 1.906157 0.094514 NaN NaN 1.749891 \n", | |
| "3518 3.196559 NaN NaN NaN 1.707229 \n", | |
| "\n", | |
| " mol MW sol_ug_ml \\\n", | |
| "0 <rdkit.Chem.rdchem.Mol object at 0x150117f40> 310.151 34.390 \n", | |
| "1 <rdkit.Chem.rdchem.Mol object at 0x150187f40> 345.156 62.730 \n", | |
| "3 <rdkit.Chem.rdchem.Mol object at 0x150187ed0> 208.058 0.925 \n", | |
| "5 <rdkit.Chem.rdchem.Mol object at 0x150187e60> 287.323 49.550 \n", | |
| "6 <rdkit.Chem.rdchem.Mol object at 0x150187df0> 296.334 45.080 \n", | |
| "... ... ... ... \n", | |
| "3511 <rdkit.Chem.rdchem.Mol object at 0x150131e70> 244.298 17.340 \n", | |
| "3513 <rdkit.Chem.rdchem.Mol object at 0x150131ee0> 277.331 0.780 \n", | |
| "3514 <rdkit.Chem.rdchem.Mol object at 0x150131f50> 438.582 48.990 \n", | |
| "3517 <rdkit.Chem.rdchem.Mol object at 0x150131fc0> 274.393 56.220 \n", | |
| "3518 <rdkit.Chem.rdchem.Mol object at 0x150132030> 299.355 50.960 \n", | |
| "\n", | |
| " ug_ml LogS solute_smiles \\\n", | |
| "0 34.390 -3.955141 Brc1cc2c(cc1Cn1cncn1)OCCCO2 \n", | |
| "1 62.730 -3.740540 Brc1ccc(-c2nnc(Cn3cnc4ccccc43)o2)o1 \n", | |
| "3 0.925 -5.352043 Brc1cnc2ccccc2c1 \n", | |
| "5 49.550 -3.763327 C#CCN1CCC(c2nc(Cc3noc(C)n3)no2)CC1 \n", | |
| "6 45.080 -3.817798 C#CCN1CCC[C@H]1C(=O)Nc1cnn(-c2ncccn2)c1 \n", | |
| "... ... ... ... \n", | |
| "3511 17.340 -4.148871 c1cncc(-c2ccnn2CN2CCOCC2)c1 \n", | |
| "3513 0.780 -5.550904 c1cnn(-c2ccc(-c3ccc(NC4CC4)nn3)cc2)c1 \n", | |
| "3514 48.990 -3.951943 c1csc(-c2csc3nc(CN4CCOCC4)nc(N4CCc5[nH]ncc5C4)... \n", | |
| "3517 56.220 -3.688482 c1nc(NC2CCN(C3CC3)CC2)c2sccc2n1 \n", | |
| "3518 50.960 -3.768957 c1nc(Nc2ccc3c(c2)OCCCO3)c2sccc2n1 \n", | |
| "\n", | |
| " solvent_smiles temperature predicted_LogS Dataset \n", | |
| "0 O 298 -1.341322 Biogen \n", | |
| "1 O 298 -2.414264 Biogen \n", | |
| "3 O 298 -1.294908 Biogen \n", | |
| "5 O 298 -1.561687 Biogen \n", | |
| "6 O 298 -1.749381 Biogen \n", | |
| "... ... ... ... ... \n", | |
| "3511 O 298 -1.242087 Biogen \n", | |
| "3513 O 298 -1.602266 Biogen \n", | |
| "3514 O 298 -2.662541 Biogen \n", | |
| "3517 O 298 -1.360543 Biogen \n", | |
| "3518 O 298 -1.360945 Biogen \n", | |
| "\n", | |
| "[2173 rows x 19 columns]" | |
| ] | |
| }, | |
| "execution_count": 121, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "biogen_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "b23c2743-6dc6-4d6f-bfa3-050f015189e1", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.11.11" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment