PatWalters · August 31, 2020 23:47
diff --git a/ChEMBL_examples.ipynb b/ChEMBL_examples.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import mysql.connector as sql"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "hostname = 'localhost'\n",
    "database = 'chembl_27'\n",
    "user = 'pwalters'\n",
    "password = 'imnottellin'\n",
    "con = sql.connect(host=hostname, database=database, user=user, password=password)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Retrieve marketed drugs from ChEMBL**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "drug_query = \"\"\"select distinct canonical_smiles, cs.molregno, f.ingredient from compound_structures cs\n",
    "join formulations f on cs.molregno = f.molregno\n",
    "join products p on p.product_id = f.product_id\n",
    "join compound_properties cp on cp.molregno = cs.molregno\n",
    "where p.oral = 1\n",
    "and cp.mw_freebase < 1000\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "drug_df = pd.read_sql(drug_query,con=con)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>canonical_smiles</th>\n",
       "      <th>molregno</th>\n",
       "      <th>ingredient</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Nc1ccc(S(=O)(=O)Nc2ccccn2)cc1</td>\n",
       "      <td>32842</td>\n",
       "      <td>SULFAPYRIDINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>CCC(C)C1(CC)C(=O)[N-]C(=O)NC1=O.[Na+]</td>\n",
       "      <td>674933</td>\n",
       "      <td>BUTABARBITAL SODIUM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Cl.N=C(N)N</td>\n",
       "      <td>674679</td>\n",
       "      <td>GUANIDINE HYDROCHLORIDE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>CC1=CC(=O)c2ccccc2C1=O</td>\n",
       "      <td>19344</td>\n",
       "      <td>MENADIONE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Cn1c(=O)c2[nH]cnc2n(C)c1=O.Cn1c(=O)c2[nH]cnc2n...</td>\n",
       "      <td>794445</td>\n",
       "      <td>AMINOPHYLLINE</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    canonical_smiles  molregno  \\\n",
       "0                      Nc1ccc(S(=O)(=O)Nc2ccccn2)cc1     32842   \n",
       "1              CCC(C)C1(CC)C(=O)[N-]C(=O)NC1=O.[Na+]    674933   \n",
       "2                                         Cl.N=C(N)N    674679   \n",
       "3                             CC1=CC(=O)c2ccccc2C1=O     19344   \n",
       "4  Cn1c(=O)c2[nH]cnc2n(C)c1=O.Cn1c(=O)c2[nH]cnc2n...    794445   \n",
       "\n",
       "                ingredient  \n",
       "0            SULFAPYRIDINE  \n",
       "1      BUTABARBITAL SODIUM  \n",
       "2  GUANIDINE HYDROCHLORIDE  \n",
       "3                MENADIONE  \n",
       "4            AMINOPHYLLINE  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drug_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Retrieve journal information along with structures from ChEMBL**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "journal_query = \"\"\"select canonical_smiles, cs.molregno, journal from docs\n",
    "join compound_records cr on docs.doc_id = cr.doc_id\n",
    "join compound_structures cs on cs.molregno = cr.molregno\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "journal_df = pd.read_sql(journal_query,con=con)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>canonical_smiles</th>\n",
       "      <th>molregno</th>\n",
       "      <th>journal</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccccc1Cl</td>\n",
       "      <td>1</td>\n",
       "      <td>J. Med. Chem.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(C#N)cc1</td>\n",
       "      <td>2</td>\n",
       "      <td>J. Med. Chem.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1C(O)c1ccc(Cl)cc1</td>\n",
       "      <td>3</td>\n",
       "      <td>J. Med. Chem.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Cc1ccc(C(=O)c2ccc(-n3ncc(=O)[nH]c3=O)cc2)cc1</td>\n",
       "      <td>4</td>\n",
       "      <td>J. Med. Chem.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(Cl)cc1</td>\n",
       "      <td>5</td>\n",
       "      <td>J. Med. Chem.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   canonical_smiles  molregno        journal\n",
       "0      Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccccc1Cl         1  J. Med. Chem.\n",
       "1   Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(C#N)cc1         2  J. Med. Chem.\n",
       "2  Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1C(O)c1ccc(Cl)cc1         3  J. Med. Chem.\n",
       "3      Cc1ccc(C(=O)c2ccc(-n3ncc(=O)[nH]c3=O)cc2)cc1         4  J. Med. Chem.\n",
       "4    Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(Cl)cc1         5  J. Med. Chem."
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "journal_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Retrieve COVID-19 drug repurposing data from ChEMBL**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "covid_query = \"\"\"select cs.canonical_smiles, cs.molregno, usan_tbl.synonyms, act.assay_id, act.standard_value, act.standard_type, act.standard_units from activities act\n",
    "    join compound_structures cs on cs.molregno = act.molregno\n",
    "    join assays a on act.assay_id = a.assay_id\n",
    "    join source s on a.src_id = s.src_id\n",
    "    left join (select molregno, synonyms from molecule_synonyms where syn_type = 'USAN') as usan_tbl on usan_tbl.molregno = cs.molregno\n",
    "    where src_description = 'SARS-CoV-2 Screening Data'\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "covid_df = pd.read_sql(covid_query,con=con)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>canonical_smiles</th>\n",
       "      <th>molregno</th>\n",
       "      <th>synonyms</th>\n",
       "      <th>assay_id</th>\n",
       "      <th>standard_value</th>\n",
       "      <th>standard_type</th>\n",
       "      <th>standard_units</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>O=C(O)c1ccc(N2C(=O)/C(=C/c3ccc(-c4ccc([N+](=O)...</td>\n",
       "      <td>1033892</td>\n",
       "      <td>None</td>\n",
       "      <td>1804482</td>\n",
       "      <td>8.709636e+06</td>\n",
       "      <td>IC50</td>\n",
       "      <td>nM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>O=C(O)/C(Cc1ccccc1[N+](=O)[O-])=N\\Nc1nc(-c2ccc...</td>\n",
       "      <td>425006</td>\n",
       "      <td>None</td>\n",
       "      <td>1804482</td>\n",
       "      <td>2.089300e+03</td>\n",
       "      <td>IC50</td>\n",
       "      <td>nM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>CCNC(=O)c1cc2c(-c3cc(C(C)(C)O)ccc3Oc3c(C)cc(F)...</td>\n",
       "      <td>2335425</td>\n",
       "      <td>None</td>\n",
       "      <td>1804482</td>\n",
       "      <td>3.467368e+06</td>\n",
       "      <td>IC50</td>\n",
       "      <td>nM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>CCC(=O)CCCCC[C@@H]1NC(=O)[C@H]2CCCCN2C(=O)[C@H...</td>\n",
       "      <td>275631</td>\n",
       "      <td>None</td>\n",
       "      <td>1804482</td>\n",
       "      <td>9.996846e+08</td>\n",
       "      <td>IC50</td>\n",
       "      <td>nM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>C[C@@H](C1CCCCC1)n1c(-c2cc3c(cc2Br)OCO3)nc2cc(...</td>\n",
       "      <td>2260454</td>\n",
       "      <td>None</td>\n",
       "      <td>1804482</td>\n",
       "      <td>6.025600e+02</td>\n",
       "      <td>IC50</td>\n",
       "      <td>nM</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    canonical_smiles  molregno synonyms  \\\n",
       "0  O=C(O)c1ccc(N2C(=O)/C(=C/c3ccc(-c4ccc([N+](=O)...   1033892     None   \n",
       "1  O=C(O)/C(Cc1ccccc1[N+](=O)[O-])=N\\Nc1nc(-c2ccc...    425006     None   \n",
       "2  CCNC(=O)c1cc2c(-c3cc(C(C)(C)O)ccc3Oc3c(C)cc(F)...   2335425     None   \n",
       "3  CCC(=O)CCCCC[C@@H]1NC(=O)[C@H]2CCCCN2C(=O)[C@H...    275631     None   \n",
       "4  C[C@@H](C1CCCCC1)n1c(-c2cc3c(cc2Br)OCO3)nc2cc(...   2260454     None   \n",
       "\n",
       "   assay_id  standard_value standard_type standard_units  \n",
       "0   1804482    8.709636e+06          IC50             nM  \n",
       "1   1804482    2.089300e+03          IC50             nM  \n",
       "2   1804482    3.467368e+06          IC50             nM  \n",
       "3   1804482    9.996846e+08          IC50             nM  \n",
       "4   1804482    6.025600e+02          IC50             nM  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "covid_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import mysql.connector as sql"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"hostname = 'localhost'\n",
	"database = 'chembl_27'\n",
	"user = 'pwalters'\n",
	"password = 'imnottellin'\n",
	"con = sql.connect(host=hostname, database=database, user=user, password=password)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Retrieve marketed drugs from ChEMBL"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"drug_query = \"\"\"select distinct canonical_smiles, cs.molregno, f.ingredient from compound_structures cs\n",
	"join formulations f on cs.molregno = f.molregno\n",
	"join products p on p.product_id = f.product_id\n",
	"join compound_properties cp on cp.molregno = cs.molregno\n",
	"where p.oral = 1\n",
	"and cp.mw_freebase < 1000\"\"\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"drug_df = pd.read_sql(drug_query,con=con)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>canonical_smiles</th>\n",
	" <th>molregno</th>\n",
	" <th>ingredient</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>Nc1ccc(S(=O)(=O)Nc2ccccn2)cc1</td>\n",
	" <td>32842</td>\n",
	" <td>SULFAPYRIDINE</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>CCC(C)C1(CC)C(=O)[N-]C(=O)NC1=O.[Na+]</td>\n",
	" <td>674933</td>\n",
	" <td>BUTABARBITAL SODIUM</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>Cl.N=C(N)N</td>\n",
	" <td>674679</td>\n",
	" <td>GUANIDINE HYDROCHLORIDE</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>CC1=CC(=O)c2ccccc2C1=O</td>\n",
	" <td>19344</td>\n",
	" <td>MENADIONE</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>Cn1c(=O)c2[nH]cnc2n(C)c1=O.Cn1c(=O)c2[nH]cnc2n...</td>\n",
	" <td>794445</td>\n",
	" <td>AMINOPHYLLINE</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" canonical_smiles molregno \\\n",
	"0 Nc1ccc(S(=O)(=O)Nc2ccccn2)cc1 32842 \n",
	"1 CCC(C)C1(CC)C(=O)[N-]C(=O)NC1=O.[Na+] 674933 \n",
	"2 Cl.N=C(N)N 674679 \n",
	"3 CC1=CC(=O)c2ccccc2C1=O 19344 \n",
	"4 Cn1c(=O)c2[nH]cnc2n(C)c1=O.Cn1c(=O)c2[nH]cnc2n... 794445 \n",
	"\n",
	" ingredient \n",
	"0 SULFAPYRIDINE \n",
	"1 BUTABARBITAL SODIUM \n",
	"2 GUANIDINE HYDROCHLORIDE \n",
	"3 MENADIONE \n",
	"4 AMINOPHYLLINE "
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"drug_df.head()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Retrieve journal information along with structures from ChEMBL"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"journal_query = \"\"\"select canonical_smiles, cs.molregno, journal from docs\n",
	"join compound_records cr on docs.doc_id = cr.doc_id\n",
	"join compound_structures cs on cs.molregno = cr.molregno\"\"\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"journal_df = pd.read_sql(journal_query,con=con)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>canonical_smiles</th>\n",
	" <th>molregno</th>\n",
	" <th>journal</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccccc1Cl</td>\n",
	" <td>1</td>\n",
	" <td>J. Med. Chem.</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(C#N)cc1</td>\n",
	" <td>2</td>\n",
	" <td>J. Med. Chem.</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1C(O)c1ccc(Cl)cc1</td>\n",
	" <td>3</td>\n",
	" <td>J. Med. Chem.</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>Cc1ccc(C(=O)c2ccc(-n3ncc(=O)[nH]c3=O)cc2)cc1</td>\n",
	" <td>4</td>\n",
	" <td>J. Med. Chem.</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(Cl)cc1</td>\n",
	" <td>5</td>\n",
	" <td>J. Med. Chem.</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" canonical_smiles molregno journal\n",
	"0 Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccccc1Cl 1 J. Med. Chem.\n",
	"1 Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(C#N)cc1 2 J. Med. Chem.\n",
	"2 Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1C(O)c1ccc(Cl)cc1 3 J. Med. Chem.\n",
	"3 Cc1ccc(C(=O)c2ccc(-n3ncc(=O)[nH]c3=O)cc2)cc1 4 J. Med. Chem.\n",
	"4 Cc1cc(-n2ncc(=O)[nH]c2=O)ccc1C(=O)c1ccc(Cl)cc1 5 J. Med. Chem."
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"journal_df.head()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Retrieve COVID-19 drug repurposing data from ChEMBL"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"covid_query = \"\"\"select cs.canonical_smiles, cs.molregno, usan_tbl.synonyms, act.assay_id, act.standard_value, act.standard_type, act.standard_units from activities act\n",
	" join compound_structures cs on cs.molregno = act.molregno\n",
	" join assays a on act.assay_id = a.assay_id\n",
	" join source s on a.src_id = s.src_id\n",
	" left join (select molregno, synonyms from molecule_synonyms where syn_type = 'USAN') as usan_tbl on usan_tbl.molregno = cs.molregno\n",
	" where src_description = 'SARS-CoV-2 Screening Data'\"\"\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"covid_df = pd.read_sql(covid_query,con=con)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>canonical_smiles</th>\n",
	" <th>molregno</th>\n",
	" <th>synonyms</th>\n",
	" <th>assay_id</th>\n",
	" <th>standard_value</th>\n",
	" <th>standard_type</th>\n",
	" <th>standard_units</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>O=C(O)c1ccc(N2C(=O)/C(=C/c3ccc(-c4ccc([N+](=O)...</td>\n",
	" <td>1033892</td>\n",
	" <td>None</td>\n",
	" <td>1804482</td>\n",
	" <td>8.709636e+06</td>\n",
	" <td>IC50</td>\n",
	" <td>nM</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>O=C(O)/C(Cc1ccccc1[N+](=O)[O-])=N\\Nc1nc(-c2ccc...</td>\n",
	" <td>425006</td>\n",
	" <td>None</td>\n",
	" <td>1804482</td>\n",
	" <td>2.089300e+03</td>\n",
	" <td>IC50</td>\n",
	" <td>nM</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>CCNC(=O)c1cc2c(-c3cc(C(C)(C)O)ccc3Oc3c(C)cc(F)...</td>\n",
	" <td>2335425</td>\n",
	" <td>None</td>\n",
	" <td>1804482</td>\n",
	" <td>3.467368e+06</td>\n",
	" <td>IC50</td>\n",
	" <td>nM</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>CCC(=O)CCCCC[C@@H]1NC(=O)[C@H]2CCCCN2C(=O)[C@H...</td>\n",
	" <td>275631</td>\n",
	" <td>None</td>\n",
	" <td>1804482</td>\n",
	" <td>9.996846e+08</td>\n",
	" <td>IC50</td>\n",
	" <td>nM</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>C[C@@H](C1CCCCC1)n1c(-c2cc3c(cc2Br)OCO3)nc2cc(...</td>\n",
	" <td>2260454</td>\n",
	" <td>None</td>\n",
	" <td>1804482</td>\n",
	" <td>6.025600e+02</td>\n",
	" <td>IC50</td>\n",
	" <td>nM</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" canonical_smiles molregno synonyms \\\n",
	"0 O=C(O)c1ccc(N2C(=O)/C(=C/c3ccc(-c4ccc([N+](=O)... 1033892 None \n",
	"1 O=C(O)/C(Cc1ccccc1[N+](=O)[O-])=N\\Nc1nc(-c2ccc... 425006 None \n",
	"2 CCNC(=O)c1cc2c(-c3cc(C(C)(C)O)ccc3Oc3c(C)cc(F)... 2335425 None \n",
	"3 CCC(=O)CCCCC[C@@H]1NC(=O)[C@H]2CCCCN2C(=O)[C@H... 275631 None \n",
	"4 C[C@@H](C1CCCCC1)n1c(-c2cc3c(cc2Br)OCO3)nc2cc(... 2260454 None \n",
	"\n",
	" assay_id standard_value standard_type standard_units \n",
	"0 1804482 8.709636e+06 IC50 nM \n",
	"1 1804482 2.089300e+03 IC50 nM \n",
	"2 1804482 3.467368e+06 IC50 nM \n",
	"3 1804482 9.996846e+08 IC50 nM \n",
	"4 1804482 6.025600e+02 IC50 nM "
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"covid_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}