Skip to content

Instantly share code, notes, and snippets.

@ptosco
Created November 26, 2019 21:47
Show Gist options
  • Save ptosco/40c2530c67d9c0930b8efbc8c92da0be to your computer and use it in GitHub Desktop.
Save ptosco/40c2530c67d9c0930b8efbc8c92da0be to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"from rdkit import Chem\n",
"from rdkit.Chem import AllChem\n",
"from rdkit.Chem.Draw import IPythonConsole\n",
"import random\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"ache_2 = \"\"\"\\\n",
"1-3e\n",
" Cerius2 12110213523D 1 1.00000 \n",
"Structure written by Cerius2 SD Exporter\n",
" 60 63 0 0 0 0 0 0 0 0999 V2000\n",
" 5.1741 63.7666 67.2298 C 0 0 0 0 0 0\n",
" 4.3855 65.0953 67.1679 C 0 0 0 0 0 0\n",
" 5.2770 66.2628 67.6398 C 0 0 0 0 0 0\n",
" 4.5857 67.6253 67.4818 C 0 0 0 0 0 0\n",
" 4.1925 67.8073 66.0805 N 0 3 0 0 0 0\n",
" 3.1996 66.8221 65.6392 C 0 0 0 0 0 0\n",
" 3.8229 65.4102 65.7568 C 0 0 0 0 0 0\n",
" 4.3302 69.1509 65.5151 C 0 0 0 0 0 0\n",
" 4.2612 69.1992 64.0021 C 0 0 0 0 0 0\n",
" 3.0583 69.6122 63.4103 C 0 0 0 0 0 0\n",
" 2.9137 69.6073 62.0214 C 0 0 0 0 0 0\n",
" 3.9685 69.1928 61.2101 C 0 0 0 0 0 0\n",
" 5.1698 68.7828 61.7855 C 0 0 0 0 0 0\n",
" 5.3166 68.7866 63.1737 C 0 0 0 0 0 0\n",
" 5.6679 63.6802 68.2153 H 0 0 0 0 0 0\n",
" 5.9893 63.7965 66.4837 H 0 0 0 0 0 0\n",
" 3.5310 65.0411 67.8689 H 0 0 0 0 0 0\n",
" 5.5824 66.1197 68.6906 H 0 0 0 0 0 0\n",
" 6.2068 66.2717 67.0398 H 0 0 0 0 0 0\n",
" 3.7145 67.7004 68.1590 H 0 0 0 0 0 0\n",
" 5.2829 68.4214 67.7923 H 0 0 0 0 0 0\n",
" 2.2727 66.8895 66.2384 H 0 0 0 0 0 0\n",
" 2.9028 66.9965 64.5928 H 0 0 0 0 0 0\n",
" 4.6504 65.3441 65.0247 H 0 0 0 0 0 0\n",
" 3.0777 64.6579 65.4492 H 0 0 0 0 0 0\n",
" 3.5737 69.8400 65.9317 H 0 0 0 0 0 0\n",
" 5.3143 69.5451 65.8234 H 0 0 0 0 0 0\n",
" 2.2062 69.9310 64.0444 H 0 0 0 0 0 0\n",
" 1.9555 69.9248 61.5679 H 0 0 0 0 0 0\n",
" 3.8512 69.1849 60.1094 H 0 0 0 0 0 0\n",
" 6.0072 68.4449 61.1453 H 0 0 0 0 0 0\n",
" 6.2681 68.4425 63.6279 H 0 0 0 0 0 0\n",
" 4.3640 62.4886 66.9831 C 0 0 0 0 0 0\n",
" 3.3310 62.2808 67.9859 N 0 0 0 0 0 0\n",
" 3.5065 61.3987 69.0541 C 0 0 0 0 0 0\n",
" 4.5000 60.6868 69.1648 O 0 0 0 0 0 0\n",
" 2.3360 61.2610 69.9910 C 0 0 0 0 0 0\n",
" 1.6635 62.3620 70.5477 C 0 0 0 0 0 0\n",
" 0.5511 62.1510 71.3717 C 0 0 0 0 0 0\n",
" 0.0779 60.8542 71.6144 N 0 0 0 0 0 0\n",
" 0.7186 59.7598 71.0147 C 0 0 0 0 0 0\n",
" 1.8345 59.9629 70.1974 C 0 0 0 0 0 0\n",
" 3.8980 62.5103 65.9824 H 0 0 0 0 0 0\n",
" 5.0400 61.6176 66.9795 H 0 0 0 0 0 0\n",
" 1.9908 63.3965 70.3365 H 0 0 0 0 0 0\n",
" 0.0324 63.0186 71.8235 H 0 0 0 0 0 0\n",
" 0.3312 58.7358 71.1801 H 0 0 0 0 0 0\n",
" 2.3313 59.0997 69.7146 H 0 0 0 0 0 0\n",
" 4.9170 67.4352 65.6976 H 0 0 0 0 0 0\n",
" 2.1042 62.9814 67.6870 C 0 0 0 0 0 0\n",
" 1.8528 64.2567 68.2543 C 0 0 0 0 0 0\n",
" 2.5775 64.7082 68.9631 H 0 0 0 0 0 0\n",
" 0.6840 64.9680 67.9645 C 0 0 0 0 0 0\n",
" 0.5083 65.9542 68.4393 H 0 0 0 0 0 0\n",
" -0.2705 64.4289 67.0940 C 0 0 0 0 0 0\n",
" -1.2049 64.9815 66.8771 H 0 0 0 0 0 0\n",
" -0.0553 63.1639 66.5409 C 0 0 0 0 0 0\n",
" -0.8215 62.7141 65.8773 H 0 0 0 0 0 0\n",
" 1.1155 62.4477 66.8318 C 0 0 0 0 0 0\n",
" 1.2206 61.4326 66.3976 H 0 0 0 0 0 0\n",
" 1 2 1 0 0 0\n",
" 1 15 1 0 0 0\n",
" 1 16 1 0 0 0\n",
" 1 33 1 0 0 0\n",
" 2 3 1 0 0 0\n",
" 2 7 1 0 0 0\n",
" 2 17 1 0 0 0\n",
" 3 4 1 0 0 0\n",
" 3 18 1 0 0 0\n",
" 3 19 1 0 0 0\n",
" 4 5 1 0 0 0\n",
" 4 20 1 0 0 0\n",
" 4 21 1 0 0 0\n",
" 5 6 1 0 0 0\n",
" 5 8 1 0 0 0\n",
" 5 49 1 0 0 0\n",
" 6 7 1 0 0 0\n",
" 6 22 1 0 0 0\n",
" 6 23 1 0 0 0\n",
" 7 24 1 0 0 0\n",
" 7 25 1 0 0 0\n",
" 8 9 1 0 0 0\n",
" 8 26 1 0 0 0\n",
" 8 27 1 0 0 0\n",
" 9 10 1 0 0 0\n",
" 9 14 2 0 0 0\n",
" 10 11 2 0 0 0\n",
" 10 28 1 0 0 0\n",
" 11 12 1 0 0 0\n",
" 11 29 1 0 0 0\n",
" 12 13 2 0 0 0\n",
" 12 30 1 0 0 0\n",
" 13 14 1 0 0 0\n",
" 13 31 1 0 0 0\n",
" 14 32 1 0 0 0\n",
" 33 34 1 0 0 0\n",
" 33 43 1 0 0 0\n",
" 33 44 1 0 0 0\n",
" 34 35 1 0 0 0\n",
" 34 50 1 0 0 0\n",
" 35 36 2 0 0 0\n",
" 35 37 1 0 0 0\n",
" 37 38 2 0 0 0\n",
" 37 42 1 0 0 0\n",
" 38 39 1 0 0 0\n",
" 38 45 1 0 0 0\n",
" 39 40 2 0 0 0\n",
" 39 46 1 0 0 0\n",
" 40 41 1 0 0 0\n",
" 41 42 2 0 0 0\n",
" 41 47 1 0 0 0\n",
" 42 48 1 0 0 0\n",
" 50 51 1 0 0 0\n",
" 50 59 2 0 0 0\n",
" 51 52 1 0 0 0\n",
" 51 53 2 0 0 0\n",
" 53 54 1 0 0 0\n",
" 53 55 1 0 0 0\n",
" 55 56 1 0 0 0\n",
" 55 57 2 0 0 0\n",
" 57 58 1 0 0 0\n",
" 57 59 1 0 0 0\n",
" 59 60 1 0 0 0\n",
"M CHG 1 5 1\n",
"M END\n",
"> <ACTIVITY>\n",
"7.19\n",
"\n",
"> <SET>\n",
"2\n",
"\n",
"> <Min_dist>\n",
"0.09\n",
"\n",
"> <Avg_dist>\n",
"0.41\n",
"\n",
"$$$$\n",
"2-34\n",
" Cerius2 12110213523D 1 1.00000 \n",
"Structure written by Cerius2 SD Exporter\n",
" 51 53 0 0 0 0 0 0 0 0999 V2000\n",
" 5.1741 63.7666 67.2298 C 0 0 0 0 0 0\n",
" 4.3855 65.0953 67.1679 C 0 0 0 0 0 0\n",
" 5.2770 66.2628 67.6398 C 0 0 0 0 0 0\n",
" 4.5857 67.6253 67.4818 C 0 0 0 0 0 0\n",
" 4.1925 67.8073 66.0805 N 0 3 0 0 0 0\n",
" 3.1996 66.8221 65.6392 C 0 0 0 0 0 0\n",
" 3.8229 65.4102 65.7568 C 0 0 0 0 0 0\n",
" 4.3302 69.1509 65.5151 C 0 0 0 0 0 0\n",
" 4.2612 69.1992 64.0021 C 0 0 0 0 0 0\n",
" 3.4089 68.9383 62.7868 C 0 0 0 0 0 0\n",
" 4.8921 68.6128 62.7679 C 0 0 0 0 0 0\n",
" 5.6679 63.6802 68.2153 H 0 0 0 0 0 0\n",
" 5.9893 63.7965 66.4837 H 0 0 0 0 0 0\n",
" 3.5310 65.0411 67.8689 H 0 0 0 0 0 0\n",
" 5.5824 66.1197 68.6906 H 0 0 0 0 0 0\n",
" 6.2068 66.2717 67.0398 H 0 0 0 0 0 0\n",
" 3.7145 67.7004 68.1590 H 0 0 0 0 0 0\n",
" 5.2829 68.4214 67.7923 H 0 0 0 0 0 0\n",
" 2.2727 66.8895 66.2384 H 0 0 0 0 0 0\n",
" 2.9028 66.9965 64.5928 H 0 0 0 0 0 0\n",
" 4.6504 65.3441 65.0247 H 0 0 0 0 0 0\n",
" 3.0777 64.6579 65.4492 H 0 0 0 0 0 0\n",
" 3.5737 69.8400 65.9317 H 0 0 0 0 0 0\n",
" 5.3143 69.5451 65.8234 H 0 0 0 0 0 0\n",
" 2.6592 68.1505 62.6898 H 0 0 0 0 0 0\n",
" 4.3640 62.4886 66.9831 C 0 0 0 0 0 0\n",
" 3.3310 62.2808 67.9859 N 0 0 0 0 0 0\n",
" 3.5065 61.3987 69.0541 C 0 0 0 0 0 0\n",
" 4.5000 60.6868 69.1648 O 0 0 0 0 0 0\n",
" 2.3360 61.2610 69.9910 C 0 0 0 0 0 0\n",
" 2.1816 60.0574 70.7052 C 0 0 0 0 0 0\n",
" 1.0937 59.8814 71.5627 C 0 0 0 0 0 0\n",
" 0.1451 60.9030 71.7211 C 0 0 0 0 0 0\n",
" 0.3027 62.1039 71.0182 C 0 0 0 0 0 0\n",
" 1.3932 62.2913 70.1618 C 0 0 0 0 0 0\n",
" 3.8980 62.5103 65.9824 H 0 0 0 0 0 0\n",
" 5.0400 61.6176 66.9795 H 0 0 0 0 0 0\n",
" 1.9897 62.5131 67.4053 C 0 0 0 0 0 0\n",
" 2.9204 59.2427 70.5798 H 0 0 0 0 0 0\n",
" 0.9794 58.9301 72.1176 H 0 0 0 0 0 0\n",
" -0.7170 60.7611 72.4002 H 0 0 0 0 0 0\n",
" -0.4380 62.9164 71.1493 H 0 0 0 0 0 0\n",
" 1.5043 63.2620 69.6464 H 0 0 0 0 0 0\n",
" 4.9170 67.4352 65.6976 H 0 0 0 0 0 0\n",
" 1.2417 61.8253 67.8173 H 0 0 0 0 0 0\n",
" 1.6688 63.5555 67.5482 H 0 0 0 0 0 0\n",
" 1.9674 62.3016 66.3226 H 0 0 0 0 0 0\n",
" 5.5427 69.2852 62.1965 H 0 0 0 0 0 0\n",
" 4.4766 70.2822 63.8998 H 0 0 0 0 0 0\n",
" 3.1023 69.8390 62.2409 H 0 0 0 0 0 0\n",
" 5.2248 67.5746 62.6917 H 0 0 0 0 0 0\n",
" 1 2 1 0 0 0\n",
" 1 12 1 0 0 0\n",
" 1 13 1 0 0 0\n",
" 1 26 1 0 0 0\n",
" 2 3 1 0 0 0\n",
" 2 7 1 0 0 0\n",
" 2 14 1 0 0 0\n",
" 3 4 1 0 0 0\n",
" 3 15 1 0 0 0\n",
" 3 16 1 0 0 0\n",
" 4 5 1 0 0 0\n",
" 4 17 1 0 0 0\n",
" 4 18 1 0 0 0\n",
" 5 6 1 0 0 0\n",
" 5 8 1 0 0 0\n",
" 5 44 1 0 0 0\n",
" 6 7 1 0 0 0\n",
" 6 19 1 0 0 0\n",
" 6 20 1 0 0 0\n",
" 7 21 1 0 0 0\n",
" 7 22 1 0 0 0\n",
" 8 9 1 0 0 0\n",
" 8 23 1 0 0 0\n",
" 8 24 1 0 0 0\n",
" 9 10 1 0 0 0\n",
" 9 11 1 0 0 0\n",
" 9 49 1 0 0 0\n",
" 10 11 1 0 0 0\n",
" 10 25 1 0 0 0\n",
" 10 50 1 0 0 0\n",
" 11 48 1 0 0 0\n",
" 11 51 1 0 0 0\n",
" 26 27 1 0 0 0\n",
" 26 36 1 0 0 0\n",
" 26 37 1 0 0 0\n",
" 27 28 1 0 0 0\n",
" 27 38 1 0 0 0\n",
" 28 29 2 0 0 0\n",
" 28 30 1 0 0 0\n",
" 30 31 2 0 0 0\n",
" 30 35 1 0 0 0\n",
" 31 32 1 0 0 0\n",
" 31 39 1 0 0 0\n",
" 32 33 2 0 0 0\n",
" 32 40 1 0 0 0\n",
" 33 34 1 0 0 0\n",
" 33 41 1 0 0 0\n",
" 34 35 2 0 0 0\n",
" 34 42 1 0 0 0\n",
" 35 43 1 0 0 0\n",
" 38 45 1 0 0 0\n",
" 38 46 1 0 0 0\n",
" 38 47 1 0 0 0\n",
"M CHG 1 5 1\n",
"M END\n",
"> <ACTIVITY>\n",
"4.42\n",
"\n",
"> <SET>\n",
"1\n",
"\n",
"> <Min_dist>\n",
"999.9\n",
"\n",
"> <Avg_dist>\n",
"999.9\n",
"\n",
"$$$$\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"suppl = Chem.SDMolSupplier()\n",
"suppl.SetData(ache_2)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"prb = suppl[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"ref = suppl[1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's shuffle the `prb` atoms to make sure the alignment algorithm is doing its job:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Permute the probe's atom order with a fixed seed so that a fresh\n",
"# Restart & Run All always produces the same renumbering.\n",
"random.seed(1)\n",
"atom_idx_list = list(range(prb.GetNumAtoms()))\n",
"random.shuffle(atom_idx_list)\n",
"prb = AllChem.RenumberAtoms(prb, atom_idx_list)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Add explicit hydrogens (with coordinates, addCoords=True) to the probe\n",
"# and the reference in one pass.\n",
"prb, ref = (Chem.AddHs(mol, addCoords=True) for mol in (prb, ref))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Write the reference first, then the renumbered probe. try/finally makes\n",
"# sure the writer is closed even if one of the write() calls raises.\n",
"w = Chem.SDWriter(\"ache_3dqsar_ren.sdf\")\n",
"try:\n",
"    w.write(ref)\n",
"    w.write(prb)\n",
"finally:\n",
"    w.close()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): maxIters=0 presumably means no alignment iterations are run,\n",
"# so the score and matches below describe the molecules as they are; the\n",
"# meaning of options=3 is not explained here -- confirm both against the\n",
"# GetO3A documentation.\n",
"o3a = AllChem.GetO3A(\n",
"    prb,\n",
"    ref,\n",
"    maxIters=0,\n",
"    options=3,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is the score \"in place\":"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"179.47884843543616"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"o3a.Score()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is the RMSD between matching atoms:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.36493268557365655"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Root-mean-square distance over the atom pairs matched by O3A.\n",
"# Matches() and the two conformers are fetched once instead of on every\n",
"# iteration; each pair is (prb atom index, ref atom index).\n",
"matches = o3a.Matches()\n",
"prb_conf = prb.GetConformer()\n",
"ref_conf = ref.GetConformer()\n",
"math.sqrt(sum((prb_conf.GetAtomPosition(i)\n",
"               - ref_conf.GetAtomPosition(j)).LengthSq()\n",
"              for i, j in matches) / len(matches))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"These are the matches:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(2, 14)\n",
"(5, 17)\n",
"(6, 2)\n",
"(7, 6)\n",
"(9, 8)\n",
"(10, 20)\n",
"(11, 10)\n",
"(13, 15)\n",
"(14, 11)\n",
"(15, 1)\n",
"(16, 21)\n",
"(17, 18)\n",
"(18, 7)\n",
"(20, 19)\n",
"(21, 12)\n",
"(22, 13)\n",
"(23, 4)\n",
"(24, 9)\n",
"(25, 3)\n",
"(26, 22)\n",
"(29, 16)\n",
"(30, 5)\n"
]
}
],
"source": [
"# Print the matched (prb, ref) atom index pairs, shifted to 1-based indices.\n",
"# A plain loop is used because print() is called only for its side effect.\n",
"for i, j in sorted(o3a.Matches()):\n",
"    print((i + 1, j + 1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The match seems correct:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"\">"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment