Skip to content

Instantly share code, notes, and snippets.

@greglandrum
Last active August 29, 2015 14:10
Show Gist options
  • Select an option

  • Save greglandrum/aaffa3c00a84c64b6d2d to your computer and use it in GitHub Desktop.

Select an option

Save greglandrum/aaffa3c00a84c64b6d2d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:9d5d296f48a6cfa3fb79b82732a22e94a643a520cdfd076e3ac0920903706760"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from rdkit import Chem\n",
"from rdkit import DistanceGeometry,ForceField\n",
"from rdkit.Chem import AllChem\n",
"from rdkit.Chem import rdForceFieldHelpers as rdff\n",
"from rdkit.Chem.Draw import IPythonConsole\n",
"import copy\n",
"%pylab inline\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"WARNING: pylab import has clobbered these variables: ['copy']\n",
"`%matplotlib` prevents importing * from pylab and numpy\n"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"IPythonConsole.ipython_3d=True"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Start with a simple molecule with one torsion:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"m = Chem.MolFromSmiles('CCCC')\n",
"bm = AllChem.GetMoleculeBoundsMatrix(m)\n",
"print(bm)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[ 0. 1.524 2.51279063 3.81071948]\n",
" [ 1.504 0. 1.524 2.51279063]\n",
" [ 2.43279063 1.504 0. 1.524 ]\n",
" [ 2.52476717 2.43279063 1.504 0. ]]\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"DistanceGeometry.DoTriangleSmoothing(bm)\n",
"bm"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"array([[ 0. , 1.524 , 2.51279063, 3.81071948],\n",
" [ 1.504 , 0. , 1.524 , 2.51279063],\n",
" [ 2.43279063, 1.504 , 0. , 1.524 ],\n",
" [ 2.52476717, 2.43279063, 1.504 , 0. ]])"
]
}
],
"prompt_number": 6
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Embed it a bunch of times and look at the distribution of torsions that we get:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"diheds=[]\n",
"m.RemoveAllConformers()\n",
"for i in range(100):\n",
" coords = DistanceGeometry.EmbedBoundsMatrix(bm)\n",
" nAts = m.GetNumAtoms()\n",
" conf = Chem.Conformer(nAts)\n",
" conf.SetId(i)\n",
" for i in range(nAts):\n",
" conf.SetAtomPosition(i,list(coords[i]))\n",
" dihed=AllChem.GetDihedralDeg(conf,0,1,2,3)\n",
" diheds.append(dihed)\n",
" m.AddConformer(conf)\n",
" #diheds.append(dihed if dihed>0 else -1*dihed)\n",
"_=hist(diheds,bins=10)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAEACAYAAABMEua6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADuFJREFUeJzt3V+sHPdZxvHnaexcNEllolS2SdzaiFZNUSSnFSlSgroS\nIrhCpIkQgSCQVVDVG5IILkgcJHwENzRSqoAQualT2RUKqqga4kKKHeQR4YJaCXbi/DEmEEsNtk9K\nSdU4qRqnfbmYOfZ22XPOntnZmdn3fD/SyrO/3Zl5d/a3z5n9zYzXESEAQA7v6boAAEBzCHUASIRQ\nB4BECHUASIRQB4BECHUASGTFULe9zfYR2y/afsH2PVX7gu3XbB+rbrvaKRcAsBKvdJ667S2StkTE\ncdtXSnpW0u2S7pT0ZkR8oZ0yAQCT2LDSgxFxTtK5avq87ZclXVs97BnXBgBYo4nH1G1vl3SjpH+t\nmu62/ZztfbY3zaA2AMAaTRTq1dDL30q6NyLOS3pE0g5JOyWdlfTQzCoEAExsxTF1SbK9UdLXJT0Z\nEQ+PeXy7pIMRccNIO/+pDADUEBG1h7dXO/vFkvZJemk40G1vHXraHZJOLFPY3N727t3beQ3U330d\n1L+0bxYt3qbLjnnf9tNa8UCppJsl/Zak520fq9oekHSX7Z3VO/CqpM9NXQkAYGqrnf3yLxq/N//k\nbMoBAEyDK0qXMRgMui5hKtTfLervzjzX3oRVD5TWXrAds1o2gPaUh9ba/Cy7kbHleWVbMasDpQCA\n+UKoA0AihDoAJLLaKY2YsXK8sl3rebwSyI5Q74V2D0IByIvhFwBIhFAHgEQIdQBIhFAHgEQIdQBI\nhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAH\ngEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIZMVQt73N\n9hHbL9p+wfY9VfvVtg/bPmX7kO1N7ZQLAFiJI2L5B+0tkrZExHHbV0p6VtLtkj4j6X8i4kHb90n6\niYi4f2TeWGnZKNmW1OZ2snhfsBb00XbZVkS47vwr7qlHxLmIOF5Nn5f0sqRrJd0maX/1tP0qgx4A\n0LGJx9Rtb5d0o6RvStocEYvVQ4uSNjdeGQBgzTZM8qRq6OWrku6NiDfLr2OliAjbY78rLSwsXJwe\nDAYaDAbT1AoA6RRFoaIoGlveimPqkmR7o6SvS3oyIh6u2k5KGkTEOdtbJR2JiI+MzMeY+gQYr0Tf\n0UfbNdMxdZfv5j5JLy0FeuUJSbur6d2SHq9bAACgOaud/XKLpH+W9Lwu/aneI+mopK9I+oCk05Lu\njIjvjszLnvoE2AtC39FH2zXtnvqqwy+1F0yoT4QPDPqOPtqumQ6/AADmC6EOAIkQ6gCQCKEOAIkQ\n6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQ\nCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEO\nAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQyKqhbvtR24u2Twy1Ldh+zfax6rZrtmUCACYxyZ76lySN\nhnZI+kJE3FjdvtF8aQCAtVo11CPiaUlvjHnIzZcDAJjGNGPqd9t+zvY+25saqwgAUFvdUH9E0g5J\nOyWdlfRQYxUBAGrbUGemiHh9adr2FyUdHPe8hYWFi9ODwUCDwaDO6gAgraIoVBRFY8tzRKz+JHu7\npIMRcUN1f2tEnK2mf1/Sz0bEb47ME5Mse72zrfK4c2trFO8L1oI+2i7biojaxyxX3VO3/ZikT0q6\nxva3JO2VNLC9U+U7/aqkz9UtAADQnIn21GstmD31ibAXhL6jj7Zr2j11rigFgEQIdQBIhFAHgEQI\ndQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBI\nhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQIdQBIhFAHgEQ2dF0AgLWx3XUJ6DFCHZhL0eK6+CMyTxh+\nAYBECHUASIRQB4BECHUASIRQB4BECHUASIRQB4BEOE99BBd2AN3r4nMY0ea5/7NDqI/FhR1At9oO\n2DyfQ4ZfACARQh0AElk11G0/anvR9omhtqttH7Z9yvYh25tmWyYAYBKT7Kl/SdKukbb7JR2OiA9L\n+qfqPgCgY6uGekQ8LemNkebbJO2vpvdLur3hugAANdQdU98cEYvV9KKkzQ3VAwCYwtQHSqM8uTPH\nCZ4AMOfqnqe+aHtLRJyzvVXS6+OetLCwcHF6MBhoMBjUXB0A5FQUhYqiaGx5nuQqKtvbJR2MiBuq\n+w9K+k5EfN72/ZI2RcT9I/PEPF6hVV7J1vbFR+2ubx7fF1yyHvpoFxcf9eVzYVsRUftqqFVD3fZj\nkj4p6RqV4+d/LOnvJH1F0gcknZZ0Z0R8d2Q+Qn2yNba+vnl8X3DJeuijhPoMQ732ggn1SdfY+vrm\n8X3BJeuhjxLq9UOdK0oBIBFCHQASIdQBIBFCHQAS4f9TX4fa/gGCvhyAAtYDQn1d4kdAgKwYfgGA\nRAh1AEiEUAeARAh1AEiEUAeARAh1AEiEUAeARAh1AEiEUAeARAh1AEiEUAeARAh1AEiEUAeARAh1\nAEiEUAeARAh1AEiEUAeARAh1AEiEUAeARAh1AEiEUAeARAh1AEiEUAeARAh1AEiEUAeARAh1AEiE\nUAeARAh1AEiEUAeARAh1AEhkwzQz2z4t6XuSfijpQkTc1ERRAIB6pgp1SSFpEBH/20QxAIDpNDH8\n4gaWAQBowLShHpKesv2M7c82URAAoL5ph19ujoiztt8v6bDtkxHx9NKDCwsLF584GAw0GAymXB0A\n5FIUhYqiaGx5johmFmTvlXQ+Ih6q7kdTy26TbZVfQFpbY/r1zWM/6DP66GzW2Zd+alsRUXtYu/bw\ni+332r6qmr5C0q2STtRdHgBgetMMv2yW9LVyr0EbJP11RBxqpCoAQC2NDb/8vwUz/DLpGtOvbx77\nQZ/RR2ezzr70086GXwAA/UOoA0AihDoAJDLteeoz89Zbb+nAgQNdlwEAc6W3B0rPnDmjD37wp7Vx\n4+4Gq1rZu+9+WRcuvKXcB4U4UDrvOFA6m3X2pZ9Oe6C0t3vqknT55Zv09tuPtLa+K674+yrUAWA+\nMaYOAIkQ6gCQCKEOAIkQ6gCQCKEOAIkQ6gCQCKEOAIn0+jx1YB5U//000AuEOtCItq+4BMZj+AUA\nEiHUASARQh0AEiHUASARQh0AEiHUASARQh0AEiHUASARQh0AEiHUASARQh0AEiHUASARQh0AEiHU\nASARQh0AEiHUASARfiQD6fBLRFjPCHUkxS8RYX1i+AUAEiHUASCR2qFue5ftk7b/w/Z9TRYFAKin\nVqjbvkzSX0raJemjku6yfX2ThXWv6LqAKRVdFzCVoii6LmFKRdcFTKnouoApFF0X0Km6e+o3SXol\nIk5HxAVJfyPp082V1QdF1wVMqei6gKkQ6l0rui5gCkXXBXSqbqhfK+lbQ/dfq9oAAB2qe0pjK+eL\n/eAH39H73vcrbaxKkvT2299ubV0AMAuOWHs+2/45SQsRsau6v0fSjyLi80PPafNEYQBIIyJqX/xQ\nN9Q3SPp3Sb8g6Yyko5LuioiX6xYCAJhereGXiHjX9u9J+kdJl0naR6ADQPdq7akDAPqpkStKbf+a\n7Rdt/9D2x4bat9v+vu1j1e2vhh77uO0T1cVLf95EHXUsV3v12J6qvpO2bx1q70Xto2wv2H5taHt/\nauixsa+lb+bxojbbp20/X23zo1Xb1bYP2z5l+5DtTV3XucT2o7YXbZ8Yalu23r71nWXqn4u+b3ub\n7SNV5rxg+56qvbntHxFT3yR9RNKHJR2R9LGh9u2STiwzz1FJN1XT/yBpVxO1NFj7RyUdl7Sxeh2v\n6NI3m17UPua17JX0B2Pax72W93Rd75g6L6tq217VelzS9V3XNUHdr0q6eqTtQUl/WE3fJ+nPuq5z\nqLafl3Tj8GdzuXr72HeWqX8u+r6kLZJ2VtNXqjw2eX2T27+RPfWIOBkRpyZ9vu2tkq6KiKNV0wFJ\ntzdRy1qtUPunJT0WERci4rTKjfmJPtW+jHFHzce9lptarWoy83xR2+h2v03S/mp6v3rURyLiaUlv\njDQvV2/v+s4y9Utz0Pcj4lxEHK+mz0t6WeU1Po1t/zb+Q68d1dehwvYtVdu1Ki9YWvLf6t/FSz+p\nH69x6QKr0fa+1X637eds7xv6Crfca+mbeb2oLSQ9ZfsZ25+t2jZHxGI1vShpczelTWy5euel70hz\n1vdtb1f5jeObanD7T3z2i+3DKr86jHogIg4uM9sZSdsi4o1qvPpx2z8z6TqbUrP2XlrhtfyRpEck\n/Ul1/08lPSTpd5dZVB+PkPexpkncHBFnbb9f0mHbJ4cfjIiYp+s2Jqi3j69lrvq+7SslfVXSvRHx\n5vAPu0y7/ScO9Yj4xUmfOzTPO5Leqab/zfZ/SvqQyr3b64aeel3VNhN1aldZz7ah+9ep/CvZau2j\nJn0ttr8oaekP1rjX0lrNazBa5zb9+F5KL0XE2erfb9v+msqvx4u2t0TEuWrI7vVOi1zdcvXORd+J\niIvbt+993/ZGlYH+5Yh4vGpubPvPYvjl4p8c29e4/B8dZfunVAb6f1Ufgu/Z/oTLP1G/LenxsUtr\n1/CY3BOSfsP25bZ3qKz9aEScUz9rXzpWseQOSUtnB4x9LW3XN4FnJH3I5VlTl0v6dZW195bt99q+\nqpq+QtKtKrf7E5J2V0/brZ70kRUsV+9c9J156ftVZuyT9FJEPDz0UHPbv6EjuneoHAv9vqRzkp6s\n2n9V0guSjkl6VtIvD83zcZUb/hVJf9Hh0eixtVePPVDVd1LSL/Wt9jGv5YCk5yU9V3WKzau9lr7d\nJH1K5RkBr0ja03U9E9S7Q+XZCcervr6nar9a0lOSTkk6JGlT17UO1fyYyqHRd6q+/5mV6u1b3xlT\n/+/MS9+XdIukH1X95Vh129Xk9ufiIwBIhJ+zA4BECHUASIRQB4BECHUASIRQB4BECHUASIRQB4BE\nCHUASOT/AOEv1DXHi35fAAAAAElFTkSuQmCC\n",
"text": [
"<matplotlib.figure.Figure at 0x7f648814cc18>"
]
}
],
"prompt_number": 66
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pick one that's at around 100:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"diheds=np.array(diheds)\n",
"v1=diheds>95\n",
"v2=diheds<105\n",
"[(x,diheds[x]) for x in range(len(diheds)) if (v1&v2)[x]]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 67,
"text": [
"[(16, 104.26835252382864),\n",
" (23, 104.3320129910401),\n",
" (39, 99.235291673106488),\n",
" (53, 98.608209294812198),\n",
" (58, 104.84685405724763),\n",
" (66, 100.81049001360684),\n",
" (98, 103.1860413576712)]"
]
}
],
"prompt_number": 67
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# pick a conformer:\n",
"confid=66\n",
"cm = Chem.Mol(m.ToBinary())\n",
"conf = cm.GetConformer(confid)\n",
"\n",
"# we can't just create an empty force field from python, so build a version of the\n",
"# molecule that has no bonds:\n",
"tm = Chem.EditableMol(cm)\n",
"for i in range(cm.GetNumBonds()):\n",
" bnd = cm.GetBondWithIdx(i)\n",
" tm.RemoveBond(bnd.GetBeginAtomIdx(),bnd.GetEndAtomIdx())\n",
"nm = tm.GetMol()\n",
"\n",
"# build a force field for that no-bond molecule by constraining all distances\n",
"# that aren't the torsion we care about to their current value += .01\n",
"# in a real molecule, we'd probably want to only do 1-2 and 1-3 distances \n",
"# (i.e. bonds and angles) here in order to avoid over-constraining the system\n",
"dm = AllChem.Get3DDistanceMatrix(nm,confId=confid)\n",
"ff = rdff.UFFGetMoleculeForceField(nm,confId=confid)\n",
"for i in range(cm.GetNumAtoms()):\n",
" for j in range(i+1,cm.GetNumAtoms()):\n",
" if (i==0 and j==3): \n",
" continue\n",
" ff.AddDistanceConstraint(i,j,dm[i,j]-.01,dm[i,j]+.01,1000)\n",
"# add the torsion contraint:\n",
"ff.UFFAddTorsionConstraint(0,1,2,3,False,89,90,100)\n",
"\n",
"# now minimize and show the impact\n",
"print(\"init: \",ff.CalcEnergy())\n",
"print(\" dihed: \",AllChem.GetDihedralDeg(nm.GetConformer(confid),0,1,2,3))\n",
"\n",
"ff.Minimize()\n",
"\n",
"print(\"done: \",ff.CalcEnergy())\n",
"print(\" dihed: \",AllChem.GetDihedralDeg(nm.GetConformer(confid),0,1,2,3))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"init: 1.7799810505355833\n",
" dihed: 100.81048851403006\n",
"done: 0.0\n",
" dihed: 89.74331779800123\n"
]
}
],
"prompt_number": 83
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Look at the bounds matrices on output"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(AllChem.Get3DDistanceMatrix(m,confId=confid,force=True))\n",
"print(AllChem.Get3DDistanceMatrix(nm,confId=confid,force=True))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[ 0. 1.51611661 2.47397097 3.33549392]\n",
" [ 1.51611661 0. 1.51791201 2.46130593]\n",
" [ 2.47397097 1.51791201 0. 1.51854743]\n",
" [ 3.33549392 2.46130593 1.51854743 0. ]]\n",
"[[ 0. 1.50985942 2.47535342 3.23121866]\n",
" [ 1.50985942 0. 1.50943113 2.46683804]\n",
" [ 2.47535342 1.50943113 0. 1.51118927]\n",
" [ 3.23121866 2.46683804 1.51118927 0. ]]\n"
]
}
],
"prompt_number": 84
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try to really twist the molecule and see if that works:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# pick a conformer:\n",
"confid=66\n",
"cm = Chem.Mol(m.ToBinary())\n",
"conf = cm.GetConformer(confid)\n",
"\n",
"# we can't just create an empty force field from python, so build a version of the\n",
"# molecule that has no bonds:\n",
"tm = Chem.EditableMol(cm)\n",
"for i in range(cm.GetNumBonds()):\n",
" bnd = cm.GetBondWithIdx(i)\n",
" tm.RemoveBond(bnd.GetBeginAtomIdx(),bnd.GetEndAtomIdx())\n",
"nm = tm.GetMol()\n",
"\n",
"# build a force field for that no-bond molecule by constraining all distances\n",
"# that aren't the torsion we care about to their current value += .01\n",
"# in a real molecule, we'd probably want to only do 1-2 and 1-3 distances \n",
"# (i.e. bonds and angles) here in order to avoid over-constraining the system\n",
"dm = AllChem.Get3DDistanceMatrix(nm,confId=confid)\n",
"ff = rdff.UFFGetMoleculeForceField(nm,confId=confid)\n",
"for i in range(cm.GetNumAtoms()):\n",
" for j in range(i+1,cm.GetNumAtoms()):\n",
" if (i==0 and j==3): \n",
" continue\n",
" ff.AddDistanceConstraint(i,j,dm[i,j]-.01,dm[i,j]+.01,1000)\n",
"# add the torsion contraint:\n",
"ff.UFFAddTorsionConstraint(0,1,2,3,False,3,4,100)\n",
"\n",
"# now minimize and show the impact\n",
"print(\"init: \",ff.CalcEnergy())\n",
"print(\" dihed: \",AllChem.GetDihedralDeg(nm.GetConformer(confid),0,1,2,3))\n",
"\n",
"ff.Minimize()\n",
"\n",
"print(\"done: \",ff.CalcEnergy())\n",
"print(\" dihed: \",AllChem.GetDihedralDeg(nm.GetConformer(confid),0,1,2,3))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"init: 142.7478457005485\n",
" dihed: 100.81048851403006\n",
"done: 0.0\n",
" dihed: 3.881667593344122\n"
]
}
],
"prompt_number": 85
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment