Skip to content

Instantly share code, notes, and snippets.

@ptosco
Created August 23, 2018 11:27
Show Gist options
  • Save ptosco/a35ac28a14103b47096f6d6af1aec831 to your computer and use it in GitHub Desktop.
Save ptosco/a35ac28a14103b47096f6d6af1aec831 to your computer and use it in GitHub Desktop.
DummyMatch
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from rdkit import Chem\n",
"from rdkit.Chem import rdFMCS\n",
"from rdkit.Chem.Draw import IPythonConsole"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Build the two example molecules from SMILES; each one contains a single\n",
"# dummy atom ([*]) and every atom carries an atom-map number.\n",
"m1, m2 = (\n",
"    Chem.MolFromSmiles(smiles)\n",
"    for smiles in (\n",
"        '[*:1][CH2:2][C:3]([CH3:4])=[CH2:5]',\n",
"        '[F:11][CH2:12][C:13]([*:14])=[CH2:15]',\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAEBElEQVR4nO3d0W6bMACGUZj64Lw5u0iHEKYuy08AwzkXU5QAxVL0zSa06cdx7AB415+zTwCgbTIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgMjX2SfAg/R9Pz0ex3F6cnpcPjPtsthmy6ufs/tAfjoCTZBRDrJoxJZkLDJUj9RhAdp9INPzO54kR7Ko5whlO7ZUr77NKRO3Twxk9bA0REZpwOrqePXVK+v7vj4QGmVRz/nqQVlUsrK0P119IK/znE74lCsSfIKM8kEbA1GWceMRjgnQ9p9SGUh+cC5LRvmIV0HCQJze0F1G0W0YyMYtuSbXRtnfqwWLlfhianb9K4PlKLoPDGSc6U763IyQ2Sh7qkzfFgHa2Ityl58+pdkxQPVJ6BsDme/ikuj9mI2yj2EYVqdvc4uZ1/RkuVm5/fzJyqu5X0fxxkDmu2zcnob4/5DUMAzTv03b60ooT2NRz/tuE9DOEpuAtw5vGobhmIAKHBfnDUoDLLe5MhmlGWLKNckojRFTrkZGaZKYch0ySsPElCuQUYCI32Ki6367A7StO0P7mfoGB58Yd+X2e7ru302gZS7bCmi34Rs+zvrqEW7MbJSuq2a0oZJu+YYP3WR3MkrXFYv6htK5UfnX7UxF2YuMsuJ+GS1nqeVfDoX3yChAREZ5FjNQdiej3EeLX1XCDcgoK9q9Nvoq6aT8upH5Bj5iYhfeSQARs1GAiIwCRGSUO6j8+vzBZ8IDyShAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIr4ZFCBiNgoQkVGAiIwCRGQUICKjABEZBYh8nX0CsLO+76fH0/18fb+8t2/+zLRLef/f/GirG4CMciuLXJb1rO+yur10UmdRz32UEdxSQJUkJKPwbXWZ/3r8ctJ5cXUW9TxFvYOLmez0+H8vEfBAMspTlB8xzR/rI2+zqOfp6g21ludXMsp9jOO4qJ4IcgCLem5lUdKNS/Vyl2mKOj+ghT+rXBICiFjUA0RkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEPkLuvUi55PrZjkAAAAASUVORK5CYII=\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f99abc75760>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAEL0lEQVR4nO3dQW6jMACGURj1Rp0DcsDmTMwCDYowcZL+CRjy3mIUIZywGH21g1v6cRw7AH7rz94XAHBsMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAyNfeF8AH6ft+fj2O43xwfl0emYcszlk9efERlVHwQjLKRsre3W3coqdlbVdHSScbs6hnC2UEH4ld/ZxxHBWTFsgoB7C6zK+cPHn/dUHXWdTTgnryFjPZu1PUWwPhTWSU/VW+9JRC2mdRT7t+0VBrebYno2xhHMdF4PSO07CoZyOLkj44zSyH1Keo15/i2wC24YsngIhFPUBERgEiMsrb3bqb5C4T5yCjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYh4MihAxGwUICKjABEZBYjIKEBERgEiMgoQ+dr7Avgsfd/Pr29ttuv75T68edTu+/P+Xi6LIz/f36unlcdXD3ICMsp2Fn2s5PLWqHLI9uopLDt76yCnYVHPRsoClkEcx3H14Huv7KV+vr/Lzq4e5DRklP2tTkLLc1qYit5ivvnJLOrZ3yNxnM5poaTXxZznmCabn0xG4TmKyYJFPe8yLcNf8j75m/za38vFgp06s1FebwpfeUPp7p36pkz1NPfkLrNRXmkYhimOq32cSjq73sZUec/rUdtkt+/7aY/n4w01Y/1kTU8HOJBhGOZ/D63xOTINsqgndaaAdkfbpkoLZJTIMAwnCGhnEkrAfx1aJ3A0zi0mWjffYtr7QmCdn/Mchu8uaZPZKIcx7aMyLaU1MsrBWOPTGot6jsoan0aYjXJU1vg0Qkbpuntb6J89vqVyNnr9+6bXB8vTyiG3PmV1uO8WmNh+T9f930VfZrESyhYaWvrFXz+pP6TkEM81YV9mo3RdNaOruWzzl5ceeU5JqX7OCZ5rwrvJKF1XLOobTOQGHv+jU6tD+FgW9ayoZ7TNqehd9TIugvhIHDWUiYzynIM2tCvKuLjF9GwQNZSZjPK064wet6ozDSUkozyn5YYe7jklnINbTKxoKo5PufWckrpyq+ndW0yru1P5TH5WA0TMRgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYj8A3c7m+coeA00AAAAAElFTkSuQmCC\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f99abc75710>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m2"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Enable makeDummiesQueries and apply the same query adjustment to both molecules\n",
"qp = Chem.AdjustQueryParameters()\n",
"qp.makeDummiesQueries = True\n",
"m1, m2 = (Chem.AdjustQueryProperties(mol, qp) for mol in (m1, m2))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"()"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m1.GetSubstructMatches(m2)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"()"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m2.GetSubstructMatches(m1)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Maximum common substructure of the two molecules; with CompareAny the atom\n",
"# comparison does not require the elements to be equal\n",
"mcs = rdFMCS.FindMCS(\n",
"    (m1, m2),\n",
"    atomCompare=rdFMCS.AtomCompare.CompareAny,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mcs.numAtoms"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'[#0,#9]-[#6]-[#6](-[#6,#0])=[#6]'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mcs.smartsString"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Parse the SMARTS reported by the MCS search into a query molecule\n",
"mcsSmarts = mcs.smartsString\n",
"mcsQuery = Chem.MolFromSmarts(mcsSmarts)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Map the MCS query back onto each molecule: match[0] holds the matched atom\n",
"# indices in m1, match[1] those in m2\n",
"match = tuple(mol.GetSubstructMatch(mcsQuery) for mol in (m1, m2))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((0, 1, 2, 3, 4), (0, 1, 2, 3, 4))"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"match"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# The loop in the next cell pairs match[0][i] with match[1][i], so both matches\n",
"# must contain the same number of atoms\n",
"assert len(match[0]) == len(match[1]), 'MCS query matched a different number of atoms in m1 and m2'"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# FindMCS was run with CompareAny, so it pairs atoms regardless of element (it would\n",
"# also match a C to an N), whereas GetSubstructMatches() is only tolerant with atoms\n",
"# marked as dummy. For every MCS-matched pair in which exactly one atom is a dummy\n",
"# (atomic number 0), copy the dummy's atomic number, formal charge, explicit-H count\n",
"# and radical-electron count onto its partner, so that the substructure match\n",
"# repeated in the following cells can pair them.\n",
"# NOTE: the atoms of m1 and m2 are modified in place.\n",
"for idx1, idx2 in zip(match[0], match[1]):\n",
"    atom1 = m1.GetAtomWithIdx(idx1)\n",
"    atom2 = m2.GetAtomWithIdx(idx2)\n",
"    isDummy1 = atom1.GetAtomicNum() == 0\n",
"    isDummy2 = atom2.GetAtomicNum() == 0\n",
"    if isDummy1 == isDummy2:\n",
"        # both atoms are dummies, or neither is: nothing to copy\n",
"        continue\n",
"    # exactly one atom of the pair is a dummy; its partner receives the dummy's properties\n",
"    dummy, partner = (atom1, atom2) if isDummy1 else (atom2, atom1)\n",
"    partner.SetAtomicNum(dummy.GetAtomicNum())\n",
"    partner.SetFormalCharge(dummy.GetFormalCharge())\n",
"    partner.SetNumExplicitHs(dummy.GetNumExplicitHs())\n",
"    partner.SetNumRadicalElectrons(dummy.GetNumRadicalElectrons())\n",
"    partner.UpdatePropertyCache()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((0, 1, 2, 3, 4),)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m1.GetSubstructMatches(m2)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((0, 1, 2, 3, 4),)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m2.GetSubstructMatches(m1)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAD4klEQVR4nO3d206rQACG0WHH939l9kUjEk6l/TkOa8WLqkAhMZ8zhZambdsCwLf+nb0DAPcmowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyCgHapq/r/4Px4uNV1ne4JFeT7fwpAfvD2f7OXsHeIymKW07++3bVcbLL/92P2279HQa+jxGoxxi3J011Vte5rBuDryOZTKXR9acy5BR7mAw8Fz47QFez7WwPzyMST0XsFyiQSUXpvanOOWFBa5ERtnN+qyMy7hyI8eU69OXcXkYGWUHXQfDuJze0I8OZH39qYuMsrWuJoOZ+Bdn6s81eSBz7nVobEpG2U5/ODbuyODs9srQjFeZO920Ybk+2qxoPl7T+gsgt1POjlfNgXAgo1EyNXXno1k8/JJRAtV0p6Z/BhzO5fd8pf9O9sNOl++nu6JeQ/mc10b51mGnVqoZ81IpGeUmxJSrklFuRUy5HhnlhsSUK5FRbktMuQZn6rktJ9a5BhmllPLuxhhX/kjNcUknb1UyuQBsweX3lFLmb4xxu9a8/QCUs249Qr2MRimlzN8Y414T5zW3KrnR4XATMkop5QE3xhgc0WtSL6lsQUaZUl9fxqPUudvSwYdkFCAiozyMEShbk1EqMp6niyb7c8ETU+57+mXuViXdEfUXuOkxcjHeDAoQMakHiMgoQERGqcLce+S9d579yShAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIu4MChAxGgWIyChAREYBIjIKEJFRgIiMAkR+zt4B2FrT/D3urudrmjK4tq//k26V8fV//a1NLsDjySh1GeRyXM/lVSaXl04WmdRTkXEE1xRQJcnIKPyanOa/Hr++YIpJPY+x3MHBSLZ7/OlLBDyPjPIY41NM/cf6yLdM6nm85Yaay/OOjFKRth1WTwTZn0k9dRmUdOVUfbxKN0Ttb9DEnyk+bxQgYlIPEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChAREYBIjIKEJFRgIiMAkRkFCAiowARGQWIyChA5D80KRNUV0S8QAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f99aa60e120>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m1"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAD8klEQVR4nO3dXY+iMACG0bKZ//+X2QszjgGswCtf5ZzMxe4E0CabZ1tF2/V9XwBY69/RTwDg2mQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZZUdd9/fz+svxYeNTKtd89yhbeFz23cWX/p4m/Bz9BLiNrit9//avH08ZHz/Zpvopub6fvuyi0NMWs1F2MU7PnMDVj+n7iQO+3s2Bx0DGZZx8MmWblHMyMsoVDOaYS0/5osc1lz4ZmmZRzwnUYzQI4pw47jYHrD+Kqeg9yCibmR+RyoueK0r09Xitu6CG3oaMsoFnB8OUHN7QcCDh/wdchIzybc92DFbiK96pP9bkQOa71mAJyCjf8zr5mnwPvX7AzGt+TNKKR1l9BX2klK73j4BcXq6TaGYg7MhslFi4+D2PZgbCvmSUQDNzt2YGwhHcfk/geS/6punZ4Rb3fQZCo7w2yulZa3NuMspFiClnJaNciphyPjLKBYkpZyKjXJaYcg7eqeeyvLHOOcgopZRVe2Oc5Ks2J7+I/mxbj9A0t99TSlm4N8aZW/PxC1AO2XqEppmNUkpZuDfGaVfTc7YqOWTrEZomo5RSbrA3xpwRPRb1kspCMsqU9lIycwe9ySk5VMkoQERGocUXMdiRjNKQ8ZJcH9meG56Yct13Wt5tVVIf0etZFx04x/FhUICIRT1AREYBIjJKE959HN7H5NmejAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgIqMAERkFiMgoQERGASIyChCRUYCIjAJEZBQgYmdQgIjZKEBERgEiMgoQkVGAiIwCRGQUIPJz9BOAb+u6vz8/7+frujK4t+/1N89T3t3/Nzj99SEqZ3EPMkpbxr372LhBT8e1nSSd/LKopyHjCM6JXf2YvldM6mQUfk0u8ysHP364PYt6bqOevMFM9uMU9d2J3I+MchuVFz2lkIBFPbe3oqHW8ryQURrS98PA6R3bs6inLYOSzpxmjk+pT1FfH8WrAbfn+0YBIhb1ABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRGQUICKjABEZBYjIKEBERgEiMgoQkVGAiIwCRP4Dc3ArSlUCLqYAAAAASUVORK5CYII=\n",
"text/plain": [
"<rdkit.Chem.rdchem.Mol at 0x7f99aa60e760>"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment