Created
May 19, 2020 14:39
-
-
Save ptosco/e410e45278b94e8f047ff224193d7788 to your computer and use it in GitHub Desktop.
SubstructMatchAromaticity
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from rdkit import Chem\n", | |
"from rdkit.Chem.Draw import MolsToGridImage" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"smiles_strings = '''\n", | |
"C12=CC=CN1NCCC2\n", | |
"C12=CC=CC(C=C3)=C1N3NCC2\n", | |
"'''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['C12=CC=CN1NCCC2', 'C12=CC=CC(C=C3)=C1N3NCC2']\n" | |
] | |
} | |
], | |
"source": [ | |
"smiles_list = smiles_strings.splitlines()[1:]\n", | |
"print(smiles_list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"params = Chem.SmilesParserParams()\n", | |
"params.sanitize = False" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mols = [Chem.MolFromSmiles(x, params) for x in smiles_list]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
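"If you sanitize normally, aromaticity perception also marks the six-membered carbon ring of the larger molecule as aromatic. Compared with the atoms and bonds that the smaller molecule (the pattern) would map onto, the larger molecule therefore has one more atom and one more bond marked as aromatic, and so it fails the match:" | |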
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"c1cc2n(c1)NCCC2\n", | |
"0 C True\n", | |
"1 C True\n", | |
"2 C True\n", | |
"3 C True\n", | |
"4 N True\n", | |
"5 N False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 C False\n", | |
"0 1 AROMATIC True\n", | |
"1 2 AROMATIC True\n", | |
"2 3 AROMATIC True\n", | |
"3 4 AROMATIC True\n", | |
"4 5 SINGLE False\n", | |
"5 6 SINGLE False\n", | |
"6 7 SINGLE False\n", | |
"7 8 SINGLE False\n", | |
"4 0 AROMATIC True\n", | |
"8 0 SINGLE False\n", | |
"\n", | |
"c1cc2c3c(c1)ccn3NCC2\n", | |
"0 C True\n", | |
"1 C True\n", | |
"2 C True\n", | |
"3 C True\n", | |
"4 C True\n", | |
"5 C True\n", | |
"6 C True\n", | |
"7 C True\n", | |
"8 N True\n", | |
"9 N False\n", | |
"10 C False\n", | |
"11 C False\n", | |
"0 1 AROMATIC True\n", | |
"1 2 AROMATIC True\n", | |
"2 3 AROMATIC True\n", | |
"3 4 AROMATIC True\n", | |
"4 5 AROMATIC True\n", | |
"5 6 AROMATIC True\n", | |
"4 7 AROMATIC True\n", | |
"7 8 AROMATIC True\n", | |
"8 9 SINGLE False\n", | |
"9 10 SINGLE False\n", | |
"10 11 SINGLE False\n", | |
"7 0 AROMATIC True\n", | |
"11 0 SINGLE False\n", | |
"8 6 AROMATIC True\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in mols:\n", | |
"    Chem.SanitizeMol(m, Chem.SANITIZE_ALL)\n", | |
"    print(Chem.MolToSmiles(m))\n", | |
"    for a in m.GetAtoms():\n", | |
"        print(a.GetIdx(), a.GetSymbol(), a.GetIsAromatic())\n", | |
"    for b in m.GetBonds():\n", | |
"        print(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondType(), b.GetIsAromatic())\n", | |
"    print()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAADICAYAAAA0n5+2AAAsGElEQVR4nO3deVRV16EG8A+QWRCciSM4RQRRBoGAQ0DLU6HxxUJiFBoTczHpUzL5IJootc8EXh0gaVW0WrkOqdoYA45AfEZQBhnUqAgKjXE2GnFghrvfHwSqEZDhwLnD91trr65wzz3nk9Xkfu6z7z56QggBIiKi1tOTOwCRutKXOwARERGRtmHBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYixYRERERBJjwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYl3kDkBE2uvWrVs4duwYjI2NYWZmhq5du8LQ0BDW1tYwMjKCubk5zM3NYWRkJHdUIiJJ6QkhhNwhiEj7PHr0CM7OzqisrIRKpUJpaSlKS0tRVVXV6PFWVlYwNDSEhYUFTE1NYWJiAktLSxgaGqJbt24NJc3CwgKGhoawsrJ6qqQ1dQ4jIyNYWlp28m9AJ+jJHYBIXXEGi4g6xFdffYXS0lL8+OOPMDAweOK1kpISVFdX4+HDhygvL0dFRQXu37+P6upqPHjwABUVFSgvL8fDhw9RXV2NkpISVFVVobS0FDdv3kRVVVXDzx49eoSysjJUVlY+cY7GmJiYwNTUFN26dcOXX34JDw+PzvhVEJEO4gwWEXUIX19fuLq6Ijo6WpbrN1fSdu7ciby8PGRnZ8uSTYtwBouoCSxYRCS5a9euYdCgQcjLy4Ojo6PccZ5y/vx5ODg44Nq1a7CxsZE7jiZjwSJqAr9FSESSi4+Px5gxY9SyXAGAvb09Bg4ciJSUFLmjEJGWYsEiIslt3boVISEhcsdolp+fHw4dOiR3DCLSUixYRCSpzMxMXLp0Ca+88orcUZrl5+eHpKQkqFQquaMQkRbiGiwiktQf/vAHXL16Fd98881Try1evBiXLl1q8fYKv96ioWvXrg3Ht9eDBw/Qs2dPpKWlYdy4ce0+n47iGiyiJnCbBiKSTFVVFXbt2oX169c3+rqjoyOEEA1bNNy5c6dFWzQ0pjUlzc7ODosWLXri/ZaWlnjhhRdw6NAhFiwikhxnsIhIMnv27MG8efNw48YNGBsbS3be+u0V6jcqfXwfrcb2wPp1SevatStWr1791Hk/++wzJCYm4sSJE5Jl1TGcwSJqAgsWEUlmxowZeO6557B27Vq5o7RIXl4e3NzccOvWLfTo0UPuOJqIBYuoCVzkTkSSuHv3Lg4ePKj23x583JgxY9C7d29u10BEkmPBIiJJfPVVMmxtbTXq8TN6enr4zW9+g8OHD8sdhYi0DAsWEUli48ZX8cYbJ+WO0Wr/8R//gYMHD4KrJYhISlyDRUTtlp8PjBoFFBUBtrZyp2mde/fuoVevXsjOzsaYMWPkjqNpuAaLqAmcwSKidouPByZN0rxyBQDW1tZwc3Pjru5EJCkWLCJqF5UK2LED0KC17U/x8/PjOiwikhQLFhG1y7ffAj//DMycKXeStps6dSqOHz+O+/fvyx2FiLQECxYRtYtSCbz8MmBhIXeStnNzc4OVlRX+7//+T+4oRKQlWLB0WG1tLdavX4+PPvoIX331ldxxSAM9egTs3QsEB8udpH309fUxefJkrsMiIsmwYOmoo0ePwsXFBUuXLsWtW7fw+9//Hj4+Pjh9+rTc0UiD7N4NdOsG+P
jInaT9/Pz8cODAAbljEJGWYMHSMVeuXEFISAh+85vfwN3dHefPn8fmzZtRWFiIYcOGwdXVFSEhIbh9+7bcUUkDbN1aN3tlYCB3kvbz8/PD1atXkZ+fL3cUItICLFg6oqysDJGRkRg+fDhu3LiBvLw8xMXFoWfPngCA5557DnFxcUhPT0dRURFGjBiB6OhoVFZWypyc1NXly8B33wGzZ8udRBp9+/bFmDFjeJuQiCTBgqXlhBDYvXs3Ro4ciR07dmD37t1ITk7GqFGjGj3e1dUVaWlp2LBhA9auXYvRo0dj3759nZyaNMHWrYCLC+DgIHcS6UydOpUFi4gkwYKlxbKzszF+/HgoFAq88847+P777+Hv7//M9+np6SEwMBD5+fmYNWsWAgMDMWXKFJw7d64TUpOm2L5d8xe3/5qfnx+OHTuGsrIyuaMQkYZjwdJC169fR2hoKDw9PWFnZ4cLFy4gPDwcxsbGrTqPmZkZIiMjUVhYCBsbG4wdOxZhYWEoKSnpmOCkMe7eBXr1AmbNkjuJtF544QUYGxvj6NGjckchIg2nUwXr5s2bUCqVuHPnjtxROkRVVRViY2MxcuRIFBQU4OTJk1AqlejTp0+7zjtgwAAolUokJSXhu+++w5AhQxAbG4va2lqJkpOm6dEDOHYM+GUJn9bo0qULfH19uas7EbWbzjzs+ezZs3B3d4e1tTVu3ryJSZMmYebMmZgxYwZsbGzkjtduiYmJePfdd1FVVYUVK1YgODgYenrSP4dVpVJh27ZtWLRoEZ577jnExsZiwoQJkl+HSC4bN27EypUrUVBQIHcUTcCHPRM1QSdmsCoqKjB79mzMmjULV69exenTp+Ht7Y0vvvgC/fv3h6urKyIjI3Hp0iW5o7Zafn4+pk6dildffRXBwcEoLCxESEhIh5QroG5DxpCQEBQUFMDPzw9TpkxBQEAA/vWvf3XI9Yg629SpU1FYWIiioiK5oxCRBtOJgvXee++hrKwMa9asAQCMGjUKkZGROH/+PM6cOYPAwECkpKRg2LBhDa+p+99ef/75Z4SFhcHJyQkWFhY4f/48IiMjYWpq2inXt7KyQlRUFL7//nsIITBq1ChERETg4cOHnXJ9oo7Sv39/jBo1it8mJKJ20fpbhF9//TVmzZqF9PR0jB07ttljf/jhB3zzzTfYvXs3Tpw4gZEjRyIwMBABAQFwcXHppMTNq6mpwebNm7FkyRIMGjQIMTEx8Pb2ljsWUlJS8O677+LevXtYtmwZ5s2bB319nejvpIU+/PBDFBYWIiEhQe4o6o63CImaIrTYlStXRI8ePURMTEyr33v58mURExMjJk+eLAwMDISdnZ1YuHChSE1NFSqVqgPSPltKSopwcHAQNjY2Ii4uTtTW1sqSoylVVVUiJiZGdOvWTbi5uYnjx4/LHYnao1aIv/oIoddFiPeOPfbzKiEWDBTita/lCtbxkpOThbm5uaioqJA7iroDBwdH40NrpxhUKhVCQkIwbtw4LFy4sNXvHzhwIMLCwpCcnIybN29i2bJlKC4uho+PD2xtbREWFoa0tDQI0fETgBcvXkRQUBCmTZsGHx8fXLhwAQqFQu1miAwNDREWFoaioiJ4enpi4sSJCAoKwpUrV+SORu3QzQrYvAQo1KEvjU6YMAF6enpIS0uTOwoRaSj1+oSW0PLly3H+/Hn8/e9/b/eC7549eyIkJASJiYm4efMmli9fjuLiYvj6+mLQoEEIDQ1FYmIiampqJEpf59GjR4iMjISjoyPKy8uRn5+P2NhYWFpaSnodqfXo0QOxsbHIysrCrVu3MHLkSERGRqKiokLuaNQGtq8DM34EIr8GtHE9QWN/STIyMsKkSZO4DouI2kwrC1ZaWho+/fRTbNu2rdE9oO7du9fmc3fv3r2hbN26dQurVq1CeXk5Zs2ahb59+za8Vl1d3eZrqFQqKJVKDB06FDt37sTevXuRmJgIOzu7Np9TDmPHjsV3332HL7/8EkqlEs
OHD4dSqZQ7FrWSnhXwSQRweBmQUS53Gmnt378fDg4OuH79+lOv+fn5sWARUdsJLXPv3j0xePBg8dFHHzV5zLhx48Tw4cNFRESEyM7OluS6paWlIiEhQQQHBwsLCwthbW0tgoODRUJCQqvWcWRkZAh3d3fRvXt3ERMTI6qrqyXJJ7eysjIRFRUlLCwsxIsvvihOnz4tdyR6ll/WYDn/jxA1FUIsdhTixS+EqNWCNVglJSVCoVAIQ0NDER4eLiorK586pri4WAAQly9fliGhxpB9nQsHh7oOrZvBevvtt9GrVy9ERkY2ecyBAwewZMkSnD17Fp6enhg0aFDDmiqVStWm65qZmSEgIABKpRK3b99GfHw8ACA4OBjdu3dveO3Ro0eNvv/q1asICQnB+PHj4eTkhIKCAoSFhaFLly5tyqNuTE1NER4ejvz8fPTv3x/Ozs4ICQnB7du35Y5GLWEMfLAcuBgNJGj4gxAOHz4MR0dHZGRkIDMzE1FRUTAyMnrqOFtbWwwdOhRJSUkypCQijSe0yPr160XXrl1FYWFhi9/z888/i/j4eOHv7y+MjY1Fz549G2aeqqqq2p2ppqZGpKamioULF4o+ffoIU1NT4e/vL+Lj48X9+/dFaWmpiIqKEl27dhW+vr7izJkz7b6mJsjKyhKenp7C2tpaREVFNTqDQDJ7fAbrl3/+fJIQjh8K8ZYGzmC1ZNbq1xYsWCB+97vfdUI6jSX7LAEHh7oOCC1x7tw5YWZmJrZt29bmczx+m69r166ie/fuDWVLigJQXV0tkpOTRWhoqOjTp48wMTERw4YNE8OGDRMJCQntPr+mqa2tFZs2bRJ9+/YVM2fOlKTQkoR+XbCEEGXHhRhpKUQPc80qWIcOHRIDBgwQo0ePFjk5OS16T0lJiZg+fboYPny4UCgU4urVqx2cUiPJ/iHGwaGuA0ILVFRUCCcnJzF37lzJzllWVtZQtiwtLYWVlZUIDg4Wu3btEo8ePWr3+WtqasRnn30munfvrvOzNzdu3BAARH5+vtxRdN4TW6s1UrCESohdrwqhr6cZBev+/futnrUSQoiDBw+K/v37i7Fjx4p169YJDw8PYWJiIhYsWCCuXLnSwak1iuwfYhwc6jogtMAf/vAHMXToUPHgwYMOOX95eblISEgQCoVC9O7dW5iZmTXc5mvPNRMTE0WfPn0kTKqZHj58yIKlBrZsEcLRUYi2/F9aHf+OcPjwYTFgwADh6OjY4lmr5gpZamqq8PHxEUZGRiI4OFhcunSpo6JrEtk/xDg41HVAaLj9+/cLIyMjkZmZ2SnXe3xNVd++fYWJiUlD2SopKWnVuViw6rBgyev6dSFeekkIMzMhoqJ+NYvVAnfuCDFokBA7dnREutZr66xVSwtZamqq8Pf3F4aGhiI4OLhVaz61kOwfYhwc6jogNNjVq1dFz549xerVq2W5fm1tbUPZ6tevnzAwMBBeXl4iJiZG3Lx585nvZ8Gqw4IlD5VKiLg4ISwthfDzE6I9uxHExwthaiqEQiGEnEvpDh8+LAYOHCgcHR1bvAVLWwvZr4tWQUFBe6JrKtk/xDg41HVAaKja2lrh4+Mj/Pz8ZHs24K/zZGdni2XLlolhw4Y9UbauX7/e6HtYsOqwYHW+4mIhfH2FsLKqK1lS/CuUkyPE4MFCjB8vxI0b7T9fa7Rn1qq1hezXjh8/Lvz9/YWBgYEIDAwUFy5caNN5NJTsH2IcHOo6NHYfrBUrVuDs2bPYsmVLux+FIwV9fX24uLggMjIShYWFyMjIwPjx4/HXv/4VAwYMwI4dO+SOSASVCtiwARg9GjA1Bc6eBRQKQIp/hZydgZMnAWNjwNUVSE9v/zlbIjk5GY6Ojjhx4gTS09Ob3NfqcQ8ePEBoaCj8/f0xa9YsZGdnw8XFpU3Xf+GFF5CYmIjs7GwAwKhRoxAUFIT8/Pw2nY+ItITQQJmZmcLY2FgkJSXJHaVFTp8+LW7duvXUzzmDVYczWJ3j7F
kh3N2F6N277pZeR6mpESI8XAhDw7o1XR2ltLRULFy4sGHWqqVPTEhKShIDBw4UDg4Okj3J4XGnTp0SgYGBwsDAQPj7+7d4gb2Gkn2WgINDXYfGzWCVlJTglVdeQVhYGKZMmSJ3nBYZPXo0evfuLXcM0mEqFRAYCAwdCpw/D4SEdNy1DAyAqChg927gs8+A4GCgrEzaa6SmpsLJyQlHjhzBiRMnEBUVBWNj42bfU1ZWhrCwMEyfPr3ds1bNcXJywq5du5CXlwdra2u4u7sjICAAOTk5kl+LiNSXxhWsd955Bz169MCf/vQnuaMQaQx9feD4cWDbNqBHj8655ksvAZmZQG4u4O0N/Otf7T9nWVkZIiIiMHnyZMycORPZ2dlwdXV95vvaUsjay9HREUqlsqFoeXh4ICAgACdPnuzQ6xKRetCogrVx40YkJiZix44dz1xjQURPsrbu/GuOGAFkZAC2toCbG9Cex/qlpaXByckJ+/btw/Hjx1s8a9WWQiYlBwcHKJVKnD59GtbW1vD09MSUKVOQlZXVqTmIqHNpTMG6ePEi3n//faxbtw7Dhw+XOw4RtZCFBfDPfwKLFgEBAUBkZN0ty5aqL0m+vr6YOXMmcnJyWlSS2lLIOpK9vT2USiUKCwthZ2cHLy8vTJkyBRkZGbJlIqKOoxEFq7KyEkFBQXj55ZcxZ84cueMQUSvp6QHh4cDevcC+faWYM+dNPHz48JnvO3r0KBwcHLB///5Wz1q1tpB1Fjs7O8TFxTUUrfHjx8Pb2xtHjhyROxoRSUgjCtaiRYvw8OFDfPHFF3JHIaJ2mDoV2LPnLi5d+h6urq44d+5cs8dfunQJr776aotv7aWlpWHMmDHYt28f0tLSZJ+1ao6trS3i4uJw8eJFuLi4YNq0afD29kZiYqLc0YhIAmpfsA4ePIi4uDhs374dlpaWcseRlKGhFezsnOSOQepIBaz1BfQNgfdTH/t5NbBwEDB775PHOXwC1ODJ4963A2Zs77TELTZw4EAcO3YM3t7e8PT0xD//+c8mj503bx4+/fTTVs1avfzyy8jJyYGbm5vU0TvE4MGDERsbi4KCAri4uCAoKIhFi0gLqHXBunbtGkJCQrBixQq4u7vLHUdy1dXeKC4+LHcMUmPdrIDNS4DCWrmTSMvExASbNm3CypUrMWfOHERERKC2tm1/SE2atWrOoEGDEBsbi8LCQri4uOCVV16Bl5cXixaRhlLbgqVSqfD73/8ezs7OeP/99+WOQyQL29eBGT8CkV8DQu4wHUChUODbb7+FUqnE5MmTcfv27Ra/t7y8XGNnrZozYMAAxMbGNtw6DAoKgqenJw4dOiR3NCJqBbUtWJ999hm+//57bNmyBfr6ahuTqEPpWQGfRACHlwEZ5XKn6RheXl44deoU9PT04Orq2qLtC44fPw4nJyckJiYiNTVVY2etmtOvXz98/vnnKC4uhoeHB5YuXcrH7xBpELVsLidPnsTy5cuxadMm2NjYPPV6fn4+p81JZwyeC8w3AJZsApra3aBgNdC3J9CzftgA63/s1Jjt0rt3byQlJeG1117DxIkTsWnTpkaPq5+18vHxwcsvv4zc3FyMGzeuk9N2LhsbG6xZswa3bt1iwSLSIF3kDtCYefPm4Z133oG/v3+jr584cQLvvfcecnNzMXTo0E5OR9TJjIEPlgNOC4CEmY0fMmQecHgRYFD/gxpg+USg5Tfc5NelSxdERUXB0dERCoUCWVlZ+OKLLxo2FT5x4gTmzp0LlUqFlJQUjB8/XubEncva2hr37t2TOwYRtZBazmA9y5tvvgl/f3+8+uqrqKqqkjsOUYfr/lvgv4cCS1cDFY283sUS6Ncf6F8/+gFdDRo5UAPMnj0bx48fR0pKCry8vFBYWIiIiAhMnDgRkyZNwunTp3WuXAF1BaukpETuGETUQmpZsP72t79h7dq12LdvX5PHrFu3Dj///DM+/vjjTkxGJBN9YN4KoGYDsOeu3GE63p
gxY5CVlQVra2tMnjwZX3/9NVJTUxEXFwczMzO543WI8vJypKamQtXENvecwSLSLGpZsNzc3LB06VK8+eabuHHjRqPHdOvWDf/4xz8QGxuL/fv3d3JCos5n6gn8cRpwr0zuJJ2jR48e2LdvH65evYqtW7fCw8ND7kgd6saNG5gwYQLu37/f6OssWESaRS0LFgB89NFHcHR0xOuvv97k3+jGjRuHTz75BG+88QZu3rzZyQmJOpA+8M63QM6Sx9ZV6QGBXwK1KmD7jCePO/unXy2oNARWFwN7Z3dmaOkZGRnBwsICpaWlckfpcNa/PI27qduAVlZWLFhEGkRtC5a+vj7i4+ORm5uL1atXN3nc4sWLG4qYENq4UxCRbtOVYtGtWzfo6+s3+WflGiwizaK2BQuo2wdGqVRiyZIlyMzMbPSY+iKWk5PTbBEjIs2kK7fG9PX1YWlp2WzB0oXfA5G2UOuCBQBTp05FaGgoZs+ejQcPHjR6TL9+/RAfH4/Fixc3WcSISDPp0sxNc7N1LFhEmkXtCxYA/PnPf4aFhQUWLFjQ5DHTpk2DQqFotogRkebRpWLR3J9VV26VEmkLjShYxsbG2LVrF/bs2YNt27Y1edzKlSthYWGBhQsXdmK6tuvVC5g0Se4UROpN1wpWU7N1ujSTR6QNNKJgAcCwYcOwevVqvP322ygsLGz0GGNjY2zfvh27d+9utoipC3d34B//kDsFaYK9e4HvvpM7hTysrKx0plg0Vyatra1RVVWlE9+oJNIGGlOwAOCtt95CQEAAXnvttSZ3cLe3t8eaNWuaLWJEmmbzZuDoUblTyEPXZrCaK1hA09s4EJF60aiCBQBr167F3bt38cknnzR5jEKhgL+/P2bPns1H6ZBWKCsDtHQD82fSpYL1rEXuAHTmd0Gk6TSuYFlZWWHnzp2IjY1FcnJyk8etW7cOd+7cwdKlSzsxHVHHKC1lwdIFza2zMjIygpmZmc78Log0ncYVLKBuB/clS5Zgzpw5Te7gXl/EYmJimi1iRJpAl2ewdOnbc88qk7r0uyDSdBpZsABgyZIlcHBwaHYH93HjxmHx4sXNFrFOpQLW+gL6hsD7qY/9vBpYOAiYvVeuYKTudLlg6dK3555VsHTpd0Gk6TS2YOnr60OpVCInJwcxMTFNHvfxxx/DwcEBc+fOVZtH6XSzAjYvAQpr5U5CmkLXC1ZVVRXKyrT/KdfPmqHSpdulRJpOYwsW8O8d3CMiIpCVldXoMfVF7OTJk80Wsc5k+zow40cg8mtAPSofqTtdL1iAbizurp+hauovgyxYRJpDowsWULeD+1tvvYXZs2fj4cOHjR7z+KN0cnNzOznh0/SsgE8igMPLgIxyudOQJmDB0p2CVVNTg0ePHjX5ui78Hoi0gcYXLABYtWoVzM3NERYW1uQx06dPxxtvvIFXXnmlySLWmQbPBeYbAEs2ASq5w8isqQ8TqlNTA1RV6W7B0qVvzz1rrytd2nSVSNNpRcEyNjbGjh07sHPnTmzfvr3J41pSxDqNMfDBcuBiNJBwR+4w8lCpVNi8eTPGjh2LmTNnYsiQIXJHUkv1S4/MzeXNISdd+faclZUV9PT0+MBnIi2gFQULqNvBffXq1Zg/fz4uXrzY6DEmJiYtKmJSqqwEfvih8de6/xb476HA0tVARaekUR8nT56Et7c3PvzwQ7z77rvYsWMHDA0N5Y6lluoLlq7OYAG68+05Q0NDmJubs2ARaQGtKVgAEBoa+swd3O3t7bFq1apmi1h7lZcDe/YAc+YAffoAb73VxIH6wLwVQM0GYM/dDomidq5du4aQkBB4enpi6NChKCgoQHh4OIyMjOSOprZYsHSrWDxrN3dd+T0QaTqtKlhA3Q7uP/30EyIjI5s8Zv78+Zg+fbqkj9IpKwMSE4GQkH+XKn19YOtWYN++pt9n6gn8cRpw71ffQK+pAZTKuv/VBuXl5YiOjsbIkSNx9epV5ObmQqlUolevXnJHU3ssWL
pVLJr7s+rKrVIibaB1BcvKygpbt27FypUrkZKS0uRx69atw4QJE1Bb2/bNqEpKgN2760pV797AG2/U/Xz7duDmzbqCFBAAGBv/8gZ94J1vgZwlgEH9SfSAwC+BWhWwfca/z11YCCxaBIwdC3z7bZsjyk4IYOdOYMKEydi4cSOUSiWOHDmC0aNHyx1NY1RVXcaLLy6BLk/y6VKxaO52qK7cKiXSBlpXsADA29u7YQf3W7duNXqMtbU1Vq5cCVNT01ad++ef/12c+vQBPvgAMDUFvvwSuHHj36+1dzmRvT1QXAzMnAlMn153zqKi9p2zs+XmAhMn1s3mzZ27AefOncOMGTPkjqVxHjz4F3Jy/iJ3DFnpUrFobgbL2toaZWVlqKys7ORURNRaWlmwAGDp0qWwt7eXZAf369evY8uWDPj41M1ULV9eV4BSU4HLl4G4uLoC1KWLROF/YW4OREYCZ8/WlTh7eyAsDHjwQNrrSO3u3bqc7u7A4MF1s3HvvDMKxg1TedQaZWVlMNPl+4PgLcLHXwOa3saBiNSH1has+h3cs7Ky8Pnnn7f6/ZcvX8bq1avh5eWFAQMGIC7uf+DlBWRnA5cuAdHRwLhxgJ5eB4T/laFDgV27gAMHgCNHgOefBzZsANpxd7NDVFcDsbHAkCFAejpw7FjdjF7fvnIn02wsWLpVsJq7HWplZQVANzZdJdJ0WluwAKB///7YuHEjwsPDkZeX98zjf/jhB8TGxsLb2xu2trbYtGkTpkyZgqysLKSn78Of/gSMGdPxuZvi6wvk5dXNai1ZUjdDlJYmX57HpaTU/W7+93+Bzz8HMjMBT0+5U2kHFiyuwapnbm4OY2NjnfldEGkyrS5YAPCf//mfmDt3LoKCghrdwf3cuXOIjo5uKFUbNmzA5MmTkZ+fj3PnziEyMhIuLi4yJG9cly6AQgEUFABeXoCPDxAUVHerUi4//QTMmlW3XqywsG7Rf2fM7OkKFizdmsF61p+1W7duOvO7INJkWl+wAGDNmjUwMzPDe++9BwANxcne3h6jR4/G7t27MXnyZFy8eLHhtREjRsicunndu9fdjjtzBnj4sG59VmRk3R5cna1Xr7qCt3y5bu823lFKS0tZsLjI/YnXdeV3QaTJJF6WrZ5MTEywfft2uLu749ChQ7h58yYmTZqEBQsWYMaMGbCxsZE7Yps9/zxw8GDdHlzvvgts2gSsWAEEB3fuLJKOf/53KM5gPfntOW3/ssSzbofq0mwekSbTiYIFAA4ODigqKkJSUhKmTZuGnj17yh1JUgEBgJ8fsG4dsGABsHkzEBMj75oxkkZ5eTkL1mPfnuvTp4/MaTpWS2awWLCI1J9O3CKs17dvX4SEhGhduapnZFS3PcKFC8CIEYCbW916qCa2AiMNwRks3fr23KhRo/DVV181ub2MLi34J9JkOlWwdIWNTd3eXGlpdYvOn38eWLPml20dVMBaX0DfEHg/9bE3VQMLBwGz9/7yz78c5/AJ8MTTeqqB9+2AGZ3zrGwCCxagW9+es7KywvTp06HXxD1+rsEi0gwsWFrM3b1uP6rPPwe+++7JNVndrIDNS4BCNdtLi57GglWnuW/P3b9/H3/7298ke7aoOuMtQiLNwIKl5fT06ha8791b9/DperavAzN+BCK/Btq3zz11NBasOs3N3OTn5+Pjjz/G8OHDsW7dOq19lMydO3eQnp4OBwcHuaMQ0TOwYOkoPSvgkwjg8DIgQ4atHajlysrKtP6bcy3R3MyNh4cHLl++jMWLF2PFihUYNGgQoqOjUS7HviUdZM+ePXBwcEB5eTlef/11ueMQ0TOwYOmwwXOB+QbAkk2AqoljClYDfXsCPeuHDbD+x06NqfP8/f2xdu1aJCYmyh1FVs+6NWZsbAyFQoHi4mIsX74cf/nLXzB48GBER0ejrKysE5NKq6SkBKGhoZg9ezbee+89pKamws7OTu5YRPQMLFi6zBj4YDlwMRpIuNP4IUPmATmngF
P1IxuY068TMxJCQ0MRFhaGoKAgBAQE4MqVK3JHkkVLvz1nZGQEhUKBoqIi/PnPf8bGjRsxePBgREZG4oG6Pyn9V/bv349Ro0bhzJkzyMvLQ3h4OAwMDOSORUQtwIKl47r/FvjvocDS1UBFI693sQT69Qf6149+QFf+971TGRoaIjw8HGfPnkVlZSUcHR0RGxuLWnV72ncHqqysxE8//YTvv/8e9+/fb9F7jIyMEBISgvz8fKxcuRI7duzAkCFDEBkZ2eJzyKV+1up3v/sdFi5ciLS0NDz//PNyxyKiVmDB0nX6wLwVQM0GYM9ducNQc4YMGYKkpCRs3LgRK1aswIQJE3D27Fm5Y3Wo2tpabNmyBSNGjMC5c+dQXFwMW1tb/PGPf2zxVgWGhoYICQnBuXPnsGrVKnz55ZcNRUsdtzs4cOAAHBwccPr0aeTm5nLWikhDsWARTD2BP04D7mnuMhWdEhgYiAsXLsDBwQHOzs4ICwtDaWmp3LEkl5KSAmdnZ3zwwQd4++23UVRUhMLCQsTHx+Obb77BgAEDEBYWhlst3Em3vmjl5+dj3bp12LlzJwYNGoSIiAi12PagftZq5syZWLBgAY4fP46RI0fKHYuI2koQkcY6evSoGDFihBgyZIhISkqSO44kkpOThaurqzA3Nxfh4eHi3r17Tx1TW1srEhIShIuLi+jatatYuHChuHHjRquuU1tbK3bt2iVGjhwpLCwsRHh4uLh7965Ef4rW2b9/v+jXr59wd3cX58+flyVDG4GDg6PxAUFEGq2srEwsW7ZMGBkZicDAQHH79m25I7VJRkaG8PHxEUZGRkKhULSoMKlUKpGQkNBQyBYuXCiuX7/equvWlzVnZ+c2l7W2unfvnlAoFMLExERERUWJmpqaTrmuhGT/EOPgUNcBQURa4cyZM8LDw0NYW1uLuLg4oVKp5I7UIufOnROBgYHCwMBABAYGiqKiojadJzk5WYwbN04YGxsLhUIhrl692qr3S1HWWqN+1srJyUnk5eV12HU6mOwfYhwc6jogiEhrqFQqERcXJywtLcXEiRPFhQsX5I7UpB9++EEoFArRpUsX4e/vL86cOSPJeZOTk4WHh0fDTNiVK1fadA43N7eGonXt2jVJsgnx71krQ0NDER4eLqqqqiQ7twxk/xDj4FDXwUXuRFpET08PCoUCFy5cQK9evTB27FhERkaq1TP6fvrpJ0RERGDEiBEoLi5GRkYGEhMT4ejoKMn5J0+ejPT0dHz77be4ePEihgwZgpCQEBQVFbXqHFlZWdi7dy8yMzNhZ2eH0NBQXLt2rV3ZDh48CEdHR2RmZiIrKwtRUVEwNDRs1zmJSE0JItJaCQkJYsCAAcLR0VGkp6fLmuXu3bsiPDxcmJmZCQ8PD3HkyJFOuW5qaqrw9fUVhoaGIjg4WFy8eLHV5zhw4IDw9PQUJiYm4r/+679aPaNVUlKiTbNWj5N9loCDQ10HZ7CItFhAQADy8/Mxbdo0jB8/HqGhoZ2+m3lpaSmio6MxZMgQ7Nu3D1u2bEF6ejpefPHFTrm+t7c3UlJScOTIEdy7dw/29vYICQlBYWFhi88xdepUnDhxAgkJCTh9+jR+/LHlz4s6dOgQHBwcOGtFpGsEEemEnJwc4eLiImxsbER8fHyHX6+yslLExcWJvn37isGDB4u4uDi1+Jbc8ePHhb+/f8Oi+o5ap6bFs1aPk32WgINDXQcEEemM6upqERMTI8zNzYW/v7+4fPmy5Neo31/Kzs5O9OvXT8TExIiKigrJr9NeJ06ceKJo5efnS3bugwcPiv79+4vRo0eL3Nxcyc6rhmT/EOPgUNfBW4REOqRLly4ICwvDmTNnUFVVBXt7e0RHR0v2XMOUlBSMHTsW8+fPh0KhwMWLFxEWFgZjY2NJzi8lT09PJCYmIicnBwDg4OCAgIAA5OXltfmc9+/fR2hoKH77299i9uzZOHnyJMaOHStVZCLSICxYRDrIzs4Ohw8fxt///n
esWrUK48aNaygabZGSkgI3NzfMmDEDU6dORVFREcLDw2Fqaiph6o7h5OSEXbt2ITc3F6ampnBzc0NAQAByc3NbdZ7Dhw/D0dERGRkZyMzMRFRUFIyMjDooNRGpOxYsIh0WGBiIgoICuLq6wtPTE2FhYXj06FGL35+ZmQlfX19Mnz4dzs7OuHTpEqKiomBlZdVxoTvI6NGjsWvXLpw6dQrW1tZwd3dHQEAAsrOzm31f/axVQEAAXnvtNc5aEREAFiwinWdtbY24uDikpKQgKSkJTk5OOHz4cLPvOX/+PIKCguDl5YUePXrgwoULiIuLQ9++fTspdcdxcHCAUqlsKFoeHh6YMmUKsrKynjr28VmrjIwMzloRUQMWLCICAEyYMAF5eXkIDg7GSy+9hKCgINy+ffuJYy5fvozQ0FA4OTkBqCtau3btgq2trRyRO9SoUaOgVCpRUFAAOzs7eHl5YcqUKcjIyMCDBw+emrVydnaWOzIRqRE9IYSQOwQRqZfCwkLMnz8fp06dQlRUFGbMmIHVq1cjJiYG48ePR1RUFFxcXOSO2akKCwuxYsUK7NixA0OHDoWJiQm2bNnSUDZ1lJ7cAYjUFQsWETVKpVJh/fr1WLx4MUxMTDBs2DB8+umnGD9+vNzRZHXp0iXcv38fo0eP5oahLFhETWLBIqJm3b59GxUVFRg4cKDcUUj9sGARNYEFi4iI2ooFi6gJXOROREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYixYRERERBJjwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJPb/CH5uCVU2JPsAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<PIL.Image.Image image mode=RGBA size=600x200 at 0x7FF8471459B0>" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"MolsToGridImage(mols)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1\n" | |
] | |
} | |
], | |
"source": [ | |
"pattern = mols[0] # MolFromSmiles\n", | |
"matches = [smiles_list[i] for i, m in enumerate(mols) if m.HasSubstructMatch(pattern)]\n", | |
"print(len(matches)) # result: 1, why not 2?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mols = [Chem.MolFromSmiles(x, params) for x in smiles_list]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"If you remove the aromatization step from sanitization, single and double bonds will be maintained as in the input SMILES and no atom/bond will be marked as aromatic, so you will get 2 matches:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"C1=CN2NCCCC2=C1\n", | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 N False\n", | |
"5 N False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 C False\n", | |
"0 1 DOUBLE False\n", | |
"1 2 SINGLE False\n", | |
"2 3 DOUBLE False\n", | |
"3 4 SINGLE False\n", | |
"4 5 SINGLE False\n", | |
"5 6 SINGLE False\n", | |
"6 7 SINGLE False\n", | |
"7 8 SINGLE False\n", | |
"4 0 SINGLE False\n", | |
"8 0 SINGLE False\n", | |
"\n", | |
"C1=CC2=C3C(=C1)CCNN3C=C2\n", | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 C False\n", | |
"5 C False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 N False\n", | |
"9 N False\n", | |
"10 C False\n", | |
"11 C False\n", | |
"0 1 DOUBLE False\n", | |
"1 2 SINGLE False\n", | |
"2 3 DOUBLE False\n", | |
"3 4 SINGLE False\n", | |
"4 5 SINGLE False\n", | |
"5 6 DOUBLE False\n", | |
"4 7 DOUBLE False\n", | |
"7 8 SINGLE False\n", | |
"8 9 SINGLE False\n", | |
"9 10 SINGLE False\n", | |
"10 11 SINGLE False\n", | |
"7 0 SINGLE False\n", | |
"11 0 SINGLE False\n", | |
"8 6 SINGLE False\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in mols:\n", | |
"    Chem.SanitizeMol(m, Chem.SANITIZE_ALL ^ Chem.SANITIZE_SETAROMATICITY)\n", | |
"    print(Chem.MolToSmiles(m))\n", | |
"    for a in m.GetAtoms():\n", | |
"        print(a.GetIdx(), a.GetSymbol(), a.GetIsAromatic())\n", | |
"    for b in m.GetBonds():\n", | |
"        print(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondType(), b.GetIsAromatic())\n", | |
"    print()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<PIL.Image.Image image mode=RGBA size=600x200 at 0x7FF8471453C8>" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"MolsToGridImage(mols)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2\n" | |
] | |
} | |
], | |
"source": [ | |
"pattern = mols[0] # MolFromSmiles\n", | |
"matches = [smiles_list[i] for i, m in enumerate(mols) if m.HasSubstructMatch(pattern)]\n", | |
"print(len(matches))  # result: 2 -- with aromaticity perception skipped, both molecules match" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"However, if you change the location of the double bonds by writing a different Kekulé structure for the benzene ring, the match will fail again, because the single and double bonds no longer line up across the two structures:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"smiles_strings = '''\n", | |
"C12=CC=CN1NCCC2\n", | |
"C12C=CC=C(C=C3)C=1N3NCC2\n", | |
"'''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['C12=CC=CN1NCCC2', 'C12C=CC=C(C=C3)C=1N3NCC2']\n" | |
] | |
} | |
], | |
"source": [ | |
"smiles_list = smiles_strings.splitlines()[1:]\n", | |
"print(smiles_list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"params = Chem.SmilesParserParams()\n", | |
"params.sanitize = False" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mols = [Chem.MolFromSmiles(x, params) for x in smiles_list]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"C1=CN2NCCCC2=C1\n", | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 N False\n", | |
"5 N False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 C False\n", | |
"0 1 DOUBLE False\n", | |
"1 2 SINGLE False\n", | |
"2 3 DOUBLE False\n", | |
"3 4 SINGLE False\n", | |
"4 5 SINGLE False\n", | |
"5 6 SINGLE False\n", | |
"6 7 SINGLE False\n", | |
"7 8 SINGLE False\n", | |
"4 0 SINGLE False\n", | |
"8 0 SINGLE False\n", | |
"\n", | |
"C1=CC2=C3C(=C1)C=CN3NCC2\n", | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 C False\n", | |
"5 C False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 N False\n", | |
"9 N False\n", | |
"10 C False\n", | |
"11 C False\n", | |
"0 1 SINGLE False\n", | |
"1 2 DOUBLE False\n", | |
"2 3 SINGLE False\n", | |
"3 4 DOUBLE False\n", | |
"4 5 SINGLE False\n", | |
"5 6 DOUBLE False\n", | |
"4 7 SINGLE False\n", | |
"7 8 SINGLE False\n", | |
"8 9 SINGLE False\n", | |
"9 10 SINGLE False\n", | |
"10 11 SINGLE False\n", | |
"7 0 DOUBLE False\n", | |
"11 0 SINGLE False\n", | |
"8 6 SINGLE False\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in mols:\n", | |
"    Chem.SanitizeMol(m, Chem.SANITIZE_ALL ^ Chem.SANITIZE_SETAROMATICITY)\n", | |
"    print(Chem.MolToSmiles(m))\n", | |
"    for a in m.GetAtoms():\n", | |
"        print(a.GetIdx(), a.GetSymbol(), a.GetIsAromatic())\n", | |
"    for b in m.GetBonds():\n", | |
"        print(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondType(), b.GetIsAromatic())\n", | |
"    print()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<PIL.Image.Image image mode=RGBA size=600x200 at 0x7FF82A805160>" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"MolsToGridImage(mols)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1\n" | |
] | |
} | |
], | |
"source": [ | |
"pattern = mols[0] # MolFromSmiles\n", | |
"matches = [smiles_list[i] for i, m in enumerate(mols) if m.HasSubstructMatch(pattern)]\n", | |
"print(len(matches)) # result: 1, why not 2?" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A possible solution is to mark the bonds in your pattern as generic, so that they will match both single and double bonds:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"query_params = Chem.AdjustQueryParameters()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"query_params.makeBondsGeneric = True\n", | |
"query_params.aromatizeIfPossible = False\n", | |
"query_params.adjustDegree = False\n", | |
"query_params.adjustHeavyDegree = False" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pattern_generic_bonds = Chem.AdjustQueryProperties(pattern, query_params)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAYAAABNcIgQAAAPd0lEQVR4nO3dfWxUZd7G8WumM32xrRSmiCzItggr1G6fJrLiuiomKGSBRVZFTRDXxvoWI5g2bNySgstGjBKU7UaXF1cFFJGiqwZ8aCwFxSgvLRqgi0KKtLUCMgVrS20705nnj1lYeFawrTNzn5nz/fzTMp05cxFCr7l/c+45jmAwGBQAADblNB0AAACTKEIAgK1RhAAAW6MIAQC2RhECAGyNIgQA2BpFCACwNYoQAGBrFCEAwNYoQgCArVGEAABbowgBALZGEQIAbI0iBADYGkUIALA1ihAAYGsUIQDA1ihCAICtUYQAAFujCAEAtkYRAgBsjSIEANgaRQgAsDWKEABgaxQhAMDWKEIAgK1RhAAAW6MIAQC2RhECAGyNIgQA2BpFCACwNYoQAGBrFCEAwNYoQgCArVGEAABbowgBmykvL1dlZaW6urpMRwEsgSIEbOL48eOSpIyMDH377bcqKSnR4sWLDacCzHMEg8Gg6RAAIuvLL7/UuHHjVFdXJ7fbLUlqampScnKyPB6Pli9frrvuuksXX3yx4aRA9LEiBOLYU089pcOHDys7O1s1NTVnSlCShgwZIo/Ho1OnTun9999XYmKigsGgfD6fwcRA9FGEQJwJBoPyer2SpIEDB57z/Q9JTU1VeXm5kpOTtXbtWt17773RigpYAqNRIM68/fbbWrp0qTZt2tTrx7a1tembb77R8OHDtX79eo0ZM0ZZWVnhDwlYCCtCIA74fD4VFxers7NTU6dO1auvvtqn46SlpWn48OGSpE2bNun062TOMEU8owgR144dOyZJ6uzsNJwkMjo6OvTdd9/J7XarX79+am5ultPpVGZm5k8+9osvvqjs7GzV1tZqzJgxYniEeMVoFHGrq6tLubm5Ki8v19SpU/Xxxx9ryJAhOnHihAYMGGA6XljMmzdPfr9fCxcujNhzdHd36/PPP9eVV16pjz76SC6XS9dcc03Eng+INooQca2trU1paWnat2+fcnNz5fV6dcUVV+jo0aNyOp1qamrSsGHDTMfslSNHjmjZsmV64okn1NraKpfLpZSUlKg89+LFizVq1ChNnjxZXV1dSkxMjMrzApHEaBRxqbS0VFu3blVaWpokKTc3V5KUmZmpvXv3yu12q7q6WtOmTZMknTx5UnV1dabi9khzc7MCgYA8Ho9aW1vV1dWl9PT0qJWgJBUXF2vy5Mlqa2vT6NGjz2zSB2IZK0LEpa1btyojI0P5+fkXvF9ra6vS09P1xhtvaOPGjVq1apXq6uoUCAQ0cuTI6ITtoSlTpqigoEC33Xab6SiSpD179igvL0/19fXasWOH7rjjDtORgD6hCBFXmpqa5HQ6NXjw4F4/1u/3y+VyadGiRZKkOXPmaPv27brkkkvOnEkZbbt371Z1dbUeeOABeb1eeTweORwOI1nOZ8OGDTpw4ICKiooYlyImMRpFXKmqqlJJSUmfHutyuSSFCnDOnDmSpHXr1umLL76QJL355ptqamoKT9AfcXrkOHDgwDPfZ2ZmWq4EpdBKtaioSJJ0880368MPPzScCOgdVoSIO8FgMCKFMXPmTC1YsEDZ2dl67rnnVFBQoIyMjLA/j9/v14gRI1RZWakRI0aE/fiRtG/fPuXk5Mjn82nFihV65JFHLFnewNlYESIufPbZZ5owYYL8fn/EfvGuXr1a2dnZ8vv92rVrl1JTU+X3+1VUVKRAIPCTj//WW2+poqJCLpdLu3btirkSlEInJTmdTh06dEj79++Xw+FgMz4sjyJEXMjLy1NJScmZ8WYkuVwurVmzRm63WydOnFBSUpKcTqcaGhrOjFR74/9fHkk6/+eCxorRo0fr+eeflxQaNZeVlRlOBJwfo1HEvNdff13Tp0+PSgleyMGDB/XBBx+osLBQVVVV+uSTTzR37twLPqahoUHXXnut6urqlJSUFKWk0d
XY2Ki0tDT1799fS5cu1YwZM5Senm46FnAGK0LEtPb29qiexHIhI0eOVGFhoSTJ4/Ho6quvliS98MILWr58+Tn3feaZZ1RXV6dhw4bp008/jdsSlKTLLrtM/fv3V2trq6qqqpSYmKhAICC/3286GiCJFSFiWGdnZ0wUyM6dO3XRRRcpNzdXDz/8sCZNmiSv16ucnByNHTvWdDwjVq1apaqqKr3yyiumowAUIWLXpEmTVFhYqFtvvdV0lB6rrKyU2+3WuHHjTEcxqrW1Vc3NzVziCZbAaBQxa+XKlZowYYLpGL2SnJwc0Q/IjhUJCQm68cYbLf+xdrAHVoSIORs3bpTL5dLEiRNNR0EfBYNB1dfXa8iQIXK73abjwOZYESLmpKen66uvvjIdo09aWlq0Z88e0zGMczgcysrKogRhCRQhYkZ3d7eOHTumG264Qffdd5/pOH2yd+/ePu01jDft7e3KyspiNApLYDSKmLFt2zbNnj1bNTU1fGxXjGM0CiuhCBFTTp06pdTUVNMx+qylpUX19fXKy8szHQXAvzEaheW1tLTo7rvvltfrjekSlBiNnsZoFFbCihCWFwgE9NJLL+mee+7hWndxgtEorIQihKVVV1crPz/f+OeIhgujUcB6GI3C0hYuXKhNmzaZjhE2jEZDGI3CSlgRwtIidZFdmMVoFFbCihCWVFpaqrKysrgrQTbUh7ChHlZCEcKSZs2apZtuusl0jLBjNBrCaBRWwmgUltLU1KTa2tqY+zDtnvJ6pdpayeYXn2A0CkthRQhLOXLkiNauXWs6RsTU1Ul//7vpFOYxGoWVsCKEZXR0dCg5Odl0DERBe3u7cnJytHnzZl1++eWm48DmWBHCEo4cOaJRo0bp5MmTpqNElNcrffCB6RTmpaSkaOvWrRo2bJjpKABFCGsIBAKqrKxU//79TUeJKEajIYxGYSUUISzh9ttvV2Njo+kYETd2rBTHb4H2GGeNwkp4jxCW0NnZqaSkJNMxIo6zRkM4axRWwooQlpCUlKQTJ05o2rRp8nq9puNEDKPREEajsBKKEJaRkZGhO++8Ux6Px3SUiGE0GsJoFFbCaBSW4/P5tH37dl1//fWmo4Qdo9EQRqOwElaEsJxt27ZpyZIlisfXaIxGQxiNwkpYEcLSuPpEfGJDPayEFSEsq6ysTMXFxaZjhBUb6kPYUA8rYUUIyzp06JAyMjI0YMAA01HCZscO6bnnOGEGsBKKEJa3YcMGjRo1SiNGjDAdBWHCaBRWwmgUlrd582a1tLSYjoEwYjQKK2FFiJjR1tamtLQ00zEAxBlWhIgJra2tysnJscXnkdoBG+phJawIETOOHTumQYMGmY6BMGBDPayEFSFixqBBg9TY2KjHH388djbbB6QXxktOt1S07azbfdKsn0sz3jYVzCw21MNKKELEFIfDocGDB5uO0Wv9MqSX5koHuk0nsQZGo7ASihAxZejQoZo9e7b8fr8aGhpMx+mx7HulaQ3SE/+UYmQtG1GcNQoroQgRk5YsWaJFixaZjtFjjgyp9HGpYr60/XvTacxjNAoroQgRkx577DGVlZWZjtErWQXSQwnS3H9IAdNhDGtubtbQoUMZjcISKELEJLfbLYfDodLSUq1bt850nJ5JkooXSAeflt6N32sPX1AgEFB3d7fS09M1Y8YMRqOwBIoQMe26667T+PHjTcc4R3e3dPjwD/9swFTpjyOkec9KHVFNZQ2zZ8/WihUrlJiYqKeffprRKCyBIkRMmzhxojwej7Zs2SKfz2c0yzvvSN9/LzkcUmnpee7klAqflPzLpbeaoxrPmKNHj55ZtRcXF2v69OmGEwHnoggR8zo6OjR//nwjZ5Fu3Pif1d+aNdLnn0tOp7Rq1fkfk/Jr6c+TpJPt597u94ceFytbJH/M6RcmwWBQ69evVzAYVFZWljwej+FkwLn4ZBnElUAgIKczsq/vNm+WUlKka6+V/vIXacwY6be//enH/de/pCeflF57TQ
oEQoUaq3bvlh566Dr97W+LNXbsWNNxgAuK4f9qwLn279+vq666Sl1dXWE/9vbtoRWfJDU3S6dPdiwtDU8JSlJOTqgEJamkRHr++fAcN5oOHgx9HT5cKixcpvz8fKN5gJ5gRYi40d3drdraWuXl5YXleKtW7VR9/dUqLZVqaqSPP5YefTQsh/5RBw5I/fpJgwaFLuL7u99JqanRee6+CgalX/4y9IIhTP8EQFSwIkTcSEhIUF5enhoaGlReXt6nY7z88ss6dOiQJKm6+n/lcoVuv+qq6JWgJP3iF6ESPHVKevnl0G3BYGhkajXl5aH3Nh0OaedOShCxhyJE3Nm9e7e+/vrrHt+/vLxcW7ZskRTa6H36sWVl8/WnP0UkYo+lpkoVFaGv5eVSYaHZPGc7PQa94gopPT30/UUXmcsD9BWjUcQtv98vn8+nlJSU//pZRUWFGhsbVVhYqHfeeUcJCQmaMmWKgZQ95/VKx49Lo0dL770n5edLP/uZmSzHj4dWyfv3W39kC/wYVoSIW/fff79WrFhx5s81NTWaM2eOJOnSSy9V6r9/g99yyy2WL0FJyswMlaAkrVwptbWFvjcxLh04MLRVhBJEPGBFiLh1/PhxdXR06MEHH9R7772n5uZmvfvuuyooKDAdLWz275f+8Adpx47Qe3QAeo8iRFwLBoNavXq1Zs6cKUccNkVHh7R3r/SrX0m7dklJSZysAvQWRQjEiQULQtsXfv/72N+QD0QT/1WAODFvXqgE29pCq0Lv6StcBKQXxktOt1S07awH+KRZP5dmvH3u/XJLJb/OvV/RcGnaa9H4WwDRRxECcSY1VVq2LHRyzdn6ZUgvzZUOdBuJBVgWRQjEGYdD+s1v/vv27HulaQ3SE/+UeD8E+A+KELAJR4ZU+rhUMV/a/r3pNIB1UISAjWQVSA8lSHP/IZ1v++EXz0qXZoZGq5mZUuZgaWn0r3AFRI3LdAAAUZQkFS+Q/udR6d3bfvgulxdKFXOkhNM3+KUF46RvopURiDKKELCZAVOlP/5VmvesdM0P/Nx1sTRk6Fm/HHxSWgJFiPjFaBSwG6dU+KTkXy691Ww6DGAeRQjYUMqvpT9Pkk62m04CmMcnywAAbI0VIQDA1ihCAICtUYQAAFujCAEAtkYRAgBsjSIEANgaRQgAsDWKEABgaxQhAMDWKEIAgK1RhAAAW6MIAQC2RhECAGyNIgQA2BpFCACwNYoQAGBrFCEAwNYoQgCArVGEAABbowgBALZGEQIAbI0iBADYGkUIALA1ihAAYGsUIQDA1ihCAICtUYQAAFujCAEAtkYRAgBsjSIEANgaRQgAsDWKEABga/8HQdsn5M7XcR0AAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<rdkit.Chem.rdchem.Mol at 0x7ff82a7f9a30>" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pattern_generic_bonds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2\n" | |
] | |
} | |
], | |
"source": [ | |
"matches = [smiles_list[i] for i, m in enumerate(mols) if m.HasSubstructMatch(pattern_generic_bonds)]\n", | |
"print(len(matches))  # result: 2 -- the generic-bond pattern matches both molecules" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 N False\n", | |
"5 N False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 C False\n", | |
"0 1 UNSPECIFIED False\n", | |
"1 2 UNSPECIFIED False\n", | |
"2 3 UNSPECIFIED False\n", | |
"3 4 UNSPECIFIED False\n", | |
"4 5 UNSPECIFIED False\n", | |
"5 6 UNSPECIFIED False\n", | |
"6 7 UNSPECIFIED False\n", | |
"7 8 UNSPECIFIED False\n", | |
"4 0 UNSPECIFIED False\n", | |
"8 0 UNSPECIFIED False\n" | |
] | |
} | |
], | |
"source": [ | |
"for a in pattern_generic_bonds.GetAtoms():\n", | |
"    print(a.GetIdx(), a.GetSymbol(), a.GetIsAromatic())\n", | |
"for b in pattern_generic_bonds.GetBonds():\n", | |
"    print(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondType(), b.GetIsAromatic())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment