Created
May 19, 2020 14:39
-
-
Save ptosco/e410e45278b94e8f047ff224193d7788 to your computer and use it in GitHub Desktop.
SubstructMatchAromaticity
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from rdkit import Chem\n", | |
"from rdkit.Chem.Draw import MolsToGridImage" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"smiles_strings = '''\n", | |
"C12=CC=CN1NCCC2\n", | |
"C12=CC=CC(C=C3)=C1N3NCC2\n", | |
"'''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['C12=CC=CN1NCCC2', 'C12=CC=CC(C=C3)=C1N3NCC2']\n" | |
] | |
} | |
], | |
"source": [ | |
"smiles_list = smiles_strings.splitlines()[1:]\n", | |
"print(smiles_list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"params = Chem.SmilesParserParams()\n", | |
"params.sanitize = False" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mols = [Chem.MolFromSmiles(x, params) for x in smiles_list]" | |
] | |
}, | |
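{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"With the default sanitization (`Chem.SANITIZE_ALL`), aromaticity is perceived separately for each molecule. In the part of the larger molecule that should match the smaller one, one extra atom and one extra bond end up marked as aromatic (atom 0 and bond 7-0 of the second listing, which correspond to the non-aromatic atom 8 and single bond 8-0 of the first), so the smaller molecule is not found as a substructure of the larger one:" | |
] | |
}, | |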
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"c1cc2n(c1)NCCC2\n", | |
"0 C True\n", | |
"1 C True\n", | |
"2 C True\n", | |
"3 C True\n", | |
"4 N True\n", | |
"5 N False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 C False\n", | |
"0 1 AROMATIC True\n", | |
"1 2 AROMATIC True\n", | |
"2 3 AROMATIC True\n", | |
"3 4 AROMATIC True\n", | |
"4 5 SINGLE False\n", | |
"5 6 SINGLE False\n", | |
"6 7 SINGLE False\n", | |
"7 8 SINGLE False\n", | |
"4 0 AROMATIC True\n", | |
"8 0 SINGLE False\n", | |
"\n", | |
"c1cc2c3c(c1)ccn3NCC2\n", | |
"0 C True\n", | |
"1 C True\n", | |
"2 C True\n", | |
"3 C True\n", | |
"4 C True\n", | |
"5 C True\n", | |
"6 C True\n", | |
"7 C True\n", | |
"8 N True\n", | |
"9 N False\n", | |
"10 C False\n", | |
"11 C False\n", | |
"0 1 AROMATIC True\n", | |
"1 2 AROMATIC True\n", | |
"2 3 AROMATIC True\n", | |
"3 4 AROMATIC True\n", | |
"4 5 AROMATIC True\n", | |
"5 6 AROMATIC True\n", | |
"4 7 AROMATIC True\n", | |
"7 8 AROMATIC True\n", | |
"8 9 SINGLE False\n", | |
"9 10 SINGLE False\n", | |
"10 11 SINGLE False\n", | |
"7 0 AROMATIC True\n", | |
"11 0 SINGLE False\n", | |
"8 6 AROMATIC True\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in mols:\n", | |
" Chem.SanitizeMol(m, Chem.SANITIZE_ALL)\n", | |
" print(Chem.MolToSmiles(m))\n", | |
" for a in m.GetAtoms():\n", | |
" print(a.GetIdx(), a.GetSymbol(), a.GetIsAromatic())\n", | |
" for b in m.GetBonds():\n", | |
" print(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondType(), b.GetIsAromatic())\n", | |
" print()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAADICAYAAAA0n5+2AAAsGElEQVR4nO3deVRV16EG8A+QWRCciSM4RQRRBoGAQ0DLU6HxxUJiFBoTczHpUzL5IJootc8EXh0gaVW0WrkOqdoYA45AfEZQBhnUqAgKjXE2GnFghrvfHwSqEZDhwLnD91trr65wzz3nk9Xkfu6z7z56QggBIiKi1tOTOwCRutKXOwARERGRtmHBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYixYRERERBJjwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYl3kDkBE2uvWrVs4duwYjI2NYWZmhq5du8LQ0BDW1tYwMjKCubk5zM3NYWRkJHdUIiJJ6QkhhNwhiEj7PHr0CM7OzqisrIRKpUJpaSlKS0tRVVXV6PFWVlYwNDSEhYUFTE1NYWJiAktLSxgaGqJbt24NJc3CwgKGhoawsrJ6qqQ1dQ4jIyNYWlp28m9AJ+jJHYBIXXEGi4g6xFdffYXS0lL8+OOPMDAweOK1kpISVFdX4+HDhygvL0dFRQXu37+P6upqPHjwABUVFSgvL8fDhw9RXV2NkpISVFVVobS0FDdv3kRVVVXDzx49eoSysjJUVlY+cY7GmJiYwNTUFN26dcOXX34JDw+PzvhVEJEO4gwWEXUIX19fuLq6Ijo6WpbrN1fSdu7ciby8PGRnZ8uSTYtwBouoCSxYRCS5a9euYdCgQcjLy4Ojo6PccZ5y/vx5ODg44Nq1a7CxsZE7jiZjwSJqAr9FSESSi4+Px5gxY9SyXAGAvb09Bg4ciJSUFLmjEJGWYsEiIslt3boVISEhcsdolp+fHw4dOiR3DCLSUixYRCSpzMxMXLp0Ca+88orcUZrl5+eHpKQkqFQquaMQkRbiGiwiktQf/vAHXL16Fd98881Try1evBiXLl1q8fYKv96ioWvXrg3Ht9eDBw/Qs2dPpKWlYdy4ce0+n47iGiyiJnCbBiKSTFVVFXbt2oX169c3+rqjoyOEEA1bNNy5c6dFWzQ0pjUlzc7ODosWLXri/ZaWlnjhhRdw6NAhFiwikhxnsIhIMnv27MG8efNw48YNGBsbS3be+u0V6jcqfXwfrcb2wPp1SevatStWr1791Hk/++wzJCYm4sSJE5Jl1TGcwSJqAgsWEUlmxowZeO6557B27Vq5o7RIXl4e3NzccOvWLfTo0UPuOJqIBYuoCVzkTkSSuHv3Lg4ePKj23x583JgxY9C7d29u10BEkmPBIiJJfPVVMmxtbTXq8TN6enr4zW9+g8OHD8sdhYi0DAsWEUli48ZX8cYbJ+WO0Wr/8R//gYMHD4KrJYhISlyDRUTtlp8PjBoFFBUBtrZyp2mde/fuoVevXsjOzsaYMWPkjqNpuAaLqAmcwSKidouPByZN0rxyBQDW1tZwc3Pjru5EJCkWLCJqF5UK2LED0KC17U/x8/PjOiwikhQLFhG1y7ffAj//DMycKXeStps6dSqOHz+O+/fvyx2FiLQECxYRtYtSCbz8MmBhIXeStnNzc4OVlRX+7//+T+4oRKQlWLB0WG1tLdavX4+PPvoIX331ldxxSAM9egTs3QsEB8udpH309fUxefJkrsMiIsmwYOmoo0ePwsXFBUuXLsWtW7fw+9//Hj4+Pjh9+rTc0UiD7N4NdOsG+P
jInaT9/Pz8cODAAbljEJGWYMHSMVeuXEFISAh+85vfwN3dHefPn8fmzZtRWFiIYcOGwdXVFSEhIbh9+7bcUUkDbN1aN3tlYCB3kvbz8/PD1atXkZ+fL3cUItICLFg6oqysDJGRkRg+fDhu3LiBvLw8xMXFoWfPngCA5557DnFxcUhPT0dRURFGjBiB6OhoVFZWypyc1NXly8B33wGzZ8udRBp9+/bFmDFjeJuQiCTBgqXlhBDYvXs3Ro4ciR07dmD37t1ITk7GqFGjGj3e1dUVaWlp2LBhA9auXYvRo0dj3759nZyaNMHWrYCLC+DgIHcS6UydOpUFi4gkwYKlxbKzszF+/HgoFAq88847+P777+Hv7//M9+np6SEwMBD5+fmYNWsWAgMDMWXKFJw7d64TUpOm2L5d8xe3/5qfnx+OHTuGsrIyuaMQkYZjwdJC169fR2hoKDw9PWFnZ4cLFy4gPDwcxsbGrTqPmZkZIiMjUVhYCBsbG4wdOxZhYWEoKSnpmOCkMe7eBXr1AmbNkjuJtF544QUYGxvj6NGjckchIg2nUwXr5s2bUCqVuHPnjtxROkRVVRViY2MxcuRIFBQU4OTJk1AqlejTp0+7zjtgwAAolUokJSXhu+++w5AhQxAbG4va2lqJkpOm6dEDOHYM+GUJn9bo0qULfH19uas7EbWbzjzs+ezZs3B3d4e1tTVu3ryJSZMmYebMmZgxYwZsbGzkjtduiYmJePfdd1FVVYUVK1YgODgYenrSP4dVpVJh27ZtWLRoEZ577jnExsZiwoQJkl+HSC4bN27EypUrUVBQIHcUTcCHPRM1QSdmsCoqKjB79mzMmjULV69exenTp+Ht7Y0vvvgC/fv3h6urKyIjI3Hp0iW5o7Zafn4+pk6dildffRXBwcEoLCxESEhIh5QroG5DxpCQEBQUFMDPzw9TpkxBQEAA/vWvf3XI9Yg629SpU1FYWIiioiK5oxCRBtOJgvXee++hrKwMa9asAQCMGjUKkZGROH/+PM6cOYPAwECkpKRg2LBhDa+p+99ef/75Z4SFhcHJyQkWFhY4f/48IiMjYWpq2inXt7KyQlRUFL7//nsIITBq1ChERETg4cOHnXJ9oo7Sv39/jBo1it8mJKJ20fpbhF9//TVmzZqF9PR0jB07ttljf/jhB3zzzTfYvXs3Tpw4gZEjRyIwMBABAQFwcXHppMTNq6mpwebNm7FkyRIMGjQIMTEx8Pb2ljsWUlJS8O677+LevXtYtmwZ5s2bB319nejvpIU+/PBDFBYWIiEhQe4o6o63CImaIrTYlStXRI8ePURMTEyr33v58mURExMjJk+eLAwMDISdnZ1YuHChSE1NFSqVqgPSPltKSopwcHAQNjY2Ii4uTtTW1sqSoylVVVUiJiZGdOvWTbi5uYnjx4/LHYnao1aIv/oIoddFiPeOPfbzKiEWDBTita/lCtbxkpOThbm5uaioqJA7iroDBwdH40NrpxhUKhVCQkIwbtw4LFy4sNXvHzhwIMLCwpCcnIybN29i2bJlKC4uho+PD2xtbREWFoa0tDQI0fETgBcvXkRQUBCmTZsGHx8fXLhwAQqFQu1miAwNDREWFoaioiJ4enpi4sSJCAoKwpUrV+SORu3QzQrYvAQo1KEvjU6YMAF6enpIS0uTOwoRaSj1+oSW0PLly3H+/Hn8/e9/b/eC7549eyIkJASJiYm4efMmli9fjuLiYvj6+mLQoEEIDQ1FYmIiampqJEpf59GjR4iMjISjoyPKy8uRn5+P2NhYWFpaSnodqfXo0QOxsbHIysrCrVu3MHLkSERGRqKiokLuaNQGtq8DM34EIr8GtHE9QWN/STIyMsKkSZO4DouI2kwrC1ZaWho+/fRTbNu2rdE9oO7du9fmc3fv3r2hbN26dQurVq1CeXk5Zs2ahb59+za8Vl1d3eZrqFQqKJVKDB06FDt37sTevXuRmJgIOzu7Np9TDmPHjsV3332HL7/8EkqlEs
OHD4dSqZQ7FrWSnhXwSQRweBmQUS53Gmnt378fDg4OuH79+lOv+fn5sWARUdsJLXPv3j0xePBg8dFHHzV5zLhx48Tw4cNFRESEyM7OluS6paWlIiEhQQQHBwsLCwthbW0tgoODRUJCQqvWcWRkZAh3d3fRvXt3ERMTI6qrqyXJJ7eysjIRFRUlLCwsxIsvvihOnz4tdyR6ll/WYDn/jxA1FUIsdhTixS+EqNWCNVglJSVCoVAIQ0NDER4eLiorK586pri4WAAQly9fliGhxpB9nQsHh7oOrZvBevvtt9GrVy9ERkY2ecyBAwewZMkSnD17Fp6enhg0aFDDmiqVStWm65qZmSEgIABKpRK3b99GfHw8ACA4OBjdu3dveO3Ro0eNvv/q1asICQnB+PHj4eTkhIKCAoSFhaFLly5tyqNuTE1NER4ejvz8fPTv3x/Ozs4ICQnB7du35Y5GLWEMfLAcuBgNJGj4gxAOHz4MR0dHZGRkIDMzE1FRUTAyMnrqOFtbWwwdOhRJSUkypCQijSe0yPr160XXrl1FYWFhi9/z888/i/j4eOHv7y+MjY1Fz549G2aeqqqq2p2ppqZGpKamioULF4o+ffoIU1NT4e/vL+Lj48X9+/dFaWmpiIqKEl27dhW+vr7izJkz7b6mJsjKyhKenp7C2tpaREVFNTqDQDJ7fAbrl3/+fJIQjh8K8ZYGzmC1ZNbq1xYsWCB+97vfdUI6jSX7LAEHh7oOCC1x7tw5YWZmJrZt29bmczx+m69r166ie/fuDWVLigJQXV0tkpOTRWhoqOjTp48wMTERw4YNE8OGDRMJCQntPr+mqa2tFZs2bRJ9+/YVM2fOlKTQkoR+XbCEEGXHhRhpKUQPc80qWIcOHRIDBgwQo0ePFjk5OS16T0lJiZg+fboYPny4UCgU4urVqx2cUiPJ/iHGwaGuA0ILVFRUCCcnJzF37lzJzllWVtZQtiwtLYWVlZUIDg4Wu3btEo8ePWr3+WtqasRnn30munfvrvOzNzdu3BAARH5+vtxRdN4TW6s1UrCESohdrwqhr6cZBev+/futnrUSQoiDBw+K/v37i7Fjx4p169YJDw8PYWJiIhYsWCCuXLnSwak1iuwfYhwc6jogtMAf/vAHMXToUPHgwYMOOX95eblISEgQCoVC9O7dW5iZmTXc5mvPNRMTE0WfPn0kTKqZHj58yIKlBrZsEcLRUYi2/F9aHf+OcPjwYTFgwADh6OjY4lmr5gpZamqq8PHxEUZGRiI4OFhcunSpo6JrEtk/xDg41HVAaLj9+/cLIyMjkZmZ2SnXe3xNVd++fYWJiUlD2SopKWnVuViw6rBgyev6dSFeekkIMzMhoqJ+NYvVAnfuCDFokBA7dnREutZr66xVSwtZamqq8Pf3F4aGhiI4OLhVaz61kOwfYhwc6jogNNjVq1dFz549xerVq2W5fm1tbUPZ6tevnzAwMBBeXl4iJiZG3Lx585nvZ8Gqw4IlD5VKiLg4ISwthfDzE6I9uxHExwthaiqEQiGEnEvpDh8+LAYOHCgcHR1bvAVLWwvZr4tWQUFBe6JrKtk/xDg41HVAaKja2lrh4+Mj/Pz8ZHs24K/zZGdni2XLlolhw4Y9UbauX7/e6HtYsOqwYHW+4mIhfH2FsLKqK1lS/CuUkyPE4MFCjB8vxI0b7T9fa7Rn1qq1hezXjh8/Lvz9/YWBgYEIDAwUFy5caNN5NJTsH2IcHOo6NHYfrBUrVuDs2bPYsmVLux+FIwV9fX24uLggMjIShYWFyMjIwPjx4/HXv/4VAwYMwI4dO+SOSASVCtiwARg9GjA1Bc6eBRQKQIp/hZydgZMnAWNjwNUVSE9v/zlbIjk5GY6Ojjhx4gTS09Ob3NfqcQ8ePEBoaCj8/f0xa9YsZGdnw8XFpU3Xf+GFF5CYmIjs7GwAwKhRoxAUFIT8/Pw2nY+ItITQQJmZmcLY2FgkJSXJHaVFTp8+LW7duvXUzzmDVYczWJ3j7F
kh3N2F6N277pZeR6mpESI8XAhDw7o1XR2ltLRULFy4sGHWqqVPTEhKShIDBw4UDg4Okj3J4XGnTp0SgYGBwsDAQPj7+7d4gb2Gkn2WgINDXYfGzWCVlJTglVdeQVhYGKZMmSJ3nBYZPXo0evfuLXcM0mEqFRAYCAwdCpw/D4SEdNy1DAyAqChg927gs8+A4GCgrEzaa6SmpsLJyQlHjhzBiRMnEBUVBWNj42bfU1ZWhrCwMEyfPr3ds1bNcXJywq5du5CXlwdra2u4u7sjICAAOTk5kl+LiNSXxhWsd955Bz169MCf/vQnuaMQaQx9feD4cWDbNqBHj8655ksvAZmZQG4u4O0N/Otf7T9nWVkZIiIiMHnyZMycORPZ2dlwdXV95vvaUsjay9HREUqlsqFoeXh4ICAgACdPnuzQ6xKRetCogrVx40YkJiZix44dz1xjQURPsrbu/GuOGAFkZAC2toCbG9Cex/qlpaXByckJ+/btw/Hjx1s8a9WWQiYlBwcHKJVKnD59GtbW1vD09MSUKVOQlZXVqTmIqHNpTMG6ePEi3n//faxbtw7Dhw+XOw4RtZCFBfDPfwKLFgEBAUBkZN0ty5aqL0m+vr6YOXMmcnJyWlSS2lLIOpK9vT2USiUKCwthZ2cHLy8vTJkyBRkZGbJlIqKOoxEFq7KyEkFBQXj55ZcxZ84cueMQUSvp6QHh4cDevcC+faWYM+dNPHz48JnvO3r0KBwcHLB///5Wz1q1tpB1Fjs7O8TFxTUUrfHjx8Pb2xtHjhyROxoRSUgjCtaiRYvw8OFDfPHFF3JHIaJ2mDoV2LPnLi5d+h6urq44d+5cs8dfunQJr776aotv7aWlpWHMmDHYt28f0tLSZJ+1ao6trS3i4uJw8eJFuLi4YNq0afD29kZiYqLc0YhIAmpfsA4ePIi4uDhs374dlpaWcseRlKGhFezsnOSOQepIBaz1BfQNgfdTH/t5NbBwEDB775PHOXwC1ODJ4963A2Zs77TELTZw4EAcO3YM3t7e8PT0xD//+c8mj503bx4+/fTTVs1avfzyy8jJyYGbm5vU0TvE4MGDERsbi4KCAri4uCAoKIhFi0gLqHXBunbtGkJCQrBixQq4u7vLHUdy1dXeKC4+LHcMUmPdrIDNS4DCWrmTSMvExASbNm3CypUrMWfOHERERKC2tm1/SE2atWrOoEGDEBsbi8LCQri4uOCVV16Bl5cXixaRhlLbgqVSqfD73/8ezs7OeP/99+WOQyQL29eBGT8CkV8DQu4wHUChUODbb7+FUqnE5MmTcfv27Ra/t7y8XGNnrZozYMAAxMbGNtw6DAoKgqenJw4dOiR3NCJqBbUtWJ999hm+//57bNmyBfr6ahuTqEPpWQGfRACHlwEZ5XKn6RheXl44deoU9PT04Orq2qLtC44fPw4nJyckJiYiNTVVY2etmtOvXz98/vnnKC4uhoeHB5YuXcrH7xBpELVsLidPnsTy5cuxadMm2NjYPPV6fn4+p81JZwyeC8w3AJZsApra3aBgNdC3J9CzftgA63/s1Jjt0rt3byQlJeG1117DxIkTsWnTpkaPq5+18vHxwcsvv4zc3FyMGzeuk9N2LhsbG6xZswa3bt1iwSLSIF3kDtCYefPm4Z133oG/v3+jr584cQLvvfcecnNzMXTo0E5OR9TJjIEPlgNOC4CEmY0fMmQecHgRYFD/gxpg+USg5Tfc5NelSxdERUXB0dERCoUCWVlZ+OKLLxo2FT5x4gTmzp0LlUqFlJQUjB8/XubEncva2hr37t2TOwYRtZBazmA9y5tvvgl/f3+8+uqrqKqqkjsOUYfr/lvgv4cCS1cDFY283sUS6Ncf6F8/+gFdDRo5UAPMnj0bx48fR0pKCry8vFBYWIiIiAhMnDgRkyZNwunTp3WuXAF1BaukpETuGETUQmpZsP72t79h7dq12LdvX5PHrFu3Dj///DM+/vjjTkxGJBN9YN4KoGYDsOeu3GE63p
gxY5CVlQVra2tMnjwZX3/9NVJTUxEXFwczMzO543WI8vJypKamQtXENvecwSLSLGpZsNzc3LB06VK8+eabuHHjRqPHdOvWDf/4xz8QGxuL/fv3d3JCos5n6gn8cRpwr0zuJJ2jR48e2LdvH65evYqtW7fCw8ND7kgd6saNG5gwYQLu37/f6OssWESaRS0LFgB89NFHcHR0xOuvv97k3+jGjRuHTz75BG+88QZu3rzZyQmJOpA+8M63QM6Sx9ZV6QGBXwK1KmD7jCePO/unXy2oNARWFwN7Z3dmaOkZGRnBwsICpaWlckfpcNa/PI27qduAVlZWLFhEGkRtC5a+vj7i4+ORm5uL1atXN3nc4sWLG4qYENq4UxCRbtOVYtGtWzfo6+s3+WflGiwizaK2BQuo2wdGqVRiyZIlyMzMbPSY+iKWk5PTbBEjIs2kK7fG9PX1YWlp2WzB0oXfA5G2UOuCBQBTp05FaGgoZs+ejQcPHjR6TL9+/RAfH4/Fixc3WcSISDPp0sxNc7N1LFhEmkXtCxYA/PnPf4aFhQUWLFjQ5DHTpk2DQqFotogRkebRpWLR3J9VV26VEmkLjShYxsbG2LVrF/bs2YNt27Y1edzKlSthYWGBhQsXdmK6tuvVC5g0Se4UROpN1wpWU7N1ujSTR6QNNKJgAcCwYcOwevVqvP322ygsLGz0GGNjY2zfvh27d+9utoipC3d34B//kDsFaYK9e4HvvpM7hTysrKx0plg0Vyatra1RVVWlE9+oJNIGGlOwAOCtt95CQEAAXnvttSZ3cLe3t8eaNWuaLWJEmmbzZuDoUblTyEPXZrCaK1hA09s4EJF60aiCBQBr167F3bt38cknnzR5jEKhgL+/P2bPns1H6ZBWKCsDtHQD82fSpYL1rEXuAHTmd0Gk6TSuYFlZWWHnzp2IjY1FcnJyk8etW7cOd+7cwdKlSzsxHVHHKC1lwdIFza2zMjIygpmZmc78Log0ncYVLKBuB/clS5Zgzpw5Te7gXl/EYmJimi1iRJpAl2ewdOnbc88qk7r0uyDSdBpZsABgyZIlcHBwaHYH93HjxmHx4sXNFrFOpQLW+gL6hsD7qY/9vBpYOAiYvVeuYKTudLlg6dK3555VsHTpd0Gk6TS2YOnr60OpVCInJwcxMTFNHvfxxx/DwcEBc+fOVZtH6XSzAjYvAQpr5U5CmkLXC1ZVVRXKyrT/KdfPmqHSpdulRJpOYwsW8O8d3CMiIpCVldXoMfVF7OTJk80Wsc5k+zow40cg8mtAPSofqTtdL1iAbizurp+hauovgyxYRJpDowsWULeD+1tvvYXZs2fj4cOHjR7z+KN0cnNzOznh0/SsgE8igMPLgIxyudOQJmDB0p2CVVNTg0ePHjX5ui78Hoi0gcYXLABYtWoVzM3NERYW1uQx06dPxxtvvIFXXnmlySLWmQbPBeYbAEs2ASq5w8isqQ8TqlNTA1RV6W7B0qVvzz1rrytd2nSVSNNpRcEyNjbGjh07sHPnTmzfvr3J41pSxDqNMfDBcuBiNJBwR+4w8lCpVNi8eTPGjh2LmTNnYsiQIXJHUkv1S4/MzeXNISdd+faclZUV9PT0+MBnIi2gFQULqNvBffXq1Zg/fz4uXrzY6DEmJiYtKmJSqqwEfvih8de6/xb476HA0tVARaekUR8nT56Et7c3PvzwQ7z77rvYsWMHDA0N5Y6lluoLlq7OYAG68+05Q0NDmJubs2ARaQGtKVgAEBoa+swd3O3t7bFq1apmi1h7lZcDe/YAc+YAffoAb73VxIH6wLwVQM0GYM/dDomidq5du4aQkBB4enpi6NChKCgoQHh4OIyMjOSOprZYsHSrWDxrN3dd+T0QaTqtKlhA3Q7uP/30EyIjI5s8Zv78+Zg+fbqkj9IpKwMSE4GQkH+XKn19YOtWYN++pt9n6gn8cRpw71ffQK+pAZTKuv/VBuXl5YiOjsbIkSNx9epV5ObmQqlUolevXnJHU3ssWL
pVLJr7s+rKrVIibaB1BcvKygpbt27FypUrkZKS0uRx69atw4QJE1Bb2/bNqEpKgN2760pV797AG2/U/Xz7duDmzbqCFBAAGBv/8gZ94J1vgZwlgEH9SfSAwC+BWhWwfca/z11YCCxaBIwdC3z7bZsjyk4IYOdOYMKEydi4cSOUSiWOHDmC0aNHyx1NY1RVXcaLLy6BLk/y6VKxaO52qK7cKiXSBlpXsADA29u7YQf3W7duNXqMtbU1Vq5cCVNT01ad++ef/12c+vQBPvgAMDUFvvwSuHHj36+1dzmRvT1QXAzMnAlMn153zqKi9p2zs+XmAhMn1s3mzZ27AefOncOMGTPkjqVxHjz4F3Jy/iJ3DFnpUrFobgbL2toaZWVlqKys7ORURNRaWlmwAGDp0qWwt7eXZAf369evY8uWDPj41M1ULV9eV4BSU4HLl4G4uLoC1KWLROF/YW4OREYCZ8/WlTh7eyAsDHjwQNrrSO3u3bqc7u7A4MF1s3HvvDMKxg1TedQaZWVlMNPl+4PgLcLHXwOa3saBiNSH1has+h3cs7Ky8Pnnn7f6/ZcvX8bq1avh5eWFAQMGIC7uf+DlBWRnA5cuAdHRwLhxgJ5eB4T/laFDgV27gAMHgCNHgOefBzZsANpxd7NDVFcDsbHAkCFAejpw7FjdjF7fvnIn02wsWLpVsJq7HWplZQVANzZdJdJ0WluwAKB///7YuHEjwsPDkZeX98zjf/jhB8TGxsLb2xu2trbYtGkTpkyZgqysLKSn78Of/gSMGdPxuZvi6wvk5dXNai1ZUjdDlJYmX57HpaTU/W7+93+Bzz8HMjMBT0+5U2kHFiyuwapnbm4OY2NjnfldEGkyrS5YAPCf//mfmDt3LoKCghrdwf3cuXOIjo5uKFUbNmzA5MmTkZ+fj3PnziEyMhIuLi4yJG9cly6AQgEUFABeXoCPDxAUVHerUi4//QTMmlW3XqywsG7Rf2fM7OkKFizdmsF61p+1W7duOvO7INJkWl+wAGDNmjUwMzPDe++9BwANxcne3h6jR4/G7t27MXnyZFy8eLHhtREjRsicunndu9fdjjtzBnj4sG59VmRk3R5cna1Xr7qCt3y5bu823lFKS0tZsLjI/YnXdeV3QaTJJF6WrZ5MTEywfft2uLu749ChQ7h58yYmTZqEBQsWYMaMGbCxsZE7Yps9/zxw8GDdHlzvvgts2gSsWAEEB3fuLJKOf/53KM5gPfntOW3/ssSzbofq0mwekSbTiYIFAA4ODigqKkJSUhKmTZuGnj17yh1JUgEBgJ8fsG4dsGABsHkzEBMj75oxkkZ5eTkL1mPfnuvTp4/MaTpWS2awWLCI1J9O3CKs17dvX4SEhGhduapnZFS3PcKFC8CIEYCbW916qCa2AiMNwRks3fr23KhRo/DVV181ub2MLi34J9JkOlWwdIWNTd3eXGlpdYvOn38eWLPml20dVMBaX0DfEHg/9bE3VQMLBwGz9/7yz78c5/AJ8MTTeqqB9+2AGZ3zrGwCCxagW9+es7KywvTp06HXxD1+rsEi0gwsWFrM3b1uP6rPPwe+++7JNVndrIDNS4BCNdtLi57GglWnuW/P3b9/H3/7298ke7aoOuMtQiLNwIKl5fT06ha8791b9/DperavAzN+BCK/Btq3zz11NBasOs3N3OTn5+Pjjz/G8OHDsW7dOq19lMydO3eQnp4OBwcHuaMQ0TOwYOkoPSvgkwjg8DIgQ4atHajlysrKtP6bcy3R3MyNh4cHLl++jMWLF2PFihUYNGgQoqOjUS7HviUdZM+ePXBwcEB5eTlef/11ueMQ0TOwYOmwwXOB+QbAkk2AqoljClYDfXsCPeuHDbD+x06NqfP8/f2xdu1aJCYmyh1FVs+6NWZsbAyFQoHi4mIsX74cf/nLXzB48GBER0ejrKysE5NKq6SkBKGhoZg9ezbee+89pKamws7OTu5YRPQMLFi6zBj4YDlwMRpIuNP4IUPmATmngF
P1IxuY068TMxJCQ0MRFhaGoKAgBAQE4MqVK3JHkkVLvz1nZGQEhUKBoqIi/PnPf8bGjRsxePBgREZG4oG6Pyn9V/bv349Ro0bhzJkzyMvLQ3h4OAwMDOSORUQtwIKl47r/FvjvocDS1UBFI693sQT69Qf6149+QFf+971TGRoaIjw8HGfPnkVlZSUcHR0RGxuLWnV72ncHqqysxE8//YTvv/8e9+/fb9F7jIyMEBISgvz8fKxcuRI7duzAkCFDEBkZ2eJzyKV+1up3v/sdFi5ciLS0NDz//PNyxyKiVmDB0nX6wLwVQM0GYM9ducNQc4YMGYKkpCRs3LgRK1aswIQJE3D27Fm5Y3Wo2tpabNmyBSNGjMC5c+dQXFwMW1tb/PGPf2zxVgWGhoYICQnBuXPnsGrVKnz55ZcNRUsdtzs4cOAAHBwccPr0aeTm5nLWikhDsWARTD2BP04D7mnuMhWdEhgYiAsXLsDBwQHOzs4ICwtDaWmp3LEkl5KSAmdnZ3zwwQd4++23UVRUhMLCQsTHx+Obb77BgAEDEBYWhlst3Em3vmjl5+dj3bp12LlzJwYNGoSIiAi12PagftZq5syZWLBgAY4fP46RI0fKHYuI2koQkcY6evSoGDFihBgyZIhISkqSO44kkpOThaurqzA3Nxfh4eHi3r17Tx1TW1srEhIShIuLi+jatatYuHChuHHjRquuU1tbK3bt2iVGjhwpLCwsRHh4uLh7965Ef4rW2b9/v+jXr59wd3cX58+flyVDG4GDg6PxAUFEGq2srEwsW7ZMGBkZicDAQHH79m25I7VJRkaG8PHxEUZGRkKhULSoMKlUKpGQkNBQyBYuXCiuX7/equvWlzVnZ+c2l7W2unfvnlAoFMLExERERUWJmpqaTrmuhGT/EOPgUNcBQURa4cyZM8LDw0NYW1uLuLg4oVKp5I7UIufOnROBgYHCwMBABAYGiqKiojadJzk5WYwbN04YGxsLhUIhrl692qr3S1HWWqN+1srJyUnk5eV12HU6mOwfYhwc6jogiEhrqFQqERcXJywtLcXEiRPFhQsX5I7UpB9++EEoFArRpUsX4e/vL86cOSPJeZOTk4WHh0fDTNiVK1fadA43N7eGonXt2jVJsgnx71krQ0NDER4eLqqqqiQ7twxk/xDj4FDXwUXuRFpET08PCoUCFy5cQK9evTB27FhERkaq1TP6fvrpJ0RERGDEiBEoLi5GRkYGEhMT4ejoKMn5J0+ejPT0dHz77be4ePEihgwZgpCQEBQVFbXqHFlZWdi7dy8yMzNhZ2eH0NBQXLt2rV3ZDh48CEdHR2RmZiIrKwtRUVEwNDRs1zmJSE0JItJaCQkJYsCAAcLR0VGkp6fLmuXu3bsiPDxcmJmZCQ8PD3HkyJFOuW5qaqrw9fUVhoaGIjg4WFy8eLHV5zhw4IDw9PQUJiYm4r/+679aPaNVUlKiTbNWj5N9loCDQ10HZ7CItFhAQADy8/Mxbdo0jB8/HqGhoZ2+m3lpaSmio6MxZMgQ7Nu3D1u2bEF6ejpefPHFTrm+t7c3UlJScOTIEdy7dw/29vYICQlBYWFhi88xdepUnDhxAgkJCTh9+jR+/LHlz4s6dOgQHBwcOGtFpGsEEemEnJwc4eLiImxsbER8fHyHX6+yslLExcWJvn37isGDB4u4uDi1+Jbc8ePHhb+/f8Oi+o5ap6bFs1aPk32WgINDXQcEEemM6upqERMTI8zNzYW/v7+4fPmy5Neo31/Kzs5O9OvXT8TExIiKigrJr9NeJ06ceKJo5efnS3bugwcPiv79+4vRo0eL3Nxcyc6rhmT/EOPgUNfBW4REOqRLly4ICwvDmTNnUFVVBXt7e0RHR0v2XMOUlBSMHTsW8+fPh0KhwMWLFxEWFgZjY2NJzi8lT09PJCYmIicnBwDg4OCAgIAA5OXltfmc9+/fR2hoKH77299i9uzZOHnyJMaOHStVZCLSICxYRDrIzs4Ohw8fxt///n
esWrUK48aNaygabZGSkgI3NzfMmDEDU6dORVFREcLDw2Fqaiph6o7h5OSEXbt2ITc3F6ampnBzc0NAQAByc3NbdZ7Dhw/D0dERGRkZyMzMRFRUFIyMjDooNRGpOxYsIh0WGBiIgoICuLq6wtPTE2FhYXj06FGL35+ZmQlfX19Mnz4dzs7OuHTpEqKiomBlZdVxoTvI6NGjsWvXLpw6dQrW1tZwd3dHQEAAsrOzm31f/axVQEAAXnvtNc5aEREAFiwinWdtbY24uDikpKQgKSkJTk5OOHz4cLPvOX/+PIKCguDl5YUePXrgwoULiIuLQ9++fTspdcdxcHCAUqlsKFoeHh6YMmUKsrKynjr28VmrjIwMzloRUQMWLCICAEyYMAF5eXkIDg7GSy+9hKCgINy+ffuJYy5fvozQ0FA4OTkBqCtau3btgq2trRyRO9SoUaOgVCpRUFAAOzs7eHl5YcqUKcjIyMCDBw+emrVydnaWOzIRqRE9IYSQOwQRqZfCwkLMnz8fp06dQlRUFGbMmIHVq1cjJiYG48ePR1RUFFxcXOSO2akKCwuxYsUK7NixA0OHDoWJiQm2bNnSUDZ1lJ7cAYjUFQsWETVKpVJh/fr1WLx4MUxMTDBs2DB8+umnGD9+vNzRZHXp0iXcv38fo0eP5oahLFhETWLBIqJm3b59GxUVFRg4cKDcUUj9sGARNYEFi4iI2ooFi6gJXOROREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYixYRERERBJjwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJPb/CH5uCVU2JPsAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<PIL.Image.Image image mode=RGBA size=600x200 at 0x7FF8471459B0>" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"MolsToGridImage(mols)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1\n" | |
] | |
} | |
], | |
"source": [ | |
"pattern = mols[0] # MolFromSmiles\n", | |
"matches = [smiles_list[i] for i, m in enumerate(mols) if m.HasSubstructMatch(pattern)]\n", | |
"print(len(matches)) # result: 1, why not 2?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mols = [Chem.MolFromSmiles(x, params) for x in smiles_list]" | |
] | |
}, | |
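{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The molecules were re-parsed above because `SanitizeMol` had modified them in place. If you now skip the aromaticity-perception step during sanitization (`Chem.SANITIZE_ALL ^ Chem.SANITIZE_SETAROMATICITY`), the single and double bonds are kept exactly as written in the input SMILES and no atom or bond is marked as aromatic, so you will get 2 matches:" | |
] | |
}, | |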
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"C1=CN2NCCCC2=C1\n", | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 N False\n", | |
"5 N False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 C False\n", | |
"0 1 DOUBLE False\n", | |
"1 2 SINGLE False\n", | |
"2 3 DOUBLE False\n", | |
"3 4 SINGLE False\n", | |
"4 5 SINGLE False\n", | |
"5 6 SINGLE False\n", | |
"6 7 SINGLE False\n", | |
"7 8 SINGLE False\n", | |
"4 0 SINGLE False\n", | |
"8 0 SINGLE False\n", | |
"\n", | |
"C1=CC2=C3C(=C1)CCNN3C=C2\n", | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 C False\n", | |
"5 C False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 N False\n", | |
"9 N False\n", | |
"10 C False\n", | |
"11 C False\n", | |
"0 1 DOUBLE False\n", | |
"1 2 SINGLE False\n", | |
"2 3 DOUBLE False\n", | |
"3 4 SINGLE False\n", | |
"4 5 SINGLE False\n", | |
"5 6 DOUBLE False\n", | |
"4 7 DOUBLE False\n", | |
"7 8 SINGLE False\n", | |
"8 9 SINGLE False\n", | |
"9 10 SINGLE False\n", | |
"10 11 SINGLE False\n", | |
"7 0 SINGLE False\n", | |
"11 0 SINGLE False\n", | |
"8 6 SINGLE False\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in mols:\n", | |
" Chem.SanitizeMol(m, Chem.SANITIZE_ALL ^ Chem.SANITIZE_SETAROMATICITY)\n", | |
" print(Chem.MolToSmiles(m))\n", | |
" for a in m.GetAtoms():\n", | |
" print(a.GetIdx(), a.GetSymbol(), a.GetIsAromatic())\n", | |
" for b in m.GetBonds():\n", | |
" print(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondType(), b.GetIsAromatic())\n", | |
" print()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAADICAYAAAA0n5+2AAAsGElEQVR4nO3deVRV16EG8A+QWRCciSM4RQRRBoGAQ0DLU6HxxUJiFBoTczHpUzL5IJootc8EXh0gaVW0WrkOqdoYA45AfEZQBhnUqAgKjXE2GnFghrvfHwSqEZDhwLnD91trr65wzz3nk9Xkfu6z7z56QggBIiKi1tOTOwCRutKXOwARERGRtmHBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYixYRERERBJjwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYl3kDkBE2uvWrVs4duwYjI2NYWZmhq5du8LQ0BDW1tYwMjKCubk5zM3NYWRkJHdUIiJJ6QkhhNwhiEj7PHr0CM7OzqisrIRKpUJpaSlKS0tRVVXV6PFWVlYwNDSEhYUFTE1NYWJiAktLSxgaGqJbt24NJc3CwgKGhoawsrJ6qqQ1dQ4jIyNYWlp28m9AJ+jJHYBIXXEGi4g6xFdffYXS0lL8+OOPMDAweOK1kpISVFdX4+HDhygvL0dFRQXu37+P6upqPHjwABUVFSgvL8fDhw9RXV2NkpISVFVVobS0FDdv3kRVVVXDzx49eoSysjJUVlY+cY7GmJiYwNTUFN26dcOXX34JDw+PzvhVEJEO4gwWEXUIX19fuLq6Ijo6WpbrN1fSdu7ciby8PGRnZ8uSTYtwBouoCSxYRCS5a9euYdCgQcjLy4Ojo6PccZ5y/vx5ODg44Nq1a7CxsZE7jiZjwSJqAr9FSESSi4+Px5gxY9SyXAGAvb09Bg4ciJSUFLmjEJGWYsEiIslt3boVISEhcsdolp+fHw4dOiR3DCLSUixYRCSpzMxMXLp0Ca+88orcUZrl5+eHpKQkqFQquaMQkRbiGiwiktQf/vAHXL16Fd98881Try1evBiXLl1q8fYKv96ioWvXrg3Ht9eDBw/Qs2dPpKWlYdy4ce0+n47iGiyiJnCbBiKSTFVVFXbt2oX169c3+rqjoyOEEA1bNNy5c6dFWzQ0pjUlzc7ODosWLXri/ZaWlnjhhRdw6NAhFiwikhxnsIhIMnv27MG8efNw48YNGBsbS3be+u0V6jcqfXwfrcb2wPp1SevatStWr1791Hk/++wzJCYm4sSJE5Jl1TGcwSJqAgsWEUlmxowZeO6557B27Vq5o7RIXl4e3NzccOvWLfTo0UPuOJqIBYuoCVzkTkSSuHv3Lg4ePKj23x583JgxY9C7d29u10BEkmPBIiJJfPVVMmxtbTXq8TN6enr4zW9+g8OHD8sdhYi0DAsWEUli48ZX8cYbJ+WO0Wr/8R//gYMHD4KrJYhISlyDRUTtlp8PjBoFFBUBtrZyp2mde/fuoVevXsjOzsaYMWPkjqNpuAaLqAmcwSKidouPByZN0rxyBQDW1tZwc3Pjru5EJCkWLCJqF5UK2LED0KC17U/x8/PjOiwikhQLFhG1y7ffAj//DMycKXeStps6dSqOHz+O+/fvyx2FiLQECxYRtYtSCbz8MmBhIXeStnNzc4OVlRX+7//+T+4oRKQlWLB0WG1tLdavX4+PPvoIX331ldxxSAM9egTs3QsEB8udpH309fUxefJkrsMiIsmwYOmoo0ePwsXFBUuXLsWtW7fw+9//Hj4+Pjh9+rTc0UiD7N4NdOsG+P
jInaT9/Pz8cODAAbljEJGWYMHSMVeuXEFISAh+85vfwN3dHefPn8fmzZtRWFiIYcOGwdXVFSEhIbh9+7bcUUkDbN1aN3tlYCB3kvbz8/PD1atXkZ+fL3cUItICLFg6oqysDJGRkRg+fDhu3LiBvLw8xMXFoWfPngCA5557DnFxcUhPT0dRURFGjBiB6OhoVFZWypyc1NXly8B33wGzZ8udRBp9+/bFmDFjeJuQiCTBgqXlhBDYvXs3Ro4ciR07dmD37t1ITk7GqFGjGj3e1dUVaWlp2LBhA9auXYvRo0dj3759nZyaNMHWrYCLC+DgIHcS6UydOpUFi4gkwYKlxbKzszF+/HgoFAq88847+P777+Hv7//M9+np6SEwMBD5+fmYNWsWAgMDMWXKFJw7d64TUpOm2L5d8xe3/5qfnx+OHTuGsrIyuaMQkYZjwdJC169fR2hoKDw9PWFnZ4cLFy4gPDwcxsbGrTqPmZkZIiMjUVhYCBsbG4wdOxZhYWEoKSnpmOCkMe7eBXr1AmbNkjuJtF544QUYGxvj6NGjckchIg2nUwXr5s2bUCqVuHPnjtxROkRVVRViY2MxcuRIFBQU4OTJk1AqlejTp0+7zjtgwAAolUokJSXhu+++w5AhQxAbG4va2lqJkpOm6dEDOHYM+GUJn9bo0qULfH19uas7EbWbzjzs+ezZs3B3d4e1tTVu3ryJSZMmYebMmZgxYwZsbGzkjtduiYmJePfdd1FVVYUVK1YgODgYenrSP4dVpVJh27ZtWLRoEZ577jnExsZiwoQJkl+HSC4bN27EypUrUVBQIHcUTcCHPRM1QSdmsCoqKjB79mzMmjULV69exenTp+Ht7Y0vvvgC/fv3h6urKyIjI3Hp0iW5o7Zafn4+pk6dildffRXBwcEoLCxESEhIh5QroG5DxpCQEBQUFMDPzw9TpkxBQEAA/vWvf3XI9Yg629SpU1FYWIiioiK5oxCRBtOJgvXee++hrKwMa9asAQCMGjUKkZGROH/+PM6cOYPAwECkpKRg2LBhDa+p+99ef/75Z4SFhcHJyQkWFhY4f/48IiMjYWpq2inXt7KyQlRUFL7//nsIITBq1ChERETg4cOHnXJ9oo7Sv39/jBo1it8mJKJ20fpbhF9//TVmzZqF9PR0jB07ttljf/jhB3zzzTfYvXs3Tpw4gZEjRyIwMBABAQFwcXHppMTNq6mpwebNm7FkyRIMGjQIMTEx8Pb2ljsWUlJS8O677+LevXtYtmwZ5s2bB319nejvpIU+/PBDFBYWIiEhQe4o6o63CImaIrTYlStXRI8ePURMTEyr33v58mURExMjJk+eLAwMDISdnZ1YuHChSE1NFSqVqgPSPltKSopwcHAQNjY2Ii4uTtTW1sqSoylVVVUiJiZGdOvWTbi5uYnjx4/LHYnao1aIv/oIoddFiPeOPfbzKiEWDBTita/lCtbxkpOThbm5uaioqJA7iroDBwdH40NrpxhUKhVCQkIwbtw4LFy4sNXvHzhwIMLCwpCcnIybN29i2bJlKC4uho+PD2xtbREWFoa0tDQI0fETgBcvXkRQUBCmTZsGHx8fXLhwAQqFQu1miAwNDREWFoaioiJ4enpi4sSJCAoKwpUrV+SORu3QzQrYvAQo1KEvjU6YMAF6enpIS0uTOwoRaSj1+oSW0PLly3H+/Hn8/e9/b/eC7549eyIkJASJiYm4efMmli9fjuLiYvj6+mLQoEEIDQ1FYmIiampqJEpf59GjR4iMjISjoyPKy8uRn5+P2NhYWFpaSnodqfXo0QOxsbHIysrCrVu3MHLkSERGRqKiokLuaNQGtq8DM34EIr8GtHE9QWN/STIyMsKkSZO4DouI2kwrC1ZaWho+/fRTbNu2rdE9oO7du9fmc3fv3r2hbN26dQurVq1CeXk5Zs2ahb59+za8Vl1d3eZrqFQqKJVKDB06FDt37sTevXuRmJgIOzu7Np9TDmPHjsV3332HL7/8EkqlEs
OHD4dSqZQ7FrWSnhXwSQRweBmQUS53Gmnt378fDg4OuH79+lOv+fn5sWARUdsJLXPv3j0xePBg8dFHHzV5zLhx48Tw4cNFRESEyM7OluS6paWlIiEhQQQHBwsLCwthbW0tgoODRUJCQqvWcWRkZAh3d3fRvXt3ERMTI6qrqyXJJ7eysjIRFRUlLCwsxIsvvihOnz4tdyR6ll/WYDn/jxA1FUIsdhTixS+EqNWCNVglJSVCoVAIQ0NDER4eLiorK586pri4WAAQly9fliGhxpB9nQsHh7oOrZvBevvtt9GrVy9ERkY2ecyBAwewZMkSnD17Fp6enhg0aFDDmiqVStWm65qZmSEgIABKpRK3b99GfHw8ACA4OBjdu3dveO3Ro0eNvv/q1asICQnB+PHj4eTkhIKCAoSFhaFLly5tyqNuTE1NER4ejvz8fPTv3x/Ozs4ICQnB7du35Y5GLWEMfLAcuBgNJGj4gxAOHz4MR0dHZGRkIDMzE1FRUTAyMnrqOFtbWwwdOhRJSUkypCQijSe0yPr160XXrl1FYWFhi9/z888/i/j4eOHv7y+MjY1Fz549G2aeqqqq2p2ppqZGpKamioULF4o+ffoIU1NT4e/vL+Lj48X9+/dFaWmpiIqKEl27dhW+vr7izJkz7b6mJsjKyhKenp7C2tpaREVFNTqDQDJ7fAbrl3/+fJIQjh8K8ZYGzmC1ZNbq1xYsWCB+97vfdUI6jSX7LAEHh7oOCC1x7tw5YWZmJrZt29bmczx+m69r166ie/fuDWVLigJQXV0tkpOTRWhoqOjTp48wMTERw4YNE8OGDRMJCQntPr+mqa2tFZs2bRJ9+/YVM2fOlKTQkoR+XbCEEGXHhRhpKUQPc80qWIcOHRIDBgwQo0ePFjk5OS16T0lJiZg+fboYPny4UCgU4urVqx2cUiPJ/iHGwaGuA0ILVFRUCCcnJzF37lzJzllWVtZQtiwtLYWVlZUIDg4Wu3btEo8ePWr3+WtqasRnn30munfvrvOzNzdu3BAARH5+vtxRdN4TW6s1UrCESohdrwqhr6cZBev+/futnrUSQoiDBw+K/v37i7Fjx4p169YJDw8PYWJiIhYsWCCuXLnSwak1iuwfYhwc6jogtMAf/vAHMXToUPHgwYMOOX95eblISEgQCoVC9O7dW5iZmTXc5mvPNRMTE0WfPn0kTKqZHj58yIKlBrZsEcLRUYi2/F9aHf+OcPjwYTFgwADh6OjY4lmr5gpZamqq8PHxEUZGRiI4OFhcunSpo6JrEtk/xDg41HVAaLj9+/cLIyMjkZmZ2SnXe3xNVd++fYWJiUlD2SopKWnVuViw6rBgyev6dSFeekkIMzMhoqJ+NYvVAnfuCDFokBA7dnREutZr66xVSwtZamqq8Pf3F4aGhiI4OLhVaz61kOwfYhwc6jogNNjVq1dFz549xerVq2W5fm1tbUPZ6tevnzAwMBBeXl4iJiZG3Lx585nvZ8Gqw4IlD5VKiLg4ISwthfDzE6I9uxHExwthaiqEQiGEnEvpDh8+LAYOHCgcHR1bvAVLWwvZr4tWQUFBe6JrKtk/xDg41HVAaKja2lrh4+Mj/Pz8ZHs24K/zZGdni2XLlolhw4Y9UbauX7/e6HtYsOqwYHW+4mIhfH2FsLKqK1lS/CuUkyPE4MFCjB8vxI0b7T9fa7Rn1qq1hezXjh8/Lvz9/YWBgYEIDAwUFy5caNN5NJTsH2IcHOo6NHYfrBUrVuDs2bPYsmVLux+FIwV9fX24uLggMjIShYWFyMjIwPjx4/HXv/4VAwYMwI4dO+SOSASVCtiwARg9GjA1Bc6eBRQKQIp/hZydgZMnAWNjwNUVSE9v/zlbIjk5GY6Ojjhx4gTS09Ob3NfqcQ8ePEBoaCj8/f0xa9YsZGdnw8XFpU3Xf+GFF5CYmIjs7GwAwKhRoxAUFIT8/Pw2nY+ItITQQJmZmcLY2FgkJSXJHaVFTp8+LW7duvXUzzmDVYczWJ3j7F
kh3N2F6N277pZeR6mpESI8XAhDw7o1XR2ltLRULFy4sGHWqqVPTEhKShIDBw4UDg4Okj3J4XGnTp0SgYGBwsDAQPj7+7d4gb2Gkn2WgINDXYfGzWCVlJTglVdeQVhYGKZMmSJ3nBYZPXo0evfuLXcM0mEqFRAYCAwdCpw/D4SEdNy1DAyAqChg927gs8+A4GCgrEzaa6SmpsLJyQlHjhzBiRMnEBUVBWNj42bfU1ZWhrCwMEyfPr3ds1bNcXJywq5du5CXlwdra2u4u7sjICAAOTk5kl+LiNSXxhWsd955Bz169MCf/vQnuaMQaQx9feD4cWDbNqBHj8655ksvAZmZQG4u4O0N/Otf7T9nWVkZIiIiMHnyZMycORPZ2dlwdXV95vvaUsjay9HREUqlsqFoeXh4ICAgACdPnuzQ6xKRetCogrVx40YkJiZix44dz1xjQURPsrbu/GuOGAFkZAC2toCbG9Cex/qlpaXByckJ+/btw/Hjx1s8a9WWQiYlBwcHKJVKnD59GtbW1vD09MSUKVOQlZXVqTmIqHNpTMG6ePEi3n//faxbtw7Dhw+XOw4RtZCFBfDPfwKLFgEBAUBkZN0ty5aqL0m+vr6YOXMmcnJyWlSS2lLIOpK9vT2USiUKCwthZ2cHLy8vTJkyBRkZGbJlIqKOoxEFq7KyEkFBQXj55ZcxZ84cueMQUSvp6QHh4cDevcC+faWYM+dNPHz48JnvO3r0KBwcHLB///5Wz1q1tpB1Fjs7O8TFxTUUrfHjx8Pb2xtHjhyROxoRSUgjCtaiRYvw8OFDfPHFF3JHIaJ2mDoV2LPnLi5d+h6urq44d+5cs8dfunQJr776aotv7aWlpWHMmDHYt28f0tLSZJ+1ao6trS3i4uJw8eJFuLi4YNq0afD29kZiYqLc0YhIAmpfsA4ePIi4uDhs374dlpaWcseRlKGhFezsnOSOQepIBaz1BfQNgfdTH/t5NbBwEDB775PHOXwC1ODJ4963A2Zs77TELTZw4EAcO3YM3t7e8PT0xD//+c8mj503bx4+/fTTVs1avfzyy8jJyYGbm5vU0TvE4MGDERsbi4KCAri4uCAoKIhFi0gLqHXBunbtGkJCQrBixQq4u7vLHUdy1dXeKC4+LHcMUmPdrIDNS4DCWrmTSMvExASbNm3CypUrMWfOHERERKC2tm1/SE2atWrOoEGDEBsbi8LCQri4uOCVV16Bl5cXixaRhlLbgqVSqfD73/8ezs7OeP/99+WOQyQL29eBGT8CkV8DQu4wHUChUODbb7+FUqnE5MmTcfv27Ra/t7y8XGNnrZozYMAAxMbGNtw6DAoKgqenJw4dOiR3NCJqBbUtWJ999hm+//57bNmyBfr6ahuTqEPpWQGfRACHlwEZ5XKn6RheXl44deoU9PT04Orq2qLtC44fPw4nJyckJiYiNTVVY2etmtOvXz98/vnnKC4uhoeHB5YuXcrH7xBpELVsLidPnsTy5cuxadMm2NjYPPV6fn4+p81JZwyeC8w3AJZsApra3aBgNdC3J9CzftgA63/s1Jjt0rt3byQlJeG1117DxIkTsWnTpkaPq5+18vHxwcsvv4zc3FyMGzeuk9N2LhsbG6xZswa3bt1iwSLSIF3kDtCYefPm4Z133oG/v3+jr584cQLvvfcecnNzMXTo0E5OR9TJjIEPlgNOC4CEmY0fMmQecHgRYFD/gxpg+USg5Tfc5NelSxdERUXB0dERCoUCWVlZ+OKLLxo2FT5x4gTmzp0LlUqFlJQUjB8/XubEncva2hr37t2TOwYRtZBazmA9y5tvvgl/f3+8+uqrqKqqkjsOUYfr/lvgv4cCS1cDFY283sUS6Ncf6F8/+gFdDRo5UAPMnj0bx48fR0pKCry8vFBYWIiIiAhMnDgRkyZNwunTp3WuXAF1BaukpETuGETUQmpZsP72t79h7dq12LdvX5PHrFu3Dj///DM+/vjjTkxGJBN9YN4KoGYDsOeu3GE63p
gxY5CVlQVra2tMnjwZX3/9NVJTUxEXFwczMzO543WI8vJypKamQtXENvecwSLSLGpZsNzc3LB06VK8+eabuHHjRqPHdOvWDf/4xz8QGxuL/fv3d3JCos5n6gn8cRpwr0zuJJ2jR48e2LdvH65evYqtW7fCw8ND7kgd6saNG5gwYQLu37/f6OssWESaRS0LFgB89NFHcHR0xOuvv97k3+jGjRuHTz75BG+88QZu3rzZyQmJOpA+8M63QM6Sx9ZV6QGBXwK1KmD7jCePO/unXy2oNARWFwN7Z3dmaOkZGRnBwsICpaWlckfpcNa/PI27qduAVlZWLFhEGkRtC5a+vj7i4+ORm5uL1atXN3nc4sWLG4qYENq4UxCRbtOVYtGtWzfo6+s3+WflGiwizaK2BQuo2wdGqVRiyZIlyMzMbPSY+iKWk5PTbBEjIs2kK7fG9PX1YWlp2WzB0oXfA5G2UOuCBQBTp05FaGgoZs+ejQcPHjR6TL9+/RAfH4/Fixc3WcSISDPp0sxNc7N1LFhEmkXtCxYA/PnPf4aFhQUWLFjQ5DHTpk2DQqFotogRkebRpWLR3J9VV26VEmkLjShYxsbG2LVrF/bs2YNt27Y1edzKlSthYWGBhQsXdmK6tuvVC5g0Se4UROpN1wpWU7N1ujSTR6QNNKJgAcCwYcOwevVqvP322ygsLGz0GGNjY2zfvh27d+9utoipC3d34B//kDsFaYK9e4HvvpM7hTysrKx0plg0Vyatra1RVVWlE9+oJNIGGlOwAOCtt95CQEAAXnvttSZ3cLe3t8eaNWuaLWJEmmbzZuDoUblTyEPXZrCaK1hA09s4EJF60aiCBQBr167F3bt38cknnzR5jEKhgL+/P2bPns1H6ZBWKCsDtHQD82fSpYL1rEXuAHTmd0Gk6TSuYFlZWWHnzp2IjY1FcnJyk8etW7cOd+7cwdKlSzsxHVHHKC1lwdIFza2zMjIygpmZmc78Log0ncYVLKBuB/clS5Zgzpw5Te7gXl/EYmJimi1iRJpAl2ewdOnbc88qk7r0uyDSdBpZsABgyZIlcHBwaHYH93HjxmHx4sXNFrFOpQLW+gL6hsD7qY/9vBpYOAiYvVeuYKTudLlg6dK3555VsHTpd0Gk6TS2YOnr60OpVCInJwcxMTFNHvfxxx/DwcEBc+fOVZtH6XSzAjYvAQpr5U5CmkLXC1ZVVRXKyrT/KdfPmqHSpdulRJpOYwsW8O8d3CMiIpCVldXoMfVF7OTJk80Wsc5k+zow40cg8mtAPSofqTtdL1iAbizurp+hauovgyxYRJpDowsWULeD+1tvvYXZs2fj4cOHjR7z+KN0cnNzOznh0/SsgE8igMPLgIxyudOQJmDB0p2CVVNTg0ePHjX5ui78Hoi0gcYXLABYtWoVzM3NERYW1uQx06dPxxtvvIFXXnmlySLWmQbPBeYbAEs2ASq5w8isqQ8TqlNTA1RV6W7B0qVvzz1rrytd2nSVSNNpRcEyNjbGjh07sHPnTmzfvr3J41pSxDqNMfDBcuBiNJBwR+4w8lCpVNi8eTPGjh2LmTNnYsiQIXJHUkv1S4/MzeXNISdd+faclZUV9PT0+MBnIi2gFQULqNvBffXq1Zg/fz4uXrzY6DEmJiYtKmJSqqwEfvih8de6/xb476HA0tVARaekUR8nT56Et7c3PvzwQ7z77rvYsWMHDA0N5Y6lluoLlq7OYAG68+05Q0NDmJubs2ARaQGtKVgAEBoa+swd3O3t7bFq1apmi1h7lZcDe/YAc+YAffoAb73VxIH6wLwVQM0GYM/dDomidq5du4aQkBB4enpi6NChKCgoQHh4OIyMjOSOprZYsHSrWDxrN3dd+T0QaTqtKlhA3Q7uP/30EyIjI5s8Zv78+Zg+fbqkj9IpKwMSE4GQkH+XKn19YOtWYN++pt9n6gn8cRpw71ffQK+pAZTKuv/VBuXl5YiOjsbIkSNx9epV5ObmQqlUolevXnJHU3ssWL
pVLJr7s+rKrVIibaB1BcvKygpbt27FypUrkZKS0uRx69atw4QJE1Bb2/bNqEpKgN2760pV797AG2/U/Xz7duDmzbqCFBAAGBv/8gZ94J1vgZwlgEH9SfSAwC+BWhWwfca/z11YCCxaBIwdC3z7bZsjyk4IYOdOYMKEydi4cSOUSiWOHDmC0aNHyx1NY1RVXcaLLy6BLk/y6VKxaO52qK7cKiXSBlpXsADA29u7YQf3W7duNXqMtbU1Vq5cCVNT01ad++ef/12c+vQBPvgAMDUFvvwSuHHj36+1dzmRvT1QXAzMnAlMn153zqKi9p2zs+XmAhMn1s3mzZ27AefOncOMGTPkjqVxHjz4F3Jy/iJ3DFnpUrFobgbL2toaZWVlqKys7ORURNRaWlmwAGDp0qWwt7eXZAf369evY8uWDPj41M1ULV9eV4BSU4HLl4G4uLoC1KWLROF/YW4OREYCZ8/WlTh7eyAsDHjwQNrrSO3u3bqc7u7A4MF1s3HvvDMKxg1TedQaZWVlMNPl+4PgLcLHXwOa3saBiNSH1has+h3cs7Ky8Pnnn7f6/ZcvX8bq1avh5eWFAQMGIC7uf+DlBWRnA5cuAdHRwLhxgJ5eB4T/laFDgV27gAMHgCNHgOefBzZsANpxd7NDVFcDsbHAkCFAejpw7FjdjF7fvnIn02wsWLpVsJq7HWplZQVANzZdJdJ0WluwAKB///7YuHEjwsPDkZeX98zjf/jhB8TGxsLb2xu2trbYtGkTpkyZgqysLKSn78Of/gSMGdPxuZvi6wvk5dXNai1ZUjdDlJYmX57HpaTU/W7+93+Bzz8HMjMBT0+5U2kHFiyuwapnbm4OY2NjnfldEGkyrS5YAPCf//mfmDt3LoKCghrdwf3cuXOIjo5uKFUbNmzA5MmTkZ+fj3PnziEyMhIuLi4yJG9cly6AQgEUFABeXoCPDxAUVHerUi4//QTMmlW3XqywsG7Rf2fM7OkKFizdmsF61p+1W7duOvO7INJkWl+wAGDNmjUwMzPDe++9BwANxcne3h6jR4/G7t27MXnyZFy8eLHhtREjRsicunndu9fdjjtzBnj4sG59VmRk3R5cna1Xr7qCt3y5bu823lFKS0tZsLjI/YnXdeV3QaTJJF6WrZ5MTEywfft2uLu749ChQ7h58yYmTZqEBQsWYMaMGbCxsZE7Yps9/zxw8GDdHlzvvgts2gSsWAEEB3fuLJKOf/53KM5gPfntOW3/ssSzbofq0mwekSbTiYIFAA4ODigqKkJSUhKmTZuGnj17yh1JUgEBgJ8fsG4dsGABsHkzEBMj75oxkkZ5eTkL1mPfnuvTp4/MaTpWS2awWLCI1J9O3CKs17dvX4SEhGhduapnZFS3PcKFC8CIEYCbW916qCa2AiMNwRks3fr23KhRo/DVV181ub2MLi34J9JkOlWwdIWNTd3eXGlpdYvOn38eWLPml20dVMBaX0DfEHg/9bE3VQMLBwGz9/7yz78c5/AJ8MTTeqqB9+2AGZ3zrGwCCxagW9+es7KywvTp06HXxD1+rsEi0gwsWFrM3b1uP6rPPwe+++7JNVndrIDNS4BCNdtLi57GglWnuW/P3b9/H3/7298ke7aoOuMtQiLNwIKl5fT06ha8791b9/DperavAzN+BCK/Btq3zz11NBasOs3N3OTn5+Pjjz/G8OHDsW7dOq19lMydO3eQnp4OBwcHuaMQ0TOwYOkoPSvgkwjg8DIgQ4atHajlysrKtP6bcy3R3MyNh4cHLl++jMWLF2PFihUYNGgQoqOjUS7HviUdZM+ePXBwcEB5eTlef/11ueMQ0TOwYOmwwXOB+QbAkk2AqoljClYDfXsCPeuHDbD+x06NqfP8/f2xdu1aJCYmyh1FVs+6NWZsbAyFQoHi4mIsX74cf/nLXzB48GBER0ejrKysE5NKq6SkBKGhoZg9ezbee+89pKamws7OTu5YRPQMLFi6zBj4YDlwMRpIuNP4IUPmATmngF
P1IxuY068TMxJCQ0MRFhaGoKAgBAQE4MqVK3JHkkVLvz1nZGQEhUKBoqIi/PnPf8bGjRsxePBgREZG4oG6Pyn9V/bv349Ro0bhzJkzyMvLQ3h4OAwMDOSORUQtwIKl47r/FvjvocDS1UBFI693sQT69Qf6149+QFf+971TGRoaIjw8HGfPnkVlZSUcHR0RGxuLWnV72ncHqqysxE8//YTvv/8e9+/fb9F7jIyMEBISgvz8fKxcuRI7duzAkCFDEBkZ2eJzyKV+1up3v/sdFi5ciLS0NDz//PNyxyKiVmDB0nX6wLwVQM0GYM9ducNQc4YMGYKkpCRs3LgRK1aswIQJE3D27Fm5Y3Wo2tpabNmyBSNGjMC5c+dQXFwMW1tb/PGPf2zxVgWGhoYICQnBuXPnsGrVKnz55ZcNRUsdtzs4cOAAHBwccPr0aeTm5nLWikhDsWARTD2BP04D7mnuMhWdEhgYiAsXLsDBwQHOzs4ICwtDaWmp3LEkl5KSAmdnZ3zwwQd4++23UVRUhMLCQsTHx+Obb77BgAEDEBYWhlst3Em3vmjl5+dj3bp12LlzJwYNGoSIiAi12PagftZq5syZWLBgAY4fP46RI0fKHYuI2koQkcY6evSoGDFihBgyZIhISkqSO44kkpOThaurqzA3Nxfh4eHi3r17Tx1TW1srEhIShIuLi+jatatYuHChuHHjRquuU1tbK3bt2iVGjhwpLCwsRHh4uLh7965Ef4rW2b9/v+jXr59wd3cX58+flyVDG4GDg6PxAUFEGq2srEwsW7ZMGBkZicDAQHH79m25I7VJRkaG8PHxEUZGRkKhULSoMKlUKpGQkNBQyBYuXCiuX7/equvWlzVnZ+c2l7W2unfvnlAoFMLExERERUWJmpqaTrmuhGT/EOPgUNcBQURa4cyZM8LDw0NYW1uLuLg4oVKp5I7UIufOnROBgYHCwMBABAYGiqKiojadJzk5WYwbN04YGxsLhUIhrl692qr3S1HWWqN+1srJyUnk5eV12HU6mOwfYhwc6jogiEhrqFQqERcXJywtLcXEiRPFhQsX5I7UpB9++EEoFArRpUsX4e/vL86cOSPJeZOTk4WHh0fDTNiVK1fadA43N7eGonXt2jVJsgnx71krQ0NDER4eLqqqqiQ7twxk/xDj4FDXwUXuRFpET08PCoUCFy5cQK9evTB27FhERkaq1TP6fvrpJ0RERGDEiBEoLi5GRkYGEhMT4ejoKMn5J0+ejPT0dHz77be4ePEihgwZgpCQEBQVFbXqHFlZWdi7dy8yMzNhZ2eH0NBQXLt2rV3ZDh48CEdHR2RmZiIrKwtRUVEwNDRs1zmJSE0JItJaCQkJYsCAAcLR0VGkp6fLmuXu3bsiPDxcmJmZCQ8PD3HkyJFOuW5qaqrw9fUVhoaGIjg4WFy8eLHV5zhw4IDw9PQUJiYm4r/+679aPaNVUlKiTbNWj5N9loCDQ10HZ7CItFhAQADy8/Mxbdo0jB8/HqGhoZ2+m3lpaSmio6MxZMgQ7Nu3D1u2bEF6ejpefPHFTrm+t7c3UlJScOTIEdy7dw/29vYICQlBYWFhi88xdepUnDhxAgkJCTh9+jR+/LHlz4s6dOgQHBwcOGtFpGsEEemEnJwc4eLiImxsbER8fHyHX6+yslLExcWJvn37isGDB4u4uDi1+Jbc8ePHhb+/f8Oi+o5ap6bFs1aPk32WgINDXQcEEemM6upqERMTI8zNzYW/v7+4fPmy5Neo31/Kzs5O9OvXT8TExIiKigrJr9NeJ06ceKJo5efnS3bugwcPiv79+4vRo0eL3Nxcyc6rhmT/EOPgUNfBW4REOqRLly4ICwvDmTNnUFVVBXt7e0RHR0v2XMOUlBSMHTsW8+fPh0KhwMWLFxEWFgZjY2NJzi8lT09PJCYmIicnBwDg4OCAgIAA5OXltfmc9+/fR2hoKH77299i9uzZOHnyJMaOHStVZCLSICxYRDrIzs4Ohw8fxt///n
esWrUK48aNaygabZGSkgI3NzfMmDEDU6dORVFREcLDw2Fqaiph6o7h5OSEXbt2ITc3F6ampnBzc0NAQAByc3NbdZ7Dhw/D0dERGRkZyMzMRFRUFIyMjDooNRGpOxYsIh0WGBiIgoICuLq6wtPTE2FhYXj06FGL35+ZmQlfX19Mnz4dzs7OuHTpEqKiomBlZdVxoTvI6NGjsWvXLpw6dQrW1tZwd3dHQEAAsrOzm31f/axVQEAAXnvtNc5aEREAFiwinWdtbY24uDikpKQgKSkJTk5OOHz4cLPvOX/+PIKCguDl5YUePXrgwoULiIuLQ9++fTspdcdxcHCAUqlsKFoeHh6YMmUKsrKynjr28VmrjIwMzloRUQMWLCICAEyYMAF5eXkIDg7GSy+9hKCgINy+ffuJYy5fvozQ0FA4OTkBqCtau3btgq2trRyRO9SoUaOgVCpRUFAAOzs7eHl5YcqUKcjIyMCDBw+emrVydnaWOzIRqRE9IYSQOwQRqZfCwkLMnz8fp06dQlRUFGbMmIHVq1cjJiYG48ePR1RUFFxcXOSO2akKCwuxYsUK7NixA0OHDoWJiQm2bNnSUDZ1lJ7cAYjUFQsWETVKpVJh/fr1WLx4MUxMTDBs2DB8+umnGD9+vNzRZHXp0iXcv38fo0eP5oahLFhETWLBIqJm3b59GxUVFRg4cKDcUUj9sGARNYEFi4iI2ooFi6gJXOROREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYixYRERERBJjwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJPb/CH5uCVU2JPsAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<PIL.Image.Image image mode=RGBA size=600x200 at 0x7FF8471453C8>" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"MolsToGridImage(mols)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2\n" | |
] | |
} | |
], | |
"source": [ | |
"pattern = mols[0] # MolFromSmiles\n", | |
"matches = [smiles_list[i] for i, m in enumerate(mols) if m.HasSubstructMatch(pattern)]\n", | |
"print(len(matches)) # result: 2 (both molecules match)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"However, if you move the double bonds by writing a different Kekule structure for the benzene ring, the match fails again, because single and double bonds no longer line up across the two structures:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"smiles_strings = '''\n", | |
"C12=CC=CN1NCCC2\n", | |
"C12C=CC=C(C=C3)C=1N3NCC2\n", | |
"'''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['C12=CC=CN1NCCC2', 'C12C=CC=C(C=C3)C=1N3NCC2']\n" | |
] | |
} | |
], | |
"source": [ | |
"smiles_list = smiles_strings.splitlines()[1:]\n", | |
"print(smiles_list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"params = Chem.SmilesParserParams()\n", | |
"params.sanitize = False" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mols = [Chem.MolFromSmiles(x, params) for x in smiles_list]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"C1=CN2NCCCC2=C1\n", | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 N False\n", | |
"5 N False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 C False\n", | |
"0 1 DOUBLE False\n", | |
"1 2 SINGLE False\n", | |
"2 3 DOUBLE False\n", | |
"3 4 SINGLE False\n", | |
"4 5 SINGLE False\n", | |
"5 6 SINGLE False\n", | |
"6 7 SINGLE False\n", | |
"7 8 SINGLE False\n", | |
"4 0 SINGLE False\n", | |
"8 0 SINGLE False\n", | |
"\n", | |
"C1=CC2=C3C(=C1)C=CN3NCC2\n", | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 C False\n", | |
"5 C False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 N False\n", | |
"9 N False\n", | |
"10 C False\n", | |
"11 C False\n", | |
"0 1 SINGLE False\n", | |
"1 2 DOUBLE False\n", | |
"2 3 SINGLE False\n", | |
"3 4 DOUBLE False\n", | |
"4 5 SINGLE False\n", | |
"5 6 DOUBLE False\n", | |
"4 7 SINGLE False\n", | |
"7 8 SINGLE False\n", | |
"8 9 SINGLE False\n", | |
"9 10 SINGLE False\n", | |
"10 11 SINGLE False\n", | |
"7 0 DOUBLE False\n", | |
"11 0 SINGLE False\n", | |
"8 6 SINGLE False\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"for m in mols:\n", | |
" Chem.SanitizeMol(m, Chem.SANITIZE_ALL ^ Chem.SANITIZE_SETAROMATICITY)\n", | |
" print(Chem.MolToSmiles(m))\n", | |
" for a in m.GetAtoms():\n", | |
" print(a.GetIdx(), a.GetSymbol(), a.GetIsAromatic())\n", | |
" for b in m.GetBonds():\n", | |
" print(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondType(), b.GetIsAromatic())\n", | |
" print()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAADICAYAAAA0n5+2AAAsZUlEQVR4nO3deVRV5eI+8AeQQVEEccwRnBlEBkEChxAjFcqbXzBDKMtA66tkw8W0lLzXgvU1hfqloVfTY9pVbmZCDojeEhRUBmf0AKY5mwYOjMJ5f3+cIE1Ahs3Z5xyez1p7teLs4emsisd3v/vdBkIIASIiosYzkDsAkbYylDsAERERkb5hwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYixYRERERBJjwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGJt5A5ARPrrxo0bOHDgAExNTdGuXTu0b98exsbGsLKygomJCczNzWFubg4TExO5oxIRScpACCHkDkFE+uf+/ftwcXFBeXk5VCoViouLUVxcjIqKilr3t7S0hLGxMTp06IC2bdvCzMwMFhYWMDY2RseOHWtKWocOHWBsbAxLS8vHSlpd5zAxMYGFhYWGv4FWwUDuAETaiiNYRNQivvvuOxQXF+PXX3+FkZHRI58VFRXhwYMHuHfvHkpLS1FWVoY7d+7gwYMHuHv3LsrKylBaWop79+7hwYMHKCoqQkVFBYqLi3H9+nVUVFTU/Oz+/fsoKSlBeXn5I+eojZmZGdq2bYuOHTvi22+/xciRIzXxVRBRK8QRLCJqEePGjYObmxtiYmJkuX59JW3Lli3IyclBZmamLNn0CEewiOrAgkVEkrty5Qr69u2LnJwcODo6yh3nMWfOnIGDgwOuXLmCHj16yB1Hl7FgEdWBTxESkeQ2bNiA4cOHa2W5AgA7Ozv06dMHKSkpckchIj3FgkVEktu4cSNCQ0PljlEvPz8/7N69W+4YRKSnWLCISFKHDx9Gfn4+pk6dKneUevn5+SE5ORkqlUruKESkhzgHi4gk9dZbb+Hy5cv44YcfHvtswYIFyM/Pb/DyCn9doqF9+/Y1+zfX3bt30blzZ6SlpcHd3b3Z52ulOAeLqA5cpoGIJFNRUYGtW7fiq6++qvVzR0dHCCFqlmi4detWg5ZoqE1jSpqtrS3ef//9R463sLDA008/jd27d7NgEZHkOIJFRJLZtm0bZs6ciWvXrsHU1FSy81Yvr1C9UOnD62jVtgbWX0ta+/btsXz58sfO++mnnyIxMRGHDh2SLGsrwxEsojqwYBGRZCZPnoynnnoKK1eulDtKg+Tk5GDEiBG4ceMGrK2t5Y6ji1iwiOrASe5EJInbt29j165dWv/04MOGDx+Orl27crkGIpIcCxYRSeK77/bCxsZGp14/Y2BggGeffRZ79uyROwoR6RkWLCKSxJo1L+G1147KHaPRnnvuOezatQucLUFEUuIcLCJqttxcwN4eKCgAbGzkTtM4hYWF6NKlCzIzMzF8+HC54+gazsEiqgNHsIio2TZsAMaO1b1yBQBWVlYYMWIEV3UnIkmxYBFRs6hUwObNgA7NbX+Mn58f52ERkaRYsIioWfbtA37/HZgyRe4kTTdhwgQcPHgQd+7ckTsKEekJFiwiahaFAnjxRaBDB7mTNN2IESNgaWmJ//73v3JHISI9wYLVilVVVeGrr77CBx98gO+++07uOKSD7t8Htm8HQkLkTtI8hoaG8PX15TwsIpIMC1Yr9dNPP8HV1RWLFi3CjRs38Morr8DHxwfHjx+XOxrpkIQEoGNHwM
dH7iTN5+fnh507d8odg4j0BAtWK3Pp0iWEhobi2WefhYeHB86cOYN169ZBqVRi4MCBcHNzQ2hoKG7evCl3VNIBGzeqR6+MjORO0nx+fn64fPkycnNz5Y5CRHqABauVKCkpQVRUFAYNGoRr164hJycH8fHx6Ny5MwDgqaeeQnx8PNLT01FQUIDBgwcjJiYG5eXlMicnbXXxIvDzz0BwsNxJpNG9e3cMHz6ctwmJSBIsWHpOCIGEhAQMHToUmzdvRkJCAvbu3Qt7e/ta93dzc0NaWhpWr16NlStXYtiwYUhKStJwatIFGzcCrq6Ag4PcSaQzYcIEFiwikgQLlh7LzMzEqFGjEBYWhjfffBMnT56Ev7//E48zMDBAYGAgcnNzMW3aNAQGBmL8+PE4ffq0BlKTrti0Sfcnt/+Vn58fDhw4gJKSErmjEJGOY8HSQ1evXkV4eDg8PT1ha2uLs2fPIjIyEqampo06T7t27RAVFQWlUokePXrA2dkZERERKCoqapngpDNu3wa6dAGmTZM7ibSefvppmJqa4qeffpI7ChHpuFZVsK5fvw6FQoFbt27JHaVFVFRUIC4uDkOHDsW5c+dw9OhRKBQKdOvWrVnn7d27NxQKBZKTk/Hzzz+jf//+iIuLQ1VVlUTJSddYWwMHDgB/TOHTG23atMG4ceO4qjsRNVurednzqVOn4OHhASsrK1y/fh1jx47FlClTMHnyZPTo0UPueM2WmJiIt99+GxUVFVi6dClCQkJgYCD9e1hVKhW++eYbvP/++3jqqacQFxeH0aNHS34dIrmsWbMGy5Ytw7lz5+SOogv4smeiOrSKEayysjIEBwdj2rRpuHz5Mo4fPw5vb2988cUX6NWrF9zc3BAVFYX8/Hy5ozZabm4uJkyYgJdeegkhISFQKpUIDQ1tkXIFqBdkDA0Nxblz5+Dn54fx48cjICAAv/zyS4tcj0jTJkyYAKVSiYKCArmjEJEOaxUFa968eSgpKcGKFSsAAPb29oiKisKZM2dw4sQJBAYGIiUlBQMHDqz5TNv/9Pr7778jIiICTk5O6NChA86cOYOoqCi0bdtWI9e3tLREdHQ0Tp48CSEE7O3tMX/+fNy7d08j1ydqKb169YK9vT2fJiSiZtH7W4Tff/89pk2bhvT0dDg7O9e774ULF/DDDz8gISEBhw4dwtChQxEYGIiAgAC4urpqKHH9KisrsW7dOixcuBB9+/ZFbGwsvL295Y6FlJQUvP322ygsLMTixYsxc+ZMGBq2iv5Oeui9996DUqnEjh075I6i7XiLkKguQo9dunRJWFtbi9jY2EYfe/HiRREbGyt8fX2FkZGRsLW1FXPnzhWpqalCpVK1QNonS0lJEQ4ODqJHjx4iPj5eVFVVyZKjLhUVFSI2NlZ07NhRjBgxQhw8eFDuSNQcVUJ86SOEQRsh5h146OcVQszpI8TL38sVrOXt3btXmJubi7KyMrmjaDtw48at9k1vhxhUKhVCQ0Ph7u6OuXPnNvr4Pn36ICIiAnv37sX169exePFinD9/Hj4+PrCxsUFERATS0tIgRMsPAObl5SEoKAgTJ06Ej48Pzp49i7CwMK0bITI2NkZERAQKCgrg6emJMWPGICgoCJcuXZI7GjVDR0tg3UJA2YoeGh09ejQMDAyQlpYmdxQi0lHa9RtaQkuWLMGZM2fw9ddfN3vCd+fOnREaGorExERcv34dS5Yswfnz5zFu3Dj07dsX4eHhSExMRGVlpUTp1e7fv4+oqCg4OjqitLQUubm5iIuLg4WFhaTXkZq1tTXi4uJw5MgR3LhxA0OHDkVUVBTKysrkjkZNYPMqMPlXIOp7QK/nEzzExMQEY8eO5TwsImoyvSxYaWlp+OSTT/DNN9/UugZUYWFhk8/dqVOnmrJ148YNfPbZZygtLcW0adPQvXv3ms8ePHjQ5GuoVCooFAoMGDAAW7Zswfbt25GYmAhbW9smn1MOzs7O+Pnnn/Htt99CoVBg0KBBUC
gUcseiRjKwBD6aD+xZDGSUyp1Gc/z8/FiwiKjJ9K5gFRUVISQkBO+99x58fX1r3ee5557D4MGD8cEHHyArK6vJ17K0tERgYCAUCgVu3ryJr7/+GgAQHByMbt261ZStxrww+fDhw3j66acxb948fPDBBzh58iSee+65JmfUBgEBATh9+jTeeust/O///i98fHxw4sQJuWNRI/SbAcwyAhauBVRyh9GQSZMm4dSpU/j111/ljkJEOkjvCtbs2bPRpUsXREVF1bnPzp07sXDhQpw6dQqenp7o27dvzZwqlappvz7atWuHgICAmrK1YcMGAEBISAg6depU89n9+/drPf7y5csIDQ3FqFGj4OTkhHPnziEiIgJt2rRpUh5t07ZtW0RGRiI3Nxe9evWCi4sLQkNDcfPmTbmjUUOYAu8uAfJigB36+SKEx9jY2GDAgAFITk6WOwoR6SC9Kljx8fFISkrCpk2bYGJiUud+1tbWj9zm+8c//oHz58/D19f3kZGnpt7mMzMzqylUt2/fxp49e2Bra4u///3v6Nq1a81nd+/eRUlJCWJiYjB06FBcvXoVWVlZiI+PR2d9ewfJH3r27AmFQoH09HTk5+djyJAhiImJQUVFhdzR6Ak6PQ/8fQCwaDnQWmbTTZgwga/NIaKmEXri9OnTol27duKbb75p8jmKi4vFjh07REhIiGjfvr3o1KmTCAkJETt27BDl5eXNzvjgwQOxd+9eER4eLrp16ybMzMzEwIEDxcCBA8WOHTuafX5dU1VVJdauXSu6d+8upkyZIioqKuSORA/7Y5kGl38KUfnHj0oOCjHUQghrc/1epkEIIYqKisSkSZPEoEGDRFhYmLh8+bLckbSR7I/Cc+OmrRuEHigrKxNOTk5ixowZkp2zpKSkpmxZWFgIS0tLERISIrZu3Sru37/f7PNXVlaKTz/9VHTq1EmS8qbLrl27JgCI3NxcuaO0eo8srVZLwRIqIba+JIShgX4XrF27dolevXoJZ2dnsWrVKjFy5EhhZmYm5syZIy5duiR3PG0i+y8xbty0dYPQA2+99ZYYMGCAuHv3boucv7S0VOzYsUOEhYWJrl27inbt2gl/f3+xYcOGZl0zMTFRdOvWTcKkuunevXssWFpg/XohHB2FaMq/0vryZ4Q7d+6IsLAwYWxsLCIjIx/5w09qaqrw8fERJiYmIiQkROTn58uYVGvI/kuMGzdt3XR+DtbOnTuxZs0abNq0CR06dGiRa1TPqYqPj8fVq1dr5lRFRkY+Mqfqzp07LXJ9opZ07RoweTLw5ptAcDBgbt6442/fBgYNAr79tkXiaUxycjIcHByQnp6OjIwMREdHPzKX09vbG/v27cO+fftQWFiIoUOHIjQ0FHl5eTKmJiJtpdMF68qVK3jllVcQHR0Nd3d3jVzTyMgI3t7eiIuLw5UrV7B3717Y2tpiwYIFsLa2rvnsxo0bGslD1FRCAKtXA0OGAGVlQG4uEBkJNPYFAdbWwJIlwOuvA+HhQDOWgJPF3bt3ER4eDn9/f7z88svIzMyEi4tLnft7e3sjMTER+/fvR2FhIezt7REaGgqlUqnB1ESk9YSOqqqqEj4+PsLPz0+2dwP+NU9mZqZYvHixGDhwoDAyMhJeXl4iNjZWXL16tdZjeItQjbcINe/8eSHGjRPC0lKI+HghpPhPKCtLiH79hBg1Sohr15p/Pk3Ys2eP6NOnj3B0dBSZmZlNOsfBgweFv7+/MDIyEoGBgeLs2bMSp9Rqst+G4cZNWzedHcFaunQpTp06hfXr1zf7VThSMDQ0hKurK6KioqBUKpGRkYFRo0bhyy+/RO/evbF582a5IxJBpVKPWg0bBrRtC5w6BYSFAVL8J+TiAhw9CpiaAm5uQHp688/ZUh4etZo2bRoyMzPh6urapHM9/fTTSExMRGZmJgDA3t4eQUFByM3NlTIyEekaoYMOHz4sTE1NRXJystxRGuT48ePixo0bj/2cI1hqHMHSjFOnhP
DwEKJrVyE2bGi561RWChEZKYSxsRDR0S13naZKTk4Wffr0EQ4ODk0etarPsWPHRGBgoDAyMhL+/v4iKytL8mtoEdlHCbhx09ZN50awioqKMHXqVERERGD8+PFyx2mQYcOGoWvXrnLHoFZMpQICA4EBA4AzZ4DQ0Ja7lpEREB0NJCQAn34KhIQAJSUtd72GKikpQUREBCZNmtSoUav09HRkZ2c3+DpOTk7YunUrcnJyYGVlBQ8PDwQEBDTrtVxEpHt0rmC9+eabsLa2xj/+8Q+5oxDpDEND4OBB4Jtv1JPSNeGFF4DDh4HsbMDbG/jlF81ctzapqalwcnLC/v37cejQIURHR8PU1LRBx+7evRvu7u4ICAjAkSNHGnxNR0dHKBSKmqI1cuRIBAQE4OjRo039xyAiHaJTBWvNmjVITEzE5s2b630VDhE9zspK89ccPBjIyABsbIARIwBNv9avpKQE8+fPh6+vL6ZMmYLMzEy4ubk16hwff/wxlEolnnrqKXh5ecHb2xv//e9/G3y8g4MDFAoFjh8/DisrK3h6emL8+PGNKmtEpHt0pmDl5eXhnXfewapVqzBo0CC54xBRA3XoAPznP8D77wMBAUBUlPqWZUtLS0uDk5MTkpKScPDgwUaNWv2Vra0t4uPjoVQqYW9vj2effRbe3t7Yv39/g89hZ2cHhUIBpVIJW1tbeHl5Yfz48cjIyGhSJiLSbjpRsMrLyxEUFIQXX3wR06dPlzsOETWSgYF6ja3t24GkpGJMn/467t271yLXqh61GjduHKZMmYKsrKxGj1rVxcbGBvHx8cjPz4erqysmTpxYsy5WQz1c1mxtbTFq1KhGlzUi0n46UbDef/993Lt3D1988YXcUYioGSZMALZtu438/JNwc3PD6dOnJT1/Wloahg8fjqSkJKSlpTVr1Ko+ffv2RVxcHJRKJVxdXREUFNToolVd1vLy8ppc1ohIe2l9wdq1axfi4+OxadMmWFhYyB1HUsbGlrC1dZI7BmkjFbByHGBoDLyT+tDPHwBz+wLB2x/dz+EjoBKP7veOLTB5k8YSN1ifPn1w4MABeHt7w9PTE//5z3+afc6HR61efPFFZGVlYcSIERKkrV+fPn0eKVpTp06tWRdLCNGgc/Tr1w9xcXE4d+5ck8saEWkfrS5YV65cQWhoKJYuXQoPDw+540juwQNvnD+/R+4YpMU6WgLrFgLKKrmTSMvMzAxr167FsmXLMH36dMyfPx9VVU37h9TUqFV9evfujbi4OFy4cAGjR4/GSy+9BBcXFyQkJDS4aP11VGzq1Knw8vJi0SLSUVpbsFQqFV555RW4uLjgnXfekTsOkSxsXgUm/wpEfQ807Ne0bgkLC8O+ffugUCjg6+uLmzdvNvjY0tJSWUat6tO1a1dER0fjwoUL8PPzw6uvvorhw4c3qmhVl7XqW4dBQUHw9PTE7t27Wzg9EUlJawvWp59+ipMnT2L9+vUwbOzbZ4n0hIEl8NF8YM9iIKNU7jQtw8vLC8eOHYOBgQHc3NwatHzBwYMH4eTkhMTERKSmpsoyalWfLl26IDo6GhcvXsTf/vY3vPHGG3BycoJCoWjwSF3Pnj3x+eef4/z58xg5ciQWLVrE1+8Q6RCtbC5Hjx7FkiVLsHbtWvTo0eOxz3NzczlsTq1GvxnALCNg4VqgrtUNzi0HuncGOldvPYCvftVozGbp2rUrkpOT8fLLL2PMmDFYu3ZtrftVj1r5+PjgxRdfRHZ2Ntzd3TWctuE6d+6MqKgoFBQU4MUXX0RERESji1aPHj2wYsUK3LhxgwWLSIdoZcGaOXMm3nzzTfj7+9f6+aFDhxAcHIz8/HwNJyOSgSnw7hIgLwbYcav2XfrPBLKOAceqt0xgek8NZpRAmzZtEB0djX/961+YO3cuwsPDUVFRUfP5oUOHMHz4cHz33XdISUnRulGr+lhbWyMqKgoXL15ESEgI3n77bQwbNqxRRcvKygqFhYUtnJSIpKKVBe
tJXn/9dfj7++Oll1565H/ARPqq0/PA3wcAi5YDZbV83sYC6NkL6FW99QTaG2k8piSCg4Nx8OBBpKSkwMvLC0qlEvPnz8eYMWMwduxYHD9+HKNGjZI7ZpNYWFggMjISFy9eRGhoKObNm4dBgwZh9erVqKysrPdYKysrFBUVaSYoETWbVhasf/3rX1i5ciWSkpLq3GfVqlX4/fff8eGHH2owGZFMDIGZS4HK1cC223KHaXnDhw/HkSNHYGVlBV9fX3z//fdITU1FfHw82rVrJ3e8ZuvQoQMiIyPxyy+/YMaMGViwYAHs7Oywfv36OosWR7CIdItWFqwRI0Zg0aJFeP3113Ht2rVa9+nYsSP+/e9/Iy4uDj/++KOGExJpXltP4OOJQGGJ3Ek0w9raGklJSbh8+TI2btyIkSNHyh1JchYWFvjwww/xyy+/4LXXXsOXX35Z59OGLFhEusVANPTZYQ1TqVR49tlnYWRkhF27dtX5JOE///lPfPHFFzh+/Di6d++u4ZTNk5QEzJwJXL8udxJ53b9/Hx06dEBubi6GDBkidxzSMh07dsT27dvxzDPPyB1FVu+++y6uXbuGzZs3yx3lYQZyByDSVlo5ggUAhoaG2LBhA7Kzs7F8+fI691uwYAEcHR3x6quvNnidGSLSHZaWlhy5AedgEekarS1YgHodGIVCgYULF+Lw4cO17lNdxLKysuotYkSkm3hrTI3fA5Fu0eqCBQATJkxAeHg4goODcffu3Vr36dmzJzZs2IAFCxbUWcSISDsdOnQIgwcPrvNzjtyosWAR6RatL1gA8H//93/o0KED5syZU+c+EydORFhYWL1FjIi0j6mpKfLz8+tcD4rFQo23Sol0i04ULFNTU2zduhXbtm3DN998U+d+y5YtQ4cOHTB37lwNpmu6Ll2AsWPlTkEkLysrK6hUqjr/YMSCpcaRPCLdohMFCwAGDhyI5cuXY/bs2VAqlbXuY2pqik2bNiEhIaHeIqYtPDyAf/9b7hSkC7ZvB37+We4ULcPKygoA6iwPlpaWLBZQf08VFRUoLi6WOwoRNYDOFCwAeOONNxAQEICXX365zhXc7ezssGLFinqLGJGuWbcO+OknuVO0jI4dO8LQ0LDOUSqOYKk9qYgSkXbRqYIFACtXrsTt27fx0Ucf1blPWFgY/P39ERwczFfpkF4oKQH0YAHzWhkaGsLCwoIF6wmqCxa/CyLdoHMFy9LSElu2bEFcXBz27t1b536rVq3CrVu3sGjRIg2mI2oZxcX6W7CA+idws2CpmZiYoF27dvwuiHSEzhUsAHB3d8fChQsxffp0XK9jGfTqIhYbG1tvESPSBfo8ggXUX6L49Nyf+F0Q6Q6dLFgAsHDhQjg4ONS7gru7uzsWLFhQbxHTKBWwchxgaAy8k/rQzx8Ac/sCwdvlCkbarjUUrLrmFvHpuT/xuyDSHTpbsAwNDaFQKJCVlYXY2Ng69/vwww/h4OCAGTNmaM2rdDpaAusWAsral/0hekxrKFj13SKsqKhASUkrect1PXi7lEh36GzBAv5cwX3+/Pk4cuRIrftUF7GjR4/WW8Q0yeZVYPKvQNT3gHZUPtJ2rb1gAZzcDbBgEekSnS5YgHoF9zfeeAPBwcG4d+9erfs8/Cqd7OxsDSd8nIEl8NF8YM9iIKNU7jSkC/S9YD1pkjvAggWwYBHpEp0vWADw2WefwdzcHBEREXXuM2nSJLz22muYOnVqnUVMk/rNAGYZAQvXAiq5w8js/v37ckfQapWVQEWFfhes+uYW8em5P3HRVSLdoRcFy9TUFJs3b8aWLVuwadOmOvdrSBHTGFPg3SVAXgyw45bcYeShUqmwbt06ODs7Y8qUKejfv7/ckbRS9dQjc3N5c7SkJ43M8Ok5NY5gEekOvShYgHoF9+XLl2PWrFnIy8urdR8zM7MGFTEplZcDFy7U/lmn54G/DwAWLQfKNJJGexw9ehTe3t5477338Pbbb2Pz5s0wNjaWO5ZWqi
5Y+j6CVV9x4NNzaixYRLpDbwoWAISHhz9xBXc7Ozt89tln9Rax5iotBbZtA6ZPB7p1A954o44dDYGZS4HK1cC22y0SRetcuXIFoaGh8PT0xIABA3Du3DlERkbCxMRE7mhaqzUUrCeNULFYqPF7INIdelWwAPUK7r/99huioqLq3GfWrFmYNGmSpK/SKSkBEhOB0NA/S5WhIbBxI5CUVPdxbT2BjycChX95Ar2yElAo1H/VB6WlpYiJicHQoUNx+fJlZGdnQ6FQoEuXLnJH03qtoWBVj1DVtZQKi4Uab5US6Q69K1iWlpbYuHEjli1bhpSUlDr3W7VqFUaPHo2qqqYvRlVUBCQkqEtV167Aa6+pf75pE3D9urogBQQApqZ/HGAIvLkPyFoIGFWfxAAI/BaoUgGbJv95bqUSeP99wNkZ2LevyRFlJwSwZQswerQv1qxZA4VCgf3792PYsGFyR9MZFRUX8cwzC6HPg3xWVlaorKys84EHFgs13iol0h16V7AAwNvbu2YF9xs3btS6j5WVFZYtW4a2bds26ty///5ncerWDXj3XaBtW+Dbb4Fr1/78rLnTiezsgPPngSlTgEmT1OcsKGjeOTUtOxsYM0Y9mjdjxmqcPn0akydPljuWzrl79xdkZf0/uWO0qOqlGLiae/2srKxQUlKC8vJyuaMQ0RPoZcECgEWLFsHOzk6SFdyvXr2K9esz4OOjHqlaskRdgFJTgYsXgfh4dQFq00ai8H8wNweiooBTp9Qlzs4OiIgA7t6V9jpSu31bndPDA+jXTz0a9+ab9jCtGcqjxigpKUE7fb4/CPUIlYGBAV/4/ARPKqJEpD30tmBVr+B+5MgRfP75540+/uLFi1i+fDm8vLzQu3dvxMf/E15eQGYmkJ8PxMQA7u6AgUELhP+LAQOArVuBnTuB/fuBIUOA1auBZtzdbBEPHgBxcUD//kB6OnDggHpEr3t3uZPpttZQsIyNjWFubs6C9QSWlpYAuOgqkS6QeMxFu/Tq1Qtr1qzBtGnTMHr0aDg7O9e7/4ULF/DDDz8gISEBhw4dwtChQxEYGIjPP/8crq6uGkpdt3HjgJwcYN06YOFCdcmKjQW8veVOBqSkqEetioqAzz8HQkI0Uz5bg9ZQsID651lxDpaaubk5TE1N+V0Q6QC9HcGq9re//Q0zZsxAUFBQrSu4nz59GjExMfD29oaNjQ1Wr14NX19f5Obm4vTp04iKitKKclWtTRsgLAw4dw7w8gJ8fICgIPWtSrn89hswbZp6vphSqZ70z3IlndZSsJ70PkKWCrWOHTvyuyDSAXpfsABgxYoVaNeuHebNmwcANcXJzs4Ow4YNQ0JCAnx9fZGXl1fz2eDBg2VOXb9OndS3406cAO7dU8/PiopSr8GlaV26qAvekiX6vdq4XIqLi1tNweIk9yfjd0GkG/T6FmE1MzMzbNq0CR4eHti9ezeuX7+OsWPHYs6cOZg8eTJ69Oghd8QmGzIE2LVLvQbX228Da9cCS5dq/hZdK/j9LxuOYD369Fxrf1iCo3lEuqFVjGABgIODAwoKCvDJJ5/g+vXrSElJwezZs3W6XD0sIADIzQXeew+YMwd45hng2DG5U5EUSktLWbBa0dNzCQkJCAgIqHONPhYsIt3QagoWAHTv3h2hoaHo3Lmz3FFahImJeqL52bPA4MHAiBHq+VB1LAVGOqK1jGA9aZI7oN9PzxUWFiI8PByvvPIKvOt5coUT/ol0Q6sqWK1Fjx7qtbnS0tSTzocMAVas+GNZBxWwchxgaAy8k/rQQQ+AuX2B4O1//P0f+zl8BDzytp4HwDu2wGTNvCub0HoKVn1zi/T96bmkpCQ4ODjg5MmTyM7ORmRkJIyMjGrdl3OwiHQDC5Ye8/BQr0f1+efAzz8/OieroyWwbiGg1LK1tOhxralg1Veg9PHpuepRq8DAQMydOxepqakYMmRIvcfwFiGRbmDB0nMGBuoJ79u3q18+Xc3mVWDyr0DU90
Dz1rmnlsaC9efn+jRyUz1qdeLECeTk5NQ7alXt1q1bSE9Ph4ODg4ZSElFTsWC1UgaWwEfzgT2LgQwZlnaghispKWkVT84NGTIEzz33XJ2f68vITVFR0SOjVmlpaU8ctQKAbdu2wcHBAaWlpXj11VdbPigRNQsLVivWbwYwywhYuBZQ1bHPueVA985A5+qtB/DVrxqN2er5+/tj5cqVSExMlDtKi3Jzc8Py5cvr/FwfCtaPP/4Ie3v7Ro1aVRey4OBgzJs3D6mpqbC1tdVQYiJqKhas1swUeHcJkBcD7LhV+y79ZwJZx9RLPhw7BhzLBKb31GBGQnh4OCIiIhAUFISAgABcunRJ7kiy0OWn56pL0v/8z/80atSqKYWMiLQDC1Yr1+l54O8DgEXLgbJaPm9jAfTsBfSq3noC7fn/d40yNjZGZGQkTp06hfLycjg6OiIuLq7OdZL0UXl5OX777TecPHkSd+7ckTtOo/z4449wcHDA8ePHn/iEYLWmFjIi0h4sWK2dITBzKVC5Gth2W+4wVJ/+/fsjOTkZa9aswdKlSzF69GicOnVK7lgtqqqqCuvXr8fgwYNx+vRpnD9/HjY2Nvj444+1fsL7wyVpzpw5OHjwIIYOHfrE43bu3NnoQkZE2ocFi9DWE/h4IlBYIncSaojAwECcPXsWDg4OcHFxQUREBIqLi+WOJbmUlBS4uLjg3XffxezZs1FQUAClUokNGzbghx9+QO/evREREYEbWriSblNKUnUhmzJlSqMKGRFpKUFEOuunn34SgwcPFv379xfJyclyx5HE3r17hZubmzA3NxeRkZGisLDwsX2qqqrEjh07hKurq2jfvr2YO3euuHbtmubD/kVhYaEICwsTZmZmIjo6WlRWVjbouB9//FH07NlTeHh4iDNnzrRwSkmBGzdutW8QRKTTSkpKxOLFi4WJiYkIDAwUN2/elDtSk2RkZAgfHx9hYmIiwsLCGlSYVCqV2LFjR00hmzt3rrh69aoG0j6uuiS5u7s3uCQ1tZBpEdl/iXHjpq0bBBHphRMnToiRI0cKKysrER8fL1QqldyRGuT06dMiMDBQGBkZicDAQFFQUNCk8+zdu1e4u7sLU1NTERYWJi5fvixx0tpVlyRjY2MRGRkpKioqGnRcdSFzcnISOTk5LRuy5cj+S4wbN23dIIhIb6hUKhEfHy8sLCzEmDFjxNmzZ+WOVKcLFy6IsLAw0aZNG+Hv7y9OnDghyXn37t0rRo4cWTMSdunSJUnOW5udO3eKXr16NaokNbWQaSnZf4lx46atGye5E+kRAwMDhIWF4ezZs+jSpQucnZ0RFRWFiooKuaPV+O233zB//nwMHjwY58+fR0ZGBhITE+Ho6CjJ+X19fZGeno59+/YhLy8P/fv3R2hoKAoKCiQ5PwDcuXMH4eHheOGFFxAcHIyjR49i+PDhTzxu165dcHR0xOHDh3HkyBFER0fD2NhYslxEpEUEEemtHTt2iN69ewtHR0eRnp4ua5bbt2+LyMhI0a5dOzFy5Eixf/9+jVw3NTVVjBs3ThgbG4uQkBCRl5fXrPM1ZdSqqKhIn0atHib7KAE3btq6cQSLSI8FBAQgNzcXEydOxKhRoxAeHo67d+9qNENxcTFiYmLQv39/JCUlYf369UhPT8czzzyjket7e3sjJSUF+/fvR2FhIezs7BAaGgqlUtmk8yUkJGD27NnIzMxs0KjV7t274eDgwFErotZGEFGrkJWVJVxdXUWPHj3Ehg0bWvx65eXlIj4+XnTv3l3069dPxMfHa8VTcgcPHhT+/v41k+pbap6aHo9aPUz2UQJu3LR1gyCiVuPBgwciNjZWmJubC39/f3Hx4kXJr1FVVSW2bt0qbG1tRc+ePUVsbKwoKyuT/DrNdejQoUeKVm5urmTn3rVrl+jVq5cYNmyYyM7Oluy8Wkj2X2LcuGnrxluERK1ImzZtEBERgRMnTqCiogJ2dnaIiYmR7L2GKSkpcHZ2xqxZsx
AWFoa8vDxERETA1NRUkvNLydPTE4mJicjKygIAODg4ICAgADk5OU0+Z/Xk9+eff75m8ruzs7NUkYlIh7BgEbVCtra22LNnD77++mt89tlncHd3rykaTZGSkoIRI0Zg8uTJmDBhAgoKChAZGYm2bdtKmLplODk5YevWrcjOzkbbtm0xYsQIBAQEIDs7u1Hn2bNnDxwdHZGRkYHDhw8jOjoaJiYmLZSaiLQdCxZRKxYYGIhz587Bzc0Nnp6eiIiIwP379xt8/OHDhzFu3DhMmjQJLi4uyM/PR3R0NCwtLVsudAsZNmwYtm7dimPHjsHKygoeHh4ICAhAZmZmvcdVj1oFBATg5Zdf5qgVEQFgwSJq9aysrBAfH4+UlBQkJyfDyckJe/bsqfeYM2fOICgoCF5eXrC2tsbZs2cRHx+P7t27ayh1y3FwcIBCoagpWiNHjsT48eNx5MiRx/Z9eNQqIyODo1ZEVIMFi4gAAKNHj0ZOTg5CQkLwwgsvICgoCDdv3nxkn4sXLyI8PBxOTk4A1EVr69atsLGxkSNyi7K3t4dCocC5c+dga2sLLy8vjB8/HhkZGbh79+5jo1YuLi5yRyYiLWIghBByhyAi7aJUKjFr1iwcO3YM0dHRmDx5MpYvX47Y2FiMGjUK0dHRcHV1lTumRimVSixduhSbN2/GgAEDYGZmhvXr19eUzVbKQO4ARNqKBYuIaqVSqfDVV19hwYIFMDMzw8CBA/HJJ59g1KhRckeTVX5+Pu7cuYNhw4ZxwVAWLKI6sWARUb1u3ryJsrIy9OnTR+4opH1YsIjqwIJFRERNxYJFVAdOciciIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQSY8EiIiIikhgLFhEREZHEWLCIiIiIJMaCRURERCQxFiwiIiIiibFgEREREUmMBYuIiIhIYixYRERERBJjwSIiIiKSGAsWERERkcRYsIiIiIgkxoJFREREJDEWLCIiIiKJsWARERERSYwFi4iIiEhiLFhEREREEmPBIiIiIpIYCxYRERGRxFiwiIiIiCTGgkVEREQkMRYsIiIiIomxYBERERFJjAWLiIiISGIsWEREREQS+/+6rl5Qw5FTyAAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<PIL.Image.Image image mode=RGBA size=600x200 at 0x7FF82A805160>" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"MolsToGridImage(mols)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1\n" | |
] | |
} | |
], | |
"source": [ | |
"pattern = mols[0] # MolFromSmiles\n", | |
"matches = [smiles_list[i] for i, m in enumerate(mols) if m.HasSubstructMatch(pattern)]\n", | |
"print(len(matches)) # result: 1 (only the pattern molecule itself matches; the alternative Kekule form does not)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A possible solution is to mark the bonds in your pattern as generic, so they will match both single and double:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"query_params = Chem.AdjustQueryParameters()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"query_params.makeBondsGeneric = True\n", | |
"query_params.aromatizeIfPossible = False\n", | |
"query_params.adjustDegree = False\n", | |
"query_params.adjustHeavyDegree = False" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pattern_generic_bonds = Chem.AdjustQueryProperties(pattern, query_params)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAYAAABNcIgQAAAPd0lEQVR4nO3dfWxUZd7G8WumM32xrRSmiCzItggr1G6fJrLiuiomKGSBRVZFTRDXxvoWI5g2bNySgstGjBKU7UaXF1cFFJGiqwZ8aCwFxSgvLRqgi0KKtLUCMgVrS20705nnj1lYeFawrTNzn5nz/fzTMp05cxFCr7l/c+45jmAwGBQAADblNB0AAACTKEIAgK1RhAAAW6MIAQC2RhECAGyNIgQA2BpFCACwNYoQAGBrFCEAwNYoQgCArVGEAABbowgBALZGEQIAbI0iBADYGkUIALA1ihAAYGsUIQDA1ihCAICtUYQAAFujCAEAtkYRAgBsjSIEANgaRQgAsDWKEABgaxQhAMDWKEIAgK1RhAAAW6MIAQC2RhECAGyNIgQA2BpFCACwNYoQAGBrFCEAwNYoQgCArVGEAABbowgBmykvL1dlZaW6urpMRwEsgSIEbOL48eOSpIyMDH377bcqKSnR4sWLDacCzHMEg8Gg6RAAIuvLL7/UuHHjVFdXJ7fbLUlqampScnKyPB6Pli9frrvuuksXX3yx4aRA9LEiBOLYU089pcOHDys7O1s1NTVnSlCShgwZIo/Ho1OnTun9999XYmKigsGgfD6fwcRA9FGEQJwJBoPyer2SpIEDB57z/Q9JTU1VeXm5kpOTtXbtWt17773RigpYAqNRIM68/fbbWrp0qTZt2tTrx7a1tembb77R8OHDtX79eo0ZM0ZZWVnhDwlYCCtCIA74fD4VFxers7NTU6dO1auvvtqn46SlpWn48OGSpE2bNun062TOMEU8owgR144dOyZJ6uzsNJwkMjo6OvTdd9/J7XarX79+am5ultPpVGZm5k8+9osvvqjs7GzV1tZqzJgxYniEeMVoFHGrq6tLubm5Ki8v19SpU/Xxxx9ryJAhOnHihAYMGGA6XljMmzdPfr9fCxcujNhzdHd36/PPP9eVV16pjz76SC6XS9dcc03Eng+INooQca2trU1paWnat2+fcnNz5fV6dcUVV+jo0aNyOp1qamrSsGHDTMfslSNHjmjZsmV64okn1NraKpfLpZSUlKg89+LFizVq1ChNnjxZXV1dSkxMjMrzApHEaBRxqbS0VFu3blVaWpokKTc3V5KUmZmpvXv3yu12q7q6WtOmTZMknTx5UnV1dabi9khzc7MCgYA8Ho9aW1vV1dWl9PT0qJWgJBUXF2vy5Mlqa2vT6NGjz2zSB2IZK0LEpa1btyojI0P5+fkXvF9ra6vS09P1xhtvaOPGjVq1apXq6uoUCAQ0cuTI6ITtoSlTpqigoEC33Xab6SiSpD179igvL0/19fXasWOH7rjjDtORgD6hCBFXmpqa5HQ6NXjw4F4/1u/3y+VyadGiRZKkOXPmaPv27brkkkvOnEkZbbt371Z1dbUeeOABeb1eeTweORwOI1nOZ8OGDTpw4ICKiooYlyImMRpFXKmqqlJJSUmfHutyuSSFCnDOnDmSpHXr1umLL76QJL355ptqamoKT9AfcXrkOHDgwDPfZ2ZmWq4EpdBKtaioSJJ0880368MPPzScCOgdVoSIO8FgMCKFMXPmTC1YsEDZ2dl67rnnVFBQoIyMjLA/j9/v14gRI1RZWakRI0aE/fiRtG/fPuXk5Mjn82nFihV65JFHLFnewNlYESIufPbZZ5owYYL8fn/EfvGuXr1a2dnZ8vv92rVrl1JTU+X3+1VUVKRAIPCTj//WW2+poqJCLpdLu3btirkSlEInJTmdTh06dEj79++Xw+FgMz4sjyJEXMjLy1NJScmZ8WYkuVwurVmzRm63WydOnFBSUpKcTqcaGhrOjFR74/9fHkk6/+eCxorRo0fr+eeflxQaNZeVlRlOBJwfo1HEvNdff13Tp0+PSgleyMGDB/XBBx+osLBQVVVV+uSTTzR37twLPqahoUHXXnut6urqlJSUFKWk0d
XY2Ki0tDT1799fS5cu1YwZM5Senm46FnAGK0LEtPb29qiexHIhI0eOVGFhoSTJ4/Ho6quvliS98MILWr58+Tn3feaZZ1RXV6dhw4bp008/jdsSlKTLLrtM/fv3V2trq6qqqpSYmKhAICC/3286GiCJFSFiWGdnZ0wUyM6dO3XRRRcpNzdXDz/8sCZNmiSv16ucnByNHTvWdDwjVq1apaqqKr3yyiumowAUIWLXpEmTVFhYqFtvvdV0lB6rrKyU2+3WuHHjTEcxqrW1Vc3NzVziCZbAaBQxa+XKlZowYYLpGL2SnJwc0Q/IjhUJCQm68cYbLf+xdrAHVoSIORs3bpTL5dLEiRNNR0EfBYNB1dfXa8iQIXK73abjwOZYESLmpKen66uvvjIdo09aWlq0Z88e0zGMczgcysrKogRhCRQhYkZ3d7eOHTumG264Qffdd5/pOH2yd+/ePu01jDft7e3KyspiNApLYDSKmLFt2zbNnj1bNTU1fGxXjGM0CiuhCBFTTp06pdTUVNMx+qylpUX19fXKy8szHQXAvzEaheW1tLTo7rvvltfrjekSlBiNnsZoFFbCihCWFwgE9NJLL+mee+7hWndxgtEorIQihKVVV1crPz/f+OeIhgujUcB6GI3C0hYuXKhNmzaZjhE2jEZDGI3CSlgRwtIidZFdmMVoFFbCihCWVFpaqrKysrgrQTbUh7ChHlZCEcKSZs2apZtuusl0jLBjNBrCaBRWwmgUltLU1KTa2tqY+zDtnvJ6pdpayeYXn2A0CkthRQhLOXLkiNauXWs6RsTU1Ul//7vpFOYxGoWVsCKEZXR0dCg5Odl0DERBe3u7cnJytHnzZl1++eWm48DmWBHCEo4cOaJRo0bp5MmTpqNElNcrffCB6RTmpaSkaOvWrRo2bJjpKABFCGsIBAKqrKxU//79TUeJKEajIYxGYSUUISzh9ttvV2Njo+kYETd2rBTHb4H2GGeNwkp4jxCW0NnZqaSkJNMxIo6zRkM4axRWwooQlpCUlKQTJ05o2rRp8nq9puNEDKPREEajsBKKEJaRkZGhO++8Ux6Px3SUiGE0GsJoFFbCaBSW4/P5tH37dl1//fWmo4Qdo9EQRqOwElaEsJxt27ZpyZIlisfXaIxGQxiNwkpYEcLSuPpEfGJDPayEFSEsq6ysTMXFxaZjhBUb6kPYUA8rYUUIyzp06JAyMjI0YMAA01HCZscO6bnnOGEGsBKKEJa3YcMGjRo1SiNGjDAdBWHCaBRWwmgUlrd582a1tLSYjoEwYjQKK2FFiJjR1tamtLQ00zEAxBlWhIgJra2tysnJscXnkdoBG+phJawIETOOHTumQYMGmY6BMGBDPayEFSFixqBBg9TY2KjHH388djbbB6QXxktOt1S07azbfdKsn0sz3jYVzCw21MNKKELEFIfDocGDB5uO0Wv9MqSX5koHuk0nsQZGo7ASihAxZejQoZo9e7b8fr8aGhpMx+mx7HulaQ3SE/+UYmQtG1GcNQoroQgRk5YsWaJFixaZjtFjjgyp9HGpYr60/XvTacxjNAoroQgRkx577DGVlZWZjtErWQXSQwnS3H9IAdNhDGtubtbQoUMZjcISKELEJLfbLYfDodLSUq1bt850nJ5JkooXSAeflt6N32sPX1AgEFB3d7fS09M1Y8YMRqOwBIoQMe26667T+PHjTcc4R3e3dPjwD/9swFTpjyOkec9KHVFNZQ2zZ8/WihUrlJiYqKeffprRKCyBIkRMmzhxojwej7Zs2SKfz2c0yzvvSN9/LzkcUmnpee7klAqflPzLpbeaoxrPmKNHj55ZtRcXF2v69OmGEwHnoggR8zo6OjR//nwjZ5Fu3Pif1d+aNdLnn0tOp7Rq1fkfk/Jr6c+TpJPt597u94ceFytbJH/M6RcmwWBQ69evVzAYVFZWljwej+FkwLn4ZBnElUAgIKczsq/vNm+WUlKka6+V/vIXacwY6be//enH/de/pCeflF57TQ
oEQoUaq3bvlh566Dr97W+LNXbsWNNxgAuK4f9qwLn279+vq666Sl1dXWE/9vbtoRWfJDU3S6dPdiwtDU8JSlJOTqgEJamkRHr++fAcN5oOHgx9HT5cKixcpvz8fKN5gJ5gRYi40d3drdraWuXl5YXleKtW7VR9/dUqLZVqaqSPP5YefTQsh/5RBw5I/fpJgwaFLuL7u99JqanRee6+CgalX/4y9IIhTP8EQFSwIkTcSEhIUF5enhoaGlReXt6nY7z88ss6dOiQJKm6+n/lcoVuv+qq6JWgJP3iF6ESPHVKevnl0G3BYGhkajXl5aH3Nh0OaedOShCxhyJE3Nm9e7e+/vrrHt+/vLxcW7ZskRTa6H36sWVl8/WnP0UkYo+lpkoVFaGv5eVSYaHZPGc7PQa94gopPT30/UUXmcsD9BWjUcQtv98vn8+nlJSU//pZRUWFGhsbVVhYqHfeeUcJCQmaMmWKgZQ95/VKx49Lo0dL770n5edLP/uZmSzHj4dWyfv3W39kC/wYVoSIW/fff79WrFhx5s81NTWaM2eOJOnSSy9V6r9/g99yyy2WL0FJyswMlaAkrVwptbWFvjcxLh04MLRVhBJEPGBFiLh1/PhxdXR06MEHH9R7772n5uZmvfvuuyooKDAdLWz275f+8Adpx47Qe3QAeo8iRFwLBoNavXq1Zs6cKUccNkVHh7R3r/SrX0m7dklJSZysAvQWRQjEiQULQtsXfv/72N+QD0QT/1WAODFvXqgE29pCq0Lv6StcBKQXxktOt1S07awH+KRZP5dmvH3u/XJLJb/OvV/RcGnaa9H4WwDRRxECcSY1VVq2LHRyzdn6ZUgvzZUOdBuJBVgWRQjEGYdD+s1v/vv27HulaQ3SE/+UeD8E+A+KELAJR4ZU+rhUMV/a/r3pNIB1UISAjWQVSA8lSHP/IZ1v++EXz0qXZoZGq5mZUuZgaWn0r3AFRI3LdAAAUZQkFS+Q/udR6d3bfvgulxdKFXOkhNM3+KUF46RvopURiDKKELCZAVOlP/5VmvesdM0P/Nx1sTRk6Fm/HHxSWgJFiPjFaBSwG6dU+KTkXy691Ww6DGAeRQjYUMqvpT9Pkk62m04CmMcnywAAbI0VIQDA1ihCAICtUYQAAFujCAEAtkYRAgBsjSIEANgaRQgAsDWKEABgaxQhAMDWKEIAgK1RhAAAW6MIAQC2RhECAGyNIgQA2BpFCACwNYoQAGBrFCEAwNYoQgCArVGEAABbowgBALZGEQIAbI0iBADYGkUIALA1ihAAYGsUIQDA1ihCAICtUYQAAFujCAEAtkYRAgBsjSIEANgaRQgAsDWKEABga/8HQdsn5M7XcR0AAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<rdkit.Chem.rdchem.Mol at 0x7ff82a7f9a30>" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pattern_generic_bonds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2\n" | |
] | |
} | |
], | |
"source": [ | |
"matches = [smiles_list[i] for i, m in enumerate(mols) if m.HasSubstructMatch(pattern_generic_bonds)]\n", | |
"print(len(matches)) # result: 2 (both molecules match the generic-bond pattern)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0 C False\n", | |
"1 C False\n", | |
"2 C False\n", | |
"3 C False\n", | |
"4 N False\n", | |
"5 N False\n", | |
"6 C False\n", | |
"7 C False\n", | |
"8 C False\n", | |
"0 1 UNSPECIFIED False\n", | |
"1 2 UNSPECIFIED False\n", | |
"2 3 UNSPECIFIED False\n", | |
"3 4 UNSPECIFIED False\n", | |
"4 5 UNSPECIFIED False\n", | |
"5 6 UNSPECIFIED False\n", | |
"6 7 UNSPECIFIED False\n", | |
"7 8 UNSPECIFIED False\n", | |
"4 0 UNSPECIFIED False\n", | |
"8 0 UNSPECIFIED False\n" | |
] | |
} | |
], | |
"source": [ | |
"for a in pattern_generic_bonds.GetAtoms():\n", | |
" print(a.GetIdx(), a.GetSymbol(), a.GetIsAromatic())\n", | |
"for b in pattern_generic_bonds.GetBonds():\n", | |
" print(b.GetBeginAtomIdx(), b.GetEndAtomIdx(), b.GetBondType(), b.GetIsAromatic())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment