Skip to content

Instantly share code, notes, and snippets.

@whitead
Created November 1, 2022 21:22
Show Gist options
  • Save whitead/df6e53cff3102b2dbec6a2e793816a57 to your computer and use it in GitHub Desktop.
Save whitead/df6e53cff3102b2dbec6a2e793816a57 to your computer and use it in GitHub Desktop.
Automated naming of compounds
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
import exmol
import skunk
import math
import matplotlib.pyplot as plt
import textwrap
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
import urllib.request
import os

# Plot text is set in IBM Plex Mono. The TTF is fetched from the Google Fonts
# repository once and then reused from the working directory, so repeated runs
# neither hit the network again nor fail when offline.
_FONT_URL = 'https://github.com/google/fonts/raw/main/ofl/ibmplexmono/IBMPlexMono-Regular.ttf'
_FONT_FILE = 'IBMPlexMono-Regular.ttf'
if not os.path.isfile(_FONT_FILE):
    urllib.request.urlretrieve(_FONT_URL, _FONT_FILE)

# Register the downloaded file under the alias 'plexmono' so that rcParams
# can select it by name below.
fe = font_manager.FontEntry(
    fname=_FONT_FILE,
    name='plexmono')
font_manager.fontManager.ttflist.append(fe)

# Global matplotlib styling shared by every figure this script creates.
plt.rcParams.update({
    'axes.facecolor': '#f5f4e9',
    'grid.color': '#AAAAAA',
    'axes.edgecolor': '#333333',
    'figure.facecolor': '#FFFFFF',
    'axes.grid': False,
    'axes.prop_cycle': plt.cycler('color', plt.cm.Dark2.colors),
    'font.family': fe.name,
    'figure.figsize': (3.5, 3.5 / 1.2),
    'ytick.left': True,
    'xtick.bottom': True,
})
def ax_grid(count):
    """Make a square grid of matplotlib subplots with room for ``count`` panels.

    The grid is ``n x n`` with ``n = ceil(sqrt(count))``, clamped to at least
    one so that ``count == 0`` still yields a valid (fully blank) figure.
    Axes beyond the first ``count`` are switched off so they render empty.

    Args:
        count: Number of panels the caller intends to draw.

    Returns:
        A ``(fig, axes)`` tuple where ``axes`` is a flat 1-D array holding
        all ``n * n`` Axes objects in row-major order.
    """
    n = max(1, int(math.ceil(math.sqrt(count))))
    # squeeze=False makes subplots always return a 2-D array; without it a
    # 1x1 grid comes back as a bare Axes object, which has no .flatten().
    fig, ax = plt.subplots(n, n, figsize=(n * 2, n * 2), squeeze=False)
    ax = ax.flatten()
    # Hide the surplus cells of the square grid.
    for a in ax[count:]:
        a.axis('off')
    return fig, ax
def draw_morgan_bit(mol, bitInfo, key):
    """Render the environment of one Morgan fingerprint bit as SVG.

    Args:
        mol: RDKit molecule the fingerprint was computed from.
        bitInfo: Bit-info dictionary filled in by the fingerprint call.
        key: The bit id in ``bitInfo`` to draw.

    Returns:
        The drawing produced by ``Chem.Draw.DrawMorganBit``: its ``.data``
        attribute when the result has one, otherwise the result itself.
    """
    # The colour overrides are passed as None and only a light grey
    # extraColor is kept -- presumably to suppress RDKit's default
    # highlight colours; confirm against the RDKit docs.
    render_options = {
        'molSize': (300, 200),
        'centerColor': None,
        'aromaticColor': None,
        'ringColor': None,
        'extraColor': (0.8, 0.8, 0.8),
        'useSVG': True,
    }
    rendered = Chem.Draw.DrawMorganBit(mol, key, bitInfo, **render_options)
    # Some return types wrap the SVG text in an object exposing `.data`;
    # fall back to the returned value when that attribute is absent.
    return getattr(rendered, 'data', rendered)
def draw_mol(m):
    """Draw molecule ``m`` on a 300x200 SVG canvas and return the SVG text.

    Args:
        m: RDKit molecule to draw.

    Returns:
        The text returned by the drawer's ``GetDrawingText()``.
    """
    draw = rdkit.Chem.Draw

    options = draw.MolDrawOptions()
    options.minFontSize = 16
    options.drawMolsSameScale = False
    # A black-and-white palette (options.useBWAtomPalette()) was tried by the
    # original author and left disabled.

    canvas = draw.rdMolDraw2D.MolDraw2DSVG(300, 200)
    canvas.SetDrawOptions(options)
    canvas.DrawMolecule(m)
    canvas.FinishDrawing()
    return canvas.GetDrawingText()
def plot(smi):
    """Display a molecule next to its named Morgan-fingerprint substructures.

    A radius-3 Morgan fingerprint is computed for the molecule. Each bit is
    visited in order of increasing radius (0..3) and passed to exmol's
    ``_name_morgan_bit``; bits that get a usable, not-yet-seen name are drawn
    in their own panel with the name as title. The first panel shows the
    whole molecule. The composed SVG is shown with ``skunk.display`` and the
    list of names is printed.

    Args:
        smi: SMILES string of the molecule.

    Raises:
        ValueError: If RDKit cannot parse ``smi``.
    """
    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a clear message instead of deep inside the fingerprint call.
        raise ValueError('Could not parse SMILES: {!r}'.format(smi))

    bitInfo = {}
    svg_dict = {}
    AllChem.GetMorganFingerprint(mol, 3, bitInfo=bitInfo)

    names = []
    used_names = set()
    for r in range(4):
        for b in bitInfo:
            # Only the first recorded occurrence of a bit decides its radius.
            _, ri = bitInfo[b][0]
            if ri != r:
                continue
            name = exmol.exmol._name_morgan_bit(mol, bitInfo, b)
            if name is None or len(name) < 2 or name in used_names:
                continue
            names.append(name)
            svg_dict[str(b)] = draw_morgan_bit(mol, bitInfo, b)
        # Ban every name seen so far once a radius is finished, so larger
        # radii cannot repeat a name from a smaller one.
        # NOTE(review): the ban is applied per radius, not per bit, so two
        # bits at the same radius can share a name -- confirm this is intended.
        used_names.update(names)
    print(names)

    # One panel per named bit plus one for the full molecule.
    fig, axs = ax_grid(len(svg_dict) + 1)
    axs[0].axis('off')
    skunk.connect(axs[0], 'mol')
    # svg_dict and names were filled in lockstep, so they pair up by position.
    for ax, k, n in zip(axs[1:], svg_dict, names):
        ax.axis('off')
        ax.set_title(textwrap.fill(n, 20))
        skunk.connect(ax, k)
    # Added after the loop above so 'mol' is not paired with a bit name.
    svg_dict['mol'] = draw_mol(mol)

    fig.tight_layout(pad=1.0)
    svg = skunk.insert(svg_dict)
    # Close only the figure created here, after skunk has rendered it.
    plt.close(fig)
    skunk.display(svg)
if __name__ == '__main__':
    # Example run; guarded so that importing this module does not trigger
    # fingerprinting and rendering as a side effect.
    plot('O=C(NCC1CCCCC1N)C2=CC=CC=C2C3=CC=C(F)C=C3C(=O)NC4CCCCC4')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment