Skip to content

Instantly share code, notes, and snippets.

@ljmartin
Created May 2, 2025 04:13
Show Gist options
  • Save ljmartin/afdd75ec6c02838168a30dbc3d7be1f9 to your computer and use it in GitHub Desktop.
Save ljmartin/afdd75ec6c02838168a30dbc3d7be1f9 to your computer and use it in GitHub Desktop.
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem, rdMolDescriptors, rdMolAlign, rdMolTransforms
import numpy as np
from tqdm.notebook import tqdm
df = pd.read_csv('./ligs.smi', sep='\t')
mols = []
for smi in df['smiles']:
mols.append(Chem.AddHs(Chem.MolFromSmiles(smi)))
nconfs = 50
for m in mols:
AllChem.EmbedMultipleConfs(m, nconfs)
#assume the zeroth conformer is the best one to initialize.
cids = [0]*len(mols)
nits = 100 # iterations
nmols = len(mols)
# for logging purposes, calculate the total pairwise alignment score:
score = 0
for i in range(nmols):
for j in range(i+1, nmols):
l = rdMolAlign.GetO3A(mols[i], mols[j], prbCid=cids[i], refCid=cids[j])
score += l.Score()
print(score)
# for each iteration we'll pick a random molecule, and update it's conformer Id
# to the best alignment over all other 'best' compounds.
# the 'best' conformer ID is initialized as zero, but after each iteration
# we expect the conformers to converge towards the single best alignment.
for i in tqdm(range(nits)):
#ref molindex:
rmid = np.random.choice(range(nmols))
# create a matrix of shape nmols,nconfs. this will store scores.
# once it's full of alignment scores for all conformers of
# this molecule vs all the other 'best' conformers, we'll update this molecule's best conformer ID
# to give the maximum sum of alignment scores.
smat = np.zeros([nmols, nconfs])
#test moleculec index
for tmid in range(nmols):
if tmid==rmid:
continue
#for each conformer of the 'ref' molindex.
for cid in range(nconfs):
l = rdMolAlign.GetO3A(mols[tmid], mols[rmid], prbCid=cids[tmid], refCid=cid)
score = l.Score()
smat[tmid, cid] = score
# now update rmid's conformer to it's best alignment with all others.
scores = smat.sum(0)
rmax = scores.argmax()
cids[rmid] = int(rmax)
# for logging purposes, calculate the total alignment score now:
score = 0
for i in range(nmols):
for j in range(i+1, nmols):
l = rdMolAlign.GetO3A(mols[i], mols[j], prbCid=cids[i], refCid=cids[j])
score += l.Score()
print(score)
# use the first molecule as the original 'reference' coordinate set.
refmol = Chem.Mol(mols[0])
refcid = cids[0]
with Chem.SDWriter('./ligs.sdf') as writer:
for m, cid in zip(mols, cids):
l = rdMolAlign.GetO3A(m, refmol, prbCid=cid, refCid=refcid)
l.Align() #update coordinates to alignment.
writer.write(m, confId=cid)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment