Created
May 2, 2025 04:13
-
-
Save ljmartin/afdd75ec6c02838168a30dbc3d7be1f9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
from rdkit.Chem import rdMolDescriptors | |
from rdkit.Chem import AllChem, rdMolDescriptors, rdMolAlign, rdMolTransforms | |
import numpy as np | |
from tqdm.notebook import tqdm | |
df = pd.read_csv('./ligs.smi', sep='\t') | |
mols = [] | |
for smi in df['smiles']: | |
mols.append(Chem.AddHs(Chem.MolFromSmiles(smi))) | |
nconfs = 50 | |
for m in mols: | |
AllChem.EmbedMultipleConfs(m, nconfs) | |
#assume the zeroth conformer is the best one to initialize. | |
cids = [0]*len(mols) | |
nits = 100 # iterations | |
nmols = len(mols) | |
# for logging purposes, calculate the total pairwise alignment score: | |
score = 0 | |
for i in range(nmols): | |
for j in range(i+1, nmols): | |
l = rdMolAlign.GetO3A(mols[i], mols[j], prbCid=cids[i], refCid=cids[j]) | |
score += l.Score() | |
print(score) | |
# for each iteration we'll pick a random molecule, and update it's conformer Id | |
# to the best alignment over all other 'best' compounds. | |
# the 'best' conformer ID is initialized as zero, but after each iteration | |
# we expect the conformers to converge towards the single best alignment. | |
for i in tqdm(range(nits)): | |
#ref molindex: | |
rmid = np.random.choice(range(nmols)) | |
# create a matrix of shape nmols,nconfs. this will store scores. | |
# once it's full of alignment scores for all conformers of | |
# this molecule vs all the other 'best' conformers, we'll update this molecule's best conformer ID | |
# to give the maximum sum of alignment scores. | |
smat = np.zeros([nmols, nconfs]) | |
#test moleculec index | |
for tmid in range(nmols): | |
if tmid==rmid: | |
continue | |
#for each conformer of the 'ref' molindex. | |
for cid in range(nconfs): | |
l = rdMolAlign.GetO3A(mols[tmid], mols[rmid], prbCid=cids[tmid], refCid=cid) | |
score = l.Score() | |
smat[tmid, cid] = score | |
# now update rmid's conformer to it's best alignment with all others. | |
scores = smat.sum(0) | |
rmax = scores.argmax() | |
cids[rmid] = int(rmax) | |
# for logging purposes, calculate the total alignment score now: | |
score = 0 | |
for i in range(nmols): | |
for j in range(i+1, nmols): | |
l = rdMolAlign.GetO3A(mols[i], mols[j], prbCid=cids[i], refCid=cids[j]) | |
score += l.Score() | |
print(score) | |
# use the first molecule as the original 'reference' coordinate set. | |
refmol = Chem.Mol(mols[0]) | |
refcid = cids[0] | |
with Chem.SDWriter('./ligs.sdf') as writer: | |
for m, cid in zip(mols, cids): | |
l = rdMolAlign.GetO3A(m, refmol, prbCid=cid, refCid=refcid) | |
l.Align() #update coordinates to alignment. | |
writer.write(m, confId=cid) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment