Created
January 17, 2018 18:04
-
-
Save marta-sd/ecf88ee96c41022f27d9f5458a142811 to your computer and use it in GitHub Desktop.
Merge multiple molecules in DeCAF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Merge several molecules into a single DeCAF pharmacophore model.
#
# Steps: build one pharmacophore per SMILES string, compute pairwise
# distances (1 - similarity), run average-linkage hierarchical clustering on
# them, then combine the pharmacophores pairwise in the order given by the
# linkage matrix. The final combined model is filtered by node frequency and
# drawn for inspection.
from itertools import combinations

from rdkit.Chem import MolFromSmiles
from decaf.toolkits.rd import phar_from_mol
from decaf.utils import similarity, combine_pharmacophores, draw, filter_nodes
from scipy.cluster.hierarchy import average as avg_clustering
from scipy.spatial.distance import squareform

smiles = [
    'c1cc(cc(c1)N)c2cc(cc(c2O)c3[nH]c4ccc(cc4n3)C(=N)N)Cl',
    'c1cc(cc(c1)N(=O)=O)c2cc(cc(c2O)c3cc4cc(ccc4[nH]3)C(=N)N)C(CC(=O)O)C(=O)O',
    'c1cc(c(cc1CNC(=O)N)c2cc(cc(c2O)c3[nH]c4ccc(cc4n3)C(=N)N)C(CC(=O)O)C(=O)O)O',
    'c1ccc(cc1)C[C@@H](C(=O)NCc2cccc(c2)c3cc(cc(c3O)c4cc-5c([nH]ccc5n4)N)C(=O)O)O',
    'CC[C@@H](C)NC(=O)c1cc(cc(c1)N)c2cnc(c(=O)n2CC(=O)NCc3ccc(cc3)C(=N)N)NC(C)C',
]

# Build one pharmacophore per molecule. MolFromSmiles returns None when RDKit
# cannot parse the input, so fail early and name the offending string.
phars = []
for smi in smiles:
    mol = MolFromSmiles(smi)
    if mol is None:
        raise ValueError('RDKit could not parse SMILES: {!r}'.format(smi))
    phars.append(phar_from_mol(mol))

# vector-form (condensed) distance matrix: one entry per unordered pair, in
# the order (0,1), (0,2), ..., (1,2), ... that squareform and the clustering
# routine expect. The first item returned by similarity() is used as the score.
distance = [1 - similarity(p1, p2)[0] for p1, p2 in combinations(phars, 2)]
print('Distance matrix:')
print(squareform(distance))
print()

clustering = avg_clustering(distance)
print('Clustering linkage matrix:')
print(clustering)
print()

# merge models based on clustering
# Each linkage row names the two clusters joined at that step. Appending the
# combined model keeps list positions aligned with the linkage numbering,
# where the cluster created at step k gets index len(phars) + k.
clusters = phars[:]
for left, right, _dist, _size in clustering:
    clusters.append(
        combine_pharmacophores(clusters[int(left)], clusters[int(right)])
    )

# filter the least frequent nodes
# The last appended cluster is the result of the final merge step.
merged_molecules = filter_nodes(clusters[-1], freq_range=(0.3, 1))

# draw and inspect the model
draw(merged_molecules)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment