Skip to content

Instantly share code, notes, and snippets.

@tjlane
Created August 2, 2013 04:20
Show Gist options
  • Select an option

  • Save tjlane/6137469 to your computer and use it in GitHub Desktop.

Select an option

Save tjlane/6137469 to your computer and use it in GitHub Desktop.
Compute polar surface area, etc for a list of molecules using OpenEye's toolkit.
#!/usr/bin/env python
import csv
import numpy as np
from openeye.oechem import *
from openeye.oemolprop import *
from openeye.oespicoli import *
from openeye.oeomega import *
def load_molecules(filename):
"""
Returns OE molecule objects
"""
molecules = {}
with open(filename, 'r') as csvfile:
csv_r = csv.reader(csvfile)
for row in csv_r:
mol = OEMol()
OEParseSmiles(mol, row[1])
molecules[row[0]] = mol
print "Parsed %d molecules from file: %s" % (len(molecules), filename)
return molecules
def compute_2d_psa(mol):
"""
Compute the atomic-level PSA for a molecule `mol`.
Returns
-------
atom_psas : list
The per-atom PSA (in ang sqd) for `mol`
"""
atomPSA = OEFloatArray(mol.GetMaxAtomIdx())
psa = OEGet2dPSA(mol, atomPSA)
return [ atomPSA[atom.GetIdx()] for atom in mol.GetAtoms() ]
def measure_3d_surface_area(mol):
omega = OEOmega()
omega.SetStrictStereo(False)
omega(mol)
surf = OESurface()
OEMakeAccessibleSurface(surf, mol)
return OESurfaceArea(surf)
def compute_psa_moment(mol):
omega = OEOmega()
omega.SetStrictStereo(False)
omega(mol)
atomPSA = OEFloatArray(mol.GetMaxAtomIdx())
_ = OEGet2dPSA(mol, atomPSA)
xyz = np.zeros((mol.NumAtoms(), 3))
psa = np.zeros(mol.NumAtoms())
for i,atom in enumerate(mol.GetAtoms()):
xyz[i,:] = np.array( mol.GetCoords(atom) )
psa[i] = atomPSA[atom.GetIdx()]
total_psa = psa.sum()
barycenter = np.sum(xyz * psa[:,None], axis=0)
assert barycenter.shape == (3,)
# this is a vector, mu
mu = psa[:,None] * (xyz - barycenter[None,:])
return np.sqrt( np.sum( np.square(mu) ) )
def whitespace_pad(string, pad=18):
if len(string) > pad:
return string[:pad]
else:
d = pad - len(string)
return string + ' '*d
def main():
if ('-h' in sys.argv) or len(sys.argv) != 2:
print "Usage: python %s <csv-of-SMILES>" % sys.argv[0]
sys.exit(0)
else:
print ""
print sys.argv
molecules = load_molecules(sys.argv[1])
print ""
print " Molecule \tSASA (A^2)\tPSA (A^2)\tmu-psa (A^3)"
print " ---------- \t----------\t---------\t------------"
for name, mol in molecules.items():
print "%s\t%.2f \t%.2f \t%.2f" % ( whitespace_pad(name),
measure_3d_surface_area(mol),
sum(compute_2d_psa(mol)),
compute_psa_moment(mol))
return
if __name__ == '__main__':
main()
@tjlane
Copy link
Copy Markdown
Author

tjlane commented Aug 2, 2013

output:

['measure_psa.py', 'fluorophores_smiles_notation.csv']
Parsed 27 molecules from file: fluorophores_smiles_notation.csv

   Molecule         SASA (A^2)  PSA (A^2)   mu-psa (A^3)
  ----------        ----------  ---------   ------------
Sulforhodamine B    809.09      133.79      48572.44
Alexa 633 M         1153.60     285.41      38503.87
Atto 550 M          921.85      149.46      24630.77
Atto 565 biotin     1048.98     154.94      145757.61
OG 514 SE           650.58      133.53      15530.13
Sulfo-Cy5 M         1084.40     218.98      128372.96
Dyomics 654 SE      1138.44     288.55      56884.02
Cy3 SE              789.76      46.38       11514.12
TMR M               787.64      128.75      52419.51
Alexa 488 SE        645.36      257.67      12906.42
Carboxyfluorescein  561.11      133.53      38011.36
Atto 488 SE         718.69      237.85      39179.21
Alexa 546 SE        1065.45     257.38      37033.00
Alexa 532 M         1013.76     246.35      27982.90
Atto 655 SE         788.91      125.17      22454.94
Abb. STAR 635P azi  1038.37     221.20      57083.77
Alexa 568 hydrazid  896.73      243.27      60694.08
Alexa 532 SE        816.59      188.15      10092.46
Alexa 594 M         1052.09     268.34      59158.37
Atto 532 SE         835.68      212.37      33208.14
Texas Red M         934.84      188.08      63238.66
Atto 465 SE         509.12      96.05       14794.29
OG 488 M            680.46      162.63      33852.82
Alexa 647 M         1285.44     333.38      115023.61
Sulfo-Cy3 M         973.25      218.98      92314.03
Alexa 647 SE        1178.68     275.18      84913.29
Atto 647N M         912.62      122.09      28045.04

There are also a bunch of warnings that get printed -- not sure if they are important right now. They have been filtered out in this output.

Also, the BODIPY compound didn't work, since there are some missing parameters in MMFF.

@tjlane
Copy link
Copy Markdown
Author

tjlane commented Aug 2, 2013

Further, mu-psa is defined analogously to a dipole moment, but using the atomic PSA's instead of charges. Before computing this, the PSA-weighted barycenter was subtracted:

https://gist.github.com/tjlane/6137469#file-measure_psa-py-L72

Still need to confirm this part of the code is (1) what we want, and (2) performing as expected

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment