Skip to content

Instantly share code, notes, and snippets.

@tjlane
Created August 2, 2013 04:20
Show Gist options
  • Select an option

  • Save tjlane/6137469 to your computer and use it in GitHub Desktop.

Select an option

Save tjlane/6137469 to your computer and use it in GitHub Desktop.
Compute polar surface area, etc for a list of molecules using OpenEye's toolkit.
#!/usr/bin/env python
import csv
import numpy as np
from openeye.oechem import *
from openeye.oemolprop import *
from openeye.oespicoli import *
from openeye.oeomega import *
def load_molecules(filename):
"""
Returns OE molecule objects
"""
molecules = {}
with open(filename, 'r') as csvfile:
csv_r = csv.reader(csvfile)
for row in csv_r:
mol = OEMol()
OEParseSmiles(mol, row[1])
molecules[row[0]] = mol
print "Parsed %d molecules from file: %s" % (len(molecules), filename)
return molecules
def compute_2d_psa(mol):
"""
Compute the atomic-level PSA for a molecule `mol`.
Returns
-------
atom_psas : list
The per-atom PSA (in ang sqd) for `mol`
"""
atomPSA = OEFloatArray(mol.GetMaxAtomIdx())
psa = OEGet2dPSA(mol, atomPSA)
return [ atomPSA[atom.GetIdx()] for atom in mol.GetAtoms() ]
def measure_3d_surface_area(mol):
omega = OEOmega()
omega.SetStrictStereo(False)
omega(mol)
surf = OESurface()
OEMakeAccessibleSurface(surf, mol)
return OESurfaceArea(surf)
def compute_psa_moment(mol):
omega = OEOmega()
omega.SetStrictStereo(False)
omega(mol)
atomPSA = OEFloatArray(mol.GetMaxAtomIdx())
_ = OEGet2dPSA(mol, atomPSA)
xyz = np.zeros((mol.NumAtoms(), 3))
psa = np.zeros(mol.NumAtoms())
for i,atom in enumerate(mol.GetAtoms()):
xyz[i,:] = np.array( mol.GetCoords(atom) )
psa[i] = atomPSA[atom.GetIdx()]
total_psa = psa.sum()
barycenter = np.sum(xyz * psa[:,None], axis=0)
assert barycenter.shape == (3,)
# this is a vector, mu
mu = psa[:,None] * (xyz - barycenter[None,:])
return np.sqrt( np.sum( np.square(mu) ) )
def whitespace_pad(string, pad=18):
if len(string) > pad:
return string[:pad]
else:
d = pad - len(string)
return string + ' '*d
def main():
if ('-h' in sys.argv) or len(sys.argv) != 2:
print "Usage: python %s <csv-of-SMILES>" % sys.argv[0]
sys.exit(0)
else:
print ""
print sys.argv
molecules = load_molecules(sys.argv[1])
print ""
print " Molecule \tSASA (A^2)\tPSA (A^2)\tmu-psa (A^3)"
print " ---------- \t----------\t---------\t------------"
for name, mol in molecules.items():
print "%s\t%.2f \t%.2f \t%.2f" % ( whitespace_pad(name),
measure_3d_surface_area(mol),
sum(compute_2d_psa(mol)),
compute_psa_moment(mol))
return
if __name__ == '__main__':
main()
@tjlane
Copy link
Copy Markdown
Author

tjlane commented Aug 2, 2013

Further, mu-psa is defined analogously to a dipole moment, but using the atomic PSA's instead of charges. Before computing this, the PSA-weighted barycenter was subtracted:

https://gist.github.com/tjlane/6137469#file-measure_psa-py-L72

Still need to confirm this part of the code is (1) what we want, and (2) performing as expected

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment