Created
August 2, 2013 04:20
-
-
Save tjlane/6137469 to your computer and use it in GitHub Desktop.
Compute polar surface area, etc for a list of molecules using OpenEye's toolkit.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import csv | |
| import numpy as np | |
| from openeye.oechem import * | |
| from openeye.oemolprop import * | |
| from openeye.oespicoli import * | |
| from openeye.oeomega import * | |
| def load_molecules(filename): | |
| """ | |
| Returns OE molecule objects | |
| """ | |
| molecules = {} | |
| with open(filename, 'r') as csvfile: | |
| csv_r = csv.reader(csvfile) | |
| for row in csv_r: | |
| mol = OEMol() | |
| OEParseSmiles(mol, row[1]) | |
| molecules[row[0]] = mol | |
| print "Parsed %d molecules from file: %s" % (len(molecules), filename) | |
| return molecules | |
| def compute_2d_psa(mol): | |
| """ | |
| Compute the atomic-level PSA for a molecule `mol`. | |
| Returns | |
| ------- | |
| atom_psas : list | |
| The per-atom PSA (in ang sqd) for `mol` | |
| """ | |
| atomPSA = OEFloatArray(mol.GetMaxAtomIdx()) | |
| psa = OEGet2dPSA(mol, atomPSA) | |
| return [ atomPSA[atom.GetIdx()] for atom in mol.GetAtoms() ] | |
| def measure_3d_surface_area(mol): | |
| omega = OEOmega() | |
| omega.SetStrictStereo(False) | |
| omega(mol) | |
| surf = OESurface() | |
| OEMakeAccessibleSurface(surf, mol) | |
| return OESurfaceArea(surf) | |
| def compute_psa_moment(mol): | |
| omega = OEOmega() | |
| omega.SetStrictStereo(False) | |
| omega(mol) | |
| atomPSA = OEFloatArray(mol.GetMaxAtomIdx()) | |
| _ = OEGet2dPSA(mol, atomPSA) | |
| xyz = np.zeros((mol.NumAtoms(), 3)) | |
| psa = np.zeros(mol.NumAtoms()) | |
| for i,atom in enumerate(mol.GetAtoms()): | |
| xyz[i,:] = np.array( mol.GetCoords(atom) ) | |
| psa[i] = atomPSA[atom.GetIdx()] | |
| total_psa = psa.sum() | |
| barycenter = np.sum(xyz * psa[:,None], axis=0) | |
| assert barycenter.shape == (3,) | |
| # this is a vector, mu | |
| mu = psa[:,None] * (xyz - barycenter[None,:]) | |
| return np.sqrt( np.sum( np.square(mu) ) ) | |
| def whitespace_pad(string, pad=18): | |
| if len(string) > pad: | |
| return string[:pad] | |
| else: | |
| d = pad - len(string) | |
| return string + ' '*d | |
| def main(): | |
| if ('-h' in sys.argv) or len(sys.argv) != 2: | |
| print "Usage: python %s <csv-of-SMILES>" % sys.argv[0] | |
| sys.exit(0) | |
| else: | |
| print "" | |
| print sys.argv | |
| molecules = load_molecules(sys.argv[1]) | |
| print "" | |
| print " Molecule \tSASA (A^2)\tPSA (A^2)\tmu-psa (A^3)" | |
| print " ---------- \t----------\t---------\t------------" | |
| for name, mol in molecules.items(): | |
| print "%s\t%.2f \t%.2f \t%.2f" % ( whitespace_pad(name), | |
| measure_3d_surface_area(mol), | |
| sum(compute_2d_psa(mol)), | |
| compute_psa_moment(mol)) | |
| return | |
| if __name__ == '__main__': | |
| main() |
Author
Author
Further, mu-psa is defined analogously to a dipole moment, but using the atomic PSA's instead of charges. Before computing this, the PSA-weighted barycenter was subtracted:
https://gist.github.com/tjlane/6137469#file-measure_psa-py-L72
Still need to confirm this part of the code is (1) what we want, and (2) performing as expected
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
output:
There are also a bunch of warnings that get printed -- not sure if they are important right now. They have been filtered out in this output.
Also, the BODIPY compound didn't work, since there are some missing parameters in MMFF.