Created
November 7, 2018 16:00
-
-
Save chao1224/25cbb962bbb6898763d3d7a29f35c228 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import numpy as np
from rdkit import RDConfig
from rdkit.Chem import ChemicalFeatures
from rdkit.Chem import MolFromSmiles
# Element symbols that get their own slot in the atom-type one-hot encoding.
# The final 'Unknown' entry is the catch-all bucket: one_of_k_encoding_unk
# maps any symbol not listed here onto the last element of the set.
atom_candidates = ['C', 'Cl', 'I', 'F', 'O', 'N', 'P', 'S', 'Br', 'Unknown']
def one_of_k_encoding(x, allowable_set):
    """Return the one-hot encoding of *x* over *allowable_set* as a list.

    Args:
        x: Value to encode; compared to each candidate with ``==``.
        allowable_set: Ordered sequence of candidate values.

    Returns:
        A list of 0/1 ints with the same length as ``allowable_set``; the
        entries equal to ``x`` are 1 and all others are 0.

    Raises:
        ValueError: If ``x`` is not contained in ``allowable_set``.
    """
    if x not in allowable_set:
        # ValueError is a subclass of Exception, so callers that catch the
        # generic Exception keep working.
        raise ValueError("input {0} not in allowable set{1}:".format(x, allowable_set))
    # Build a concrete list rather than returning map(): on Python 3 map() is
    # a lazy iterator and cannot be concatenated with "+" by callers.
    return [1 if x == s else 0 for s in allowable_set]
def one_of_k_encoding_unk(x, allowable_set):
    """Return the one-hot encoding of *x*, falling back to the last slot.

    Values not found in ``allowable_set`` are reported on stdout and then
    encoded as the last element of the set.

    Args:
        x: Value to encode; compared to each candidate with ``==``.
        allowable_set: Ordered sequence of candidate values whose last
            element serves as the fallback bucket.

    Returns:
        A list of 0/1 ints with the same length as ``allowable_set``.
    """
    if x not in allowable_set:
        print('Unknown detected: {}'.format(x))
        # Maps inputs not in the allowable set to the last element.
        x = allowable_set[-1]
    # Build a concrete list rather than returning map(): on Python 3 map() is
    # a lazy iterator and cannot be concatenated with "+" by callers.
    return [1 if x == s else 0 for s in allowable_set]
def extract_atom_features(atom, is_acceptor=0, is_donor=0):
    """Build the feature vector for one atom as a NumPy array.

    The vector is the concatenation, in order, of one-hot encodings of:
    element symbol (over ``atom_candidates``, unknown symbols fall back to
    its last entry), degree (0-6), total number of hydrogens (0-6), implicit
    valence (0-5), formal charge (-2 to 3), aromaticity flag, acceptor flag
    and donor flag.

    Args:
        atom: Object exposing ``GetSymbol``, ``GetDegree``,
            ``GetTotalNumHs``, ``GetImplicitValence``, ``GetFormalCharge``
            and ``GetIsAromatic`` (presumably an RDKit atom; see the caller).
        is_acceptor: 0/1 (or bool) flag encoded over ``[0, 1]``.
        is_donor: 0/1 (or bool) flag encoded over ``[0, 1]``.

    Returns:
        A 1-D ``np.ndarray`` holding the concatenated 0/1 encodings.

    Raises:
        Exception: Propagated from ``one_of_k_encoding`` when any property
            other than the element symbol lies outside its allowable set.
    """
    encoded_groups = (
        one_of_k_encoding_unk(atom.GetSymbol(), atom_candidates),
        one_of_k_encoding(atom.GetDegree(), [0, 1, 2, 3, 4, 5, 6]),
        one_of_k_encoding(atom.GetTotalNumHs(), [0, 1, 2, 3, 4, 5, 6]),
        one_of_k_encoding(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5]),
        one_of_k_encoding(atom.GetFormalCharge(), [-2, -1, 0, 1, 2, 3]),
        one_of_k_encoding(atom.GetIsAromatic(), [0, 1]),
        one_of_k_encoding(is_acceptor, [0, 1]),
        one_of_k_encoding(is_donor, [0, 1]),
    )
    # Accumulate with extend() instead of "+": this accepts any iterable, so
    # it works even if an encoder hands back a lazy map object (Python 3),
    # which does not support "+" concatenation.
    features = []
    for group in encoded_groups:
        features.extend(group)
    return np.array(features)
def get_feature_for_each_mol(smiles):
    """Compute the per-atom feature vectors for the molecule in *smiles*.

    A feature factory is built from RDKit's ``BaseFeatures.fdef`` and used to
    find the 'Acceptor' and 'Donor' features of the molecule. The first atom
    id of each such feature marks that atom as an acceptor / donor, and the
    flags are passed to ``extract_atom_features`` together with the atom.

    Args:
        smiles: SMILES string handed to ``MolFromSmiles``.

    Returns:
        A list with one feature array per atom, in ``mol.GetAtoms()`` order.
        NOTE(review): this function previously computed each array and
        dropped it (implicitly returning None); confirm no caller depends on
        the None return.
    """
    fdef_name = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_name)
    mol = MolFromSmiles(smiles)
    feats = factory.GetFeaturesForMol(mol)

    # Materialise the ids as sets. A lazy map()/filter() chain (Python 3)
    # would be consumed by the first "in" test below, so every later atom
    # would wrongly be reported as neither acceptor nor donor.
    acceptor_atom_ids = set()
    donor_atom_ids = set()
    for feat in feats:
        family = feat.GetFamily()
        if family == 'Acceptor':
            acceptor_atom_ids.add(feat.GetAtomIds()[0])
        elif family == 'Donor':
            donor_atom_ids.add(feat.GetAtomIds()[0])

    atom_features = []
    for atom in mol.GetAtoms():
        atom_idx = atom.GetIdx()
        atom_feature = extract_atom_features(
            atom,
            is_acceptor=atom_idx in acceptor_atom_ids,
            is_donor=atom_idx in donor_atom_ids,
        )
        atom_features.append(atom_feature)
    return atom_features
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment