Created
August 10, 2015 16:50
-
-
Save adalke/fe39441b22251c0c1a5a to your computer and use it in GitHub Desktop.
patch to use CanonicalRankAtoms instead of AssignStereochemistry
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to compare the symmetry class assignments of RDKit's | |
# AssignStereochemistry and CanonicalRankAtoms. | |
# The input must come from a file named "x.dat", with one SMILES per | |
# line. | |
from rdkit import Chem | |
from collections import defaultdict | |
# Group atoms based on their symmetry group assignment values | |
def get_groups(values): | |
value_to_indices = defaultdict(list) | |
for i, value in enumerate(values): | |
value_to_indices[value].append(i) | |
groups = set(tuple(indices) for indices in value_to_indices.values()) | |
return groups | |
def main(): | |
for lineno, line in enumerate(open("x.dat")): # <-- to use a SMILES file .... | |
if lineno % 1000 == 0: | |
print "Processing", lineno | |
smiles = line.strip() # ... comment out this line ... | |
#smiles, id = line.split() # ... and uncomment this line. | |
mol = Chem.MolFromSmiles(smiles) | |
if mol is None: | |
continue | |
Chem.AssignStereochemistry(mol, flagPossibleStereoCenters=True) | |
cip = [int(a.GetProp('_CIPRank')) for a in mol.GetAtoms()] | |
rank = list(Chem.CanonicalRankAtoms(mol, breakTies=False)) | |
assert len(rank) == len(cip), (smiles, len(rank), len(cip)) | |
# Do they have identical symmetry group assignments? | |
rank_groups = get_groups(rank) | |
cip_groups = get_groups(cip) | |
if rank_groups != cip_groups: | |
print "Mismatch with", smiles | |
print "Canonical", Chem.MolToSmiles(mol, isomericSmiles=True) | |
print "Rank:", rank, "=>", sorted(rank_groups) | |
print " CIP:", cip, "=>", sorted(cip_groups) | |
if __name__ == "__main__": | |
main() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
index 4ef38b3..d0f2a74 100644 | |
--- a/Contrib/mmpa/indexing.py | |
+++ b/Contrib/mmpa/indexing.py | |
@@ -62,16 +62,14 @@ def get_symmetry_class(smi): | |
m = Chem.MolFromSmiles(smi) | |
#determine the symmetry class | |
- #see: http://www.mail-archive.com/[email protected]/msg01894.html | |
- #A thanks to Greg (and Alan) | |
- Chem.AssignStereochemistry(m,cleanIt=True,force=True,flagPossibleStereoCenters=True) | |
+ symmetry_classes = Chem.CanonicalRankAtoms(m, breakTies=False) | |
#get the symmetry class of the attachements points | |
#Note: 1st star is the zero index, | |
#2nd star is first index, etc | |
- for atom in m.GetAtoms(): | |
+ for atom, symmetry_class in zip(m.GetAtoms(), symmetry_classes): | |
if(atom.GetMass() == 0): | |
- symmetry.append(atom.GetProp('_CIPRank')) | |
+ symmetry.append(symmetry_class) | |
return symmetry |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment