Last active
December 8, 2021 16:30
-
-
Save dehaenw/bb5704fc4d108eec8f8e999d6ab79118 to your computer and use it in GitHub Desktop.
SSCP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools as it

# Accumulates every generated SMILES string (duplicates possible until the
# optional canonicalisation step at the end of the script).
allsmi = []

# Maximum number of chlorine atoms enumerated per carbon atom.
maxvalence = 2
def cl_ratio(Cl, C):
    """Return the chlorine mass fraction of a chlorinated linear alkane.

    The molecule is taken to be a saturated, acyclic chain with ``C`` carbon
    atoms, ``Cl`` chlorine atoms and ``2*C + 2 - Cl`` hydrogen atoms.

    Args:
        Cl: Number of chlorine atoms.
        C: Number of carbon atoms.

    Returns:
        Mass of chlorine divided by the total molecular weight.
    """
    # The same chlorine mass is used in the numerator and in the molecular
    # weight; the original mixed 35.45 and 35.453.
    carbon_mass = 12.011
    hydrogen_mass = 1.008
    chlorine_mass = 35.45

    hydrogen_count = C * 2 - Cl + 2
    chlorine_weight = Cl * chlorine_mass
    molecular_weight = (
        C * carbon_mass + chlorine_weight + hydrogen_count * hydrogen_mass
    )
    return chlorine_weight / molecular_weight
# Enumerate chlorinated linear alkanes as SMILES strings and keep those whose
# chlorine mass fraction lies in [0.4, 0.7].
for chainlength in [10, 11, 12, 13]:
    # One chlorine count (0..maxvalence) per carbon. Of each pair of
    # mirror-image substitution patterns only one is kept; a palindromic
    # pattern is its own mirror image and is kept once.
    combinations = (
        pattern
        for pattern in it.product(range(maxvalence + 1), repeat=chainlength)
        if pattern[::-1] >= pattern
    )
    for comb in combinations:
        # Each carbon is written as "C" followed by one "(Cl)" branch per
        # chlorine it carries.
        curr_smi = "".join("C" + "(Cl)" * cl_count for cl_count in comb)
        n_cl = sum(comb)

        if 0.4 <= cl_ratio(n_cl, chainlength) <= 0.7:
            allsmi.append(curr_smi)

        if maxvalence > 1:
            # Trichloromethyl terminal caps: a terminal carbon that already
            # carries two chlorines can take a third one, written as an extra
            # "Cl" at that end of the string.
            head_full = comb[0] == 2
            tail_full = comb[-1] == 2

            # Exactly one capped terminus -> one extra chlorine.
            if (head_full or tail_full) and (
                0.4 <= cl_ratio(n_cl + 1, chainlength) <= 0.7
            ):
                if head_full:
                    allsmi.append("Cl" + curr_smi)
                # For a palindromic pattern the tail-capped molecule is the
                # mirror image of the head-capped one, so it is not repeated.
                if tail_full and not (head_full and comb == comb[::-1]):
                    allsmi.append(curr_smi + "Cl")

            # Both termini capped -> two extra chlorines. The original
            # appended curr_smi+"Cl" twice here and never produced this
            # doubly capped molecule.
            if head_full and tail_full and (
                0.4 <= cl_ratio(n_cl + 2, chainlength) <= 0.7
            ):
                allsmi.append("Cl" + curr_smi + "Cl")
# Write one SMILES string per line, then report how many were generated
# (this count may still contain duplicates of the same molecule).
with open('CSSP.smi', 'w', encoding="utf-8") as f:
    f.writelines("%s\n" % smi for smi in allsmi)
print("total amount of non canonized smiles is {}".format(len(allsmi)))
# Optional second pass: canonicalise every SMILES string and count the
# distinct results.
canonize = True
if canonize:
    # NOTE(review): `Chem` is not imported anywhere in this file; presumably
    # it is RDKit's `rdkit.Chem` and was already in scope where this script
    # was run (e.g. a notebook). Confirm and add the import, otherwise this
    # line raises NameError.
    can_smi = [Chem.MolToSmiles(Chem.MolFromSmiles(smi), True) for smi in allsmi]
    # The original message said "non canonized", but this is the number of
    # unique canonical SMILES.
    print("total amount of unique canonized smiles is {}".format(len(set(can_smi))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment