Skip to content

Instantly share code, notes, and snippets.

@ghutchis
Created July 18, 2019 17:23
Show Gist options
  • Save ghutchis/b6836c4d1e5c554ab0ef51c10ad9065b to your computer and use it in GitHub Desktop.
Save ghutchis/b6836c4d1e5c554ab0ef51c10ad9065b to your computer and use it in GitHub Desktop.
looking for correlated dihedral angles
#!/usr/bin/env python
import sys
import os
import glob
import pybel
# don't need to reimport openbabel
ob = pybel.ob
# count the matches for each pattern and the total # of mol matching pattterns
patterns = [ "[a][a]!@;-[CX3](=[CX3])!@-[a][a]",
"[a][a]!@;-[NX3H1]!@;-[CX3](=S)[!#1]",
"[a][c]!@;-[CX4H0]([CX3,N])!@;-[c][a]",
"[a][c]!@;-[CX4H1]([N,O,H])!@;-[c][a]",
"[a][c]!@;-[CH2]!@;-[n][a]",
"[a][c]!@;-[CX4H2]!@;-[OX2][C]",
"[a][c]!@;-[CX4H1]!@;-[OX2][!#1]",
"[c][c]!@;-[CX4]!@;-[c][c]",
"[cH0][c]!@;-[CX4H2]!@;-[a][a]",
"[cH0][c]([cH0])!@;-[NX3]!@;-[a][a]",
"[cH1][c]([cH1])!@;-[NX3]([CX4])!@;-[a][a]",
"[!#1][c]!@;-[SX2]!@;-[c][aH1,aH0]",
"[!#1][NX3H0]!@;-[C](=O)!@;-[O][CH0]",
"[CX3,CX4][CX4H2]!@;-[C](=O)!@;-[O]~[C]",
"[N,O,NH1,OH1][CX4]!@;-[C](=O)!@;-[O]~[C]",
"[C,c][NH]!@;-[C](=S)!@;-[NH][C,c]",
"[aH1][c]([aH1])!@;-[$(S(=O)=O)]!@;-[NX3H0][*]",
"[O]=[C]!@;-[O]!@;-[CX4H0][!#1]",
"[O]=[C]!@;-[NX3H0](A)!@;-[a][cH0]" ]
smarts = []
for pattern in patterns:
smart = pybel.Smarts(pattern)
smarts.append(smart)
molecules = 0
matches = [0]*len(smarts)
mol_matching = 0
# repeat through all the files on the command-line
# we can change this to use the glob module as well
# e.g., find all the files in a set of folders
print('filename', 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19, 'didmatch', sep=',' )
for argument in sorted(glob.iglob("*.sdf")):
filename, extension = os.path.splitext(argument)
# read all the molecules from the supplied file
# (i.e. might be more than one molecule in a file)
for mol in pybel.readfile(extension[1:], argument):
molecules += 1
this_one_matched = False
this_match = [0]*len(smarts)
for i in range(len(smarts)):
count = smarts[i].findall(mol)
if len(count) > 0:
this_one_matched = True
this_match[i] = len(count)
# total matches across the data set
matches[i] += len(count)
if this_one_matched:
mol_matching += 1
# print the dihedral angle
print(mol.title, *this_match, this_one_matched, sep=',')
print('total', *matches, mol_matching, molecules, sep=',')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment