|
#!/usr/bin/env python |
|
|
|
import sys |
|
import os |
|
import glob |
|
|
|
import pybel |
|
# don't need to reimport openbabel |
|
ob = pybel.ob |
|
|
|
# count the matches for each pattern and the total # of mol matching pattterns |
|
|
|
patterns = [ "[a][a]!@;-[CX3](=[CX3])!@-[a][a]", |
|
"[a][a]!@;-[NX3H1]!@;-[CX3](=S)[!#1]", |
|
"[a][c]!@;-[CX4H0]([CX3,N])!@;-[c][a]", |
|
"[a][c]!@;-[CX4H1]([N,O,H])!@;-[c][a]", |
|
"[a][c]!@;-[CH2]!@;-[n][a]", |
|
"[a][c]!@;-[CX4H2]!@;-[OX2][C]", |
|
"[a][c]!@;-[CX4H1]!@;-[OX2][!#1]", |
|
"[c][c]!@;-[CX4]!@;-[c][c]", |
|
"[cH0][c]!@;-[CX4H2]!@;-[a][a]", |
|
"[cH0][c]([cH0])!@;-[NX3]!@;-[a][a]", |
|
"[cH1][c]([cH1])!@;-[NX3]([CX4])!@;-[a][a]", |
|
"[!#1][c]!@;-[SX2]!@;-[c][aH1,aH0]", |
|
"[!#1][NX3H0]!@;-[C](=O)!@;-[O][CH0]", |
|
"[CX3,CX4][CX4H2]!@;-[C](=O)!@;-[O]~[C]", |
|
"[N,O,NH1,OH1][CX4]!@;-[C](=O)!@;-[O]~[C]", |
|
"[C,c][NH]!@;-[C](=S)!@;-[NH][C,c]", |
|
"[aH1][c]([aH1])!@;-[$(S(=O)=O)]!@;-[NX3H0][*]", |
|
"[O]=[C]!@;-[O]!@;-[CX4H0][!#1]", |
|
"[O]=[C]!@;-[NX3H0](A)!@;-[a][cH0]" ] |
|
|
|
smarts = [] |
|
for pattern in patterns: |
|
smart = pybel.Smarts(pattern) |
|
smarts.append(smart) |
|
molecules = 0 |
|
matches = [0]*len(smarts) |
|
mol_matching = 0 |
|
|
|
# repeat through all the files on the command-line |
|
# we can change this to use the glob module as well |
|
# e.g., find all the files in a set of folders |
|
print('filename', 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19, 'didmatch', sep=',' ) |
|
|
|
for argument in sorted(glob.iglob("*.sdf")): |
|
filename, extension = os.path.splitext(argument) |
|
|
|
# read all the molecules from the supplied file |
|
# (i.e. might be more than one molecule in a file) |
|
for mol in pybel.readfile(extension[1:], argument): |
|
molecules += 1 |
|
this_one_matched = False |
|
this_match = [0]*len(smarts) |
|
for i in range(len(smarts)): |
|
count = smarts[i].findall(mol) |
|
if len(count) > 0: |
|
this_one_matched = True |
|
this_match[i] = len(count) |
|
# total matches across the data set |
|
matches[i] += len(count) |
|
if this_one_matched: |
|
mol_matching += 1 |
|
|
|
# print the dihedral angle |
|
print(mol.title, *this_match, this_one_matched, sep=',') |
|
print('total', *matches, mol_matching, molecules, sep=',') |