Skip to content

Instantly share code, notes, and snippets.

@sczizzo
Created March 15, 2013 02:47
Show Gist options
  • Select an option

  • Save sczizzo/5167140 to your computer and use it in GitHub Desktop.

Select an option

Save sczizzo/5167140 to your computer and use it in GitHub Desktop.
Analyze sidechain separation and volume in PDB files
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from marshal import dump, load
from random import sample
from glob import glob
from pdb import *
import sys
def run_experiment(residues, interests, ss_code=None, mind=0.0, maxd=100.0):
results = []
for interest in interests:
distances, volumes = [], []
res_name, a1n, a2n, a3n, a4n = interest
matching_residues = filter(lambda r: r['resName'] == res_name, residues)
for residue in matching_residues:
if ss_code != None and residue['ssCode'] != ss_code: continue
distance = residue_distance(residue, a3n, a4n)
volume = residue_volume(residue, a1n, a2n, a3n, a4n)
if distance != None: distances.append(distance)
else:
print("Error on %s (%.2f)" % (res_name, residue['resNum'], find_atom('HD2')))
if volume != None: volumes.append(volume)
results.append([res_name, distances, volumes])
return results
def analyze_result(data, mind=0.0, maxd=1000.0, bins=25):
try:
print("Average: %.3f" % average(data))
print("Median: %.3f" % median(data))
rdata = filter(lambda x: mind < x < maxd, data)
hist(rdata, bins)
except: return
interests = [
['ASP', 'CB', 'CG', 'OD1', 'OD2'],
['GLU', 'CG', 'CD', 'OE1', 'OE2'],
# ['ASN', 'CB', 'CG', 'OD1', 'ND2'],
# ['GLN', 'CG', 'CD', 'OE1', 'NE2'],
# ['VAL', 'CA', 'CB', 'CG1', 'CG2']
]
try:
__IPYTHON__
is_ipy = True
except: is_ipy = False
if is_ipy or len(sys.argv) == 1: num_samples = 512
else: num_samples = int(sys.argv[1])
residue_data = 'r%d.pydata' % num_samples
if is_ipy:
residues_file = open(residue_data, 'rb')
all_residues = load(residues_file)
residues_file.close()
aspN, gluN, asnN, glnN, valN = run_experiment(all_residues, interests, ss_code=None)
aspL, gluL, asnL, glnL, valL = run_experiment(all_residues, interests, ss_code='loop')
aspH, gluH, asnH, glnH, valH = run_experiment(all_residues, interests, ss_code='helix')
aspS, gluS, asnS, glnS, valS = run_experiment(all_residues, interests, ss_code='sheet')
aspU, gluU, asnU, glnU, valU = run_experiment(all_residues, interests, ss_code='unknown')
else:
print('Starting on %s...' % residue_data)
all_residues = []
nrpdb = sample(glob('nrpdb/*'), k=num_samples)
nprocd = 0
for pdb_fname in nrpdb:
if nprocd > 0 and nprocd % 25 == 0: print("%d processed" % nprocd)
all_residues += extract_models(pdb_fname)[0]['residues']
nprocd += 1
print('Done. Writing to file...')
residues_file = open(residue_data, 'w+b')
dump(all_residues, residues_file)
residues_file.close()
print('Done.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment