Created
March 15, 2013 03:19
-
-
Save sczizzo/5167248 to your computer and use it in GitHub Desktop.
In-depth analysis of the sidechain separation distance in a set of PDB files separated by pH
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: UTF-8 -*- | |
from __future__ import division | |
from marshal import dump, load | |
from random import sample | |
from glob import glob | |
from pdb import * | |
import subprocess as sub | |
import sys | |
import re | |
def run_experiment(name, residues, interests): | |
left, right = [], [] | |
matching_names = map(lambda i: i[0], interests) | |
for residue in residues: | |
for interest in interests: | |
res_name, a1n, a2n, a3n, a4n, divide = interest | |
if name != res_name: continue | |
if res_name == residue['resName']: | |
distance = residue_distance(residue, a3n, a4n) | |
if distance == None: | |
continue | |
elif distance < divide: | |
left.append(distance) | |
elif distance >= divide: | |
right.append(distance) | |
return left, right | |
def analyze_result(left, right, mind=2.0, maxd=3.0, nbins=50): | |
data, labels = [], [] | |
left = filter(lambda d: mind < d < maxd, left) | |
right = filter(lambda d: mind < d < maxd, right) | |
med_left = median(left) | |
med_right = median(right) | |
delta = med_right - med_left | |
print("PeakL: %.2f Angstrom, PeakR: %.2f Angstrom, Delta: %.2f Angstrom" % | |
(med_left, med_right, delta)) | |
data.append(left) | |
data.append(right) | |
labels.append("left") | |
labels.append("right") | |
figure(figsize=[11, 8.5]) | |
hist(data, histtype='bar', bins=nbins, label=labels) | |
legend() | |
grid() | |
interests = [ | |
['ASP', 'CB', 'CG', 'OD1', 'OD2', 2.16], | |
['GLU', 'CG', 'CD', 'OE1', 'OE2', 2.16], | |
['ASN', 'CB', 'CG', 'OD1', 'ND2', 2.20], | |
['GLN', 'CG', 'CD', 'OE1', 'NE2', 2.20], | |
['VAL', 'CA', 'CB', 'CG1', 'CG2', None] | |
] | |
low_ph_residues = [] | |
high_ph_residues = [] | |
LOW_PH = 3.5 | |
HIGH_PH = 8.5 | |
nprocd = 0 | |
nrpdb = glob('nrpdb_ph/*') | |
for pdb_fname in nrpdb: | |
short_name = pdb_fname.split('/')[-1] | |
raw_ph = sub.check_output("./ph.sh " + pdb_fname, shell=True) | |
try: | |
ph = float(raw_ph) | |
except: continue | |
if ph < LOW_PH: | |
low_ph_residues += extract_models(pdb_fname)[0]['residues'] | |
if ph > HIGH_PH: | |
high_ph_residues += extract_models(pdb_fname)[0]['residues'] | |
if nprocd > 0 and nprocd % 25 == 0: print("%d processed" % nprocd) | |
nprocd += 1 | |
print("Running experiments for low pH (< %.2f) ..." % LOW_PH) | |
low_asp_l, low_asp_r = run_experiment('ASP', low_ph_residues, interests) | |
low_asn_l, low_asn_r = run_experiment('ASN', low_ph_residues, interests) | |
low_glu_l, low_glu_r = run_experiment('GLU', low_ph_residues, interests) | |
low_gln_l, low_gln_r = run_experiment('GLN', low_ph_residues, interests) | |
print("Running experiments for high pH (> %.2f) ..." % HIGH_PH) | |
high_asp_l, high_asp_r = run_experiment('ASP', high_ph_residues, interests) | |
high_asn_l, high_asn_r = run_experiment('ASN', high_ph_residues, interests) | |
high_glu_l, high_glu_r = run_experiment('GLU', high_ph_residues, interests) | |
high_gln_l, high_gln_r = run_experiment('GLN', high_ph_residues, interests) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment