sczizzo · March 15, 2013 03:19
diff --git a/analyze_ph.py b/analyze_ph.py
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 from __future__ import division
 from marshal import dump, load
 from random import sample
 from glob import glob
 from pdb import *
 import subprocess as sub
 import sys
 import re


 def run_experiment(name, residues, interests):
  left, right = [], []
  matching_names = map(lambda i: i[0], interests)
  for residue in residues:
    for interest in interests:
      res_name, a1n, a2n, a3n, a4n, divide = interest
      if name != res_name: continue
      if res_name == residue['resName']:
        distance = residue_distance(residue, a3n, a4n)
        if distance == None:
          continue
        elif distance < divide:
          left.append(distance)
        elif distance >= divide:
          right.append(distance)
  return left, right

 def analyze_result(left, right, mind=2.0, maxd=3.0, nbins=50):
  data, labels = [], []
  left         = filter(lambda d: mind < d < maxd, left)
  right        = filter(lambda d: mind < d < maxd, right)
  med_left     = median(left)
  med_right    = median(right)
  delta        = med_right - med_left
  print("PeakL: %.2f Angstrom, PeakR: %.2f Angstrom, Delta: %.2f Angstrom" %
    (med_left, med_right, delta))
  data.append(left)
  data.append(right)
  labels.append("left")
  labels.append("right")
  figure(figsize=[11, 8.5])
  hist(data, histtype='bar', bins=nbins, label=labels)
  legend()
  grid()



 interests = [
  ['ASP', 'CB', 'CG', 'OD1', 'OD2', 2.16],
  ['GLU', 'CG', 'CD', 'OE1', 'OE2', 2.16],
  ['ASN', 'CB', 'CG', 'OD1', 'ND2', 2.20],
  ['GLN', 'CG', 'CD', 'OE1', 'NE2', 2.20],
  ['VAL', 'CA', 'CB', 'CG1', 'CG2', None]
 ]

 low_ph_residues  = []
 high_ph_residues = []

 LOW_PH  = 3.5
 HIGH_PH = 8.5
 nprocd  = 0
 nrpdb   = glob('nrpdb_ph/*')
 for pdb_fname in nrpdb:
  short_name = pdb_fname.split('/')[-1]
  raw_ph = sub.check_output("./ph.sh " + pdb_fname, shell=True)
  try:
    ph = float(raw_ph)
  except: continue
  if ph < LOW_PH:
    low_ph_residues += extract_models(pdb_fname)[0]['residues']
  if ph > HIGH_PH:
    high_ph_residues += extract_models(pdb_fname)[0]['residues']
  if nprocd > 0 and nprocd % 25 == 0: print("%d processed" % nprocd)
  nprocd += 1

 print("Running experiments for low pH (< %.2f) ..." % LOW_PH)
 low_asp_l, low_asp_r = run_experiment('ASP', low_ph_residues, interests)
 low_asn_l, low_asn_r = run_experiment('ASN', low_ph_residues, interests)
 low_glu_l, low_glu_r = run_experiment('GLU', low_ph_residues, interests)
 low_gln_l, low_gln_r = run_experiment('GLN', low_ph_residues, interests)

 print("Running experiments for high pH (> %.2f) ..." % HIGH_PH)
 high_asp_l, high_asp_r = run_experiment('ASP', high_ph_residues, interests)
 high_asn_l, high_asn_r = run_experiment('ASN', high_ph_residues, interests)
 high_glu_l, high_glu_r = run_experiment('GLU', high_ph_residues, interests)
 high_gln_l, high_gln_r = run_experiment('GLN', high_ph_residues, interests)
	#!/usr/bin/env python
	# -- coding: UTF-8 --
	from __future__ import division
	from marshal import dump, load
	from random import sample
	from glob import glob
	from pdb import *
	import subprocess as sub
	import sys
	import re


	def run_experiment(name, residues, interests):
	left, right = [], []
	matching_names = map(lambda i: i[0], interests)
	for residue in residues:
	for interest in interests:
	res_name, a1n, a2n, a3n, a4n, divide = interest
	if name != res_name: continue
	if res_name == residue['resName']:
	distance = residue_distance(residue, a3n, a4n)
	if distance == None:
	continue
	elif distance < divide:
	left.append(distance)
	elif distance >= divide:
	right.append(distance)
	return left, right

	def analyze_result(left, right, mind=2.0, maxd=3.0, nbins=50):
	data, labels = [], []
	left = filter(lambda d: mind < d < maxd, left)
	right = filter(lambda d: mind < d < maxd, right)
	med_left = median(left)
	med_right = median(right)
	delta = med_right - med_left
	print("PeakL: %.2f Angstrom, PeakR: %.2f Angstrom, Delta: %.2f Angstrom" %
	(med_left, med_right, delta))
	data.append(left)
	data.append(right)
	labels.append("left")
	labels.append("right")
	figure(figsize=[11, 8.5])
	hist(data, histtype='bar', bins=nbins, label=labels)
	legend()
	grid()



	interests = [
	['ASP', 'CB', 'CG', 'OD1', 'OD2', 2.16],
	['GLU', 'CG', 'CD', 'OE1', 'OE2', 2.16],
	['ASN', 'CB', 'CG', 'OD1', 'ND2', 2.20],
	['GLN', 'CG', 'CD', 'OE1', 'NE2', 2.20],
	['VAL', 'CA', 'CB', 'CG1', 'CG2', None]
	]

	low_ph_residues = []
	high_ph_residues = []

	LOW_PH = 3.5
	HIGH_PH = 8.5
	nprocd = 0
	nrpdb = glob('nrpdb_ph/*')
	for pdb_fname in nrpdb:
	short_name = pdb_fname.split('/')[-1]
	raw_ph = sub.check_output("./ph.sh " + pdb_fname, shell=True)
	try:
	ph = float(raw_ph)
	except: continue
	if ph < LOW_PH:
	low_ph_residues += extract_models(pdb_fname)[0]['residues']
	if ph > HIGH_PH:
	high_ph_residues += extract_models(pdb_fname)[0]['residues']
	if nprocd > 0 and nprocd % 25 == 0: print("%d processed" % nprocd)
	nprocd += 1

	print("Running experiments for low pH (< %.2f) ..." % LOW_PH)
	low_asp_l, low_asp_r = run_experiment('ASP', low_ph_residues, interests)
	low_asn_l, low_asn_r = run_experiment('ASN', low_ph_residues, interests)
	low_glu_l, low_glu_r = run_experiment('GLU', low_ph_residues, interests)
	low_gln_l, low_gln_r = run_experiment('GLN', low_ph_residues, interests)

	print("Running experiments for high pH (> %.2f) ..." % HIGH_PH)
	high_asp_l, high_asp_r = run_experiment('ASP', high_ph_residues, interests)
	high_asn_l, high_asn_r = run_experiment('ASN', high_ph_residues, interests)
	high_glu_l, high_glu_r = run_experiment('GLU', high_ph_residues, interests)
	high_gln_l, high_gln_r = run_experiment('GLN', high_ph_residues, interests)