Created
March 19, 2015 17:10
-
-
Save JoaoRodrigues/3442d0c404f1feb1c0cb to your computer and use it in GitHub Desktop.
Calculates per residue displacement in an ensemble of structures (based on CA atoms)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Analyzes the RMSD on a residue basis for an ensemble | |
of protein structures. | |
""" | |
from __future__ import print_function, division | |
import argparse | |
import os | |
import sys | |
import numpy as np | |
from Bio.PDB import PDBParser, Superimposer, PDBIO | |
from Bio.PDB.Polypeptide import is_aa | |
if __name__ == '__main__': | |
ap = argparse.ArgumentParser(description=__doc__) | |
ap.add_argument('ensemble', type=str, | |
help='PDB file of the ensemble of structures') | |
cmd = ap.parse_args() | |
pdbf = cmd.ensemble | |
if not os.path.isfile(pdbf): | |
raise Exception('Could not read PDB file: {0}'.format(pdbf)) | |
# Parse ensemble | |
p = PDBParser(QUIET=1) | |
s = p.get_structure('xyz', os.path.abspath(pdbf)) | |
print('[+] Read PDB file: {0}'.format(pdbf)) | |
# Get reference atoms (first model) | |
# Use CA only | |
refe = s[0] | |
refe_atoms = [res['CA'] for res in refe.get_residues() if is_aa(res)] | |
print('[+] Calculating r.m.s. on {0} CA atoms'.format(len(refe_atoms))) | |
# Superimpose all structures on the first model | |
si = Superimposer() | |
ca_distances = [] | |
for model in s: | |
model_atoms = [res['CA'] for res in model.get_residues() if is_aa(res)] | |
si.set_atoms(refe_atoms, model_atoms) | |
if model.id == refe.id: | |
#Check for self/self get zero RMS, zero translation | |
#and identity matrix for the rotation. | |
assert np.abs(si.rms) < 0.0000001 | |
assert np.max(np.abs(si.rotran[1])) < 0.000001 | |
assert np.max(np.abs(si.rotran[0]) - np.identity(3)) < 0.000001 | |
else: | |
si.apply(model.get_atoms()) | |
print('[+] Model {0} r.m.s. = {1:3.2f}'.format(model.id+1, si.rms)) | |
# For every CA atom calculate distance to reference equivalent CA | |
ca_distances.append( [i-j for i,j in zip(refe_atoms, model_atoms)] ) | |
# Calculate average displacement | |
n_models = len(ca_distances) | |
ca_distances = zip(*ca_distances) | |
ca_rmsd = [(sum(map(lambda x: x**2, i))/n_models)**0.5 for i in ca_distances] | |
# Normalize displacement | |
max_rmsd = max(ca_rmsd) | |
min_rmsd = min(ca_rmsd) | |
norm_ca_rmsd = [(rmsd-min_rmsd)/max_rmsd for rmsd in ca_rmsd] | |
# Load as bfactors in first model | |
for residue, res_displacement in zip(refe.get_residues(), norm_ca_rmsd): | |
if is_aa(residue): | |
new_bfactor = res_displacement | |
else: | |
new_bfactor = 0.0 | |
for atom in residue: | |
atom.bfactor = new_bfactor | |
# Save refe as new PDB file | |
io = PDBIO() | |
io.set_structure(refe) | |
io.save('{0}.pdb'.format(pdbf[:-4]+'_per-residue-rmsd')) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment