Created
October 29, 2014 00:27
-
-
Save JoaoRodrigues/02181f993b0a48a213dd to your computer and use it in GitHub Desktop.
Sampling Analysis in BioPython
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -* | |
""" | |
Utility script to analyze several docking poses of a protein complex. | |
Produces PDB file with color-coded ligand center-of-mass positions | |
that reflect the population density (i.e. number of neighbor models). | |
Works with multiple chain models by considering as "ligand" all chains | |
defined as NOT part of the receptor. Default, chain A is the only receptor. | |
João Rodrigues @ Utrecht, 2014 | |
""" | |
import copy | |
import os | |
import sys | |
try: | |
from Bio.PDB import PDBParser | |
from Bio.PDB import PDBIO | |
from Bio.PDB import Superimposer | |
from Bio.PDB import StructureBuilder | |
from Bio.PDB import NeighborSearch | |
except ImportError: | |
raise Exception('Could not import Biopython PDB modules') | |
# Main Code | |
if __name__ == '__main__': | |
import argparse | |
parser = argparse.ArgumentParser(description='Analyse sampling of docking runs.') | |
# Define args | |
parser.add_argument('structures', metavar='pdb_file(s)', type=str, nargs='+', | |
help='Docking models containing all chains') | |
# Define options | |
parser.add_argument('--output', metavar='output.pdb', type=str, | |
default='sampling.pdb', | |
help='Name of the output analysis PDB file') | |
parser.add_argument('--receptor', metavar='A', type=str, nargs='+', | |
default='A', | |
help='Chains to consider as receptor') | |
parser.add_argument('--radius', metavar='N Å', type=float, | |
default=10.0, | |
help='Radius in Å to consider neighboring ligand COMs') | |
ui = parser.parse_args() # user input | |
pdb_parser = PDBParser(QUIET=1, PERMISSIVE=1) | |
# Start analysis | |
print "= Starting ..." | |
lst_models = [] | |
for pdb_model in ui.structures: | |
fpath = os.path.abspath(pdb_model) | |
if not fpath.endswith('.pdb'): | |
print >>sys.stderr, "'{0}' is not a PDB file. Ignoring...".format(fpath) | |
continue | |
lst_models.append(fpath) | |
print "\t Read {0} models".format(len(lst_models)) | |
# Fit on receptor chains of the first model | |
# Select only CA or P atoms | |
print "= Extracting reference atoms from first model" | |
receptor_chains = set(ui.receptor) | |
print "\tUsing chains as receptor: {0}".format(','.join(receptor_chains)) | |
refe_atoms = [] | |
refe_path = lst_models[0] | |
refe_model = pdb_parser.get_structure(os.path.basename(refe_path), refe_path) | |
for residue in refe_model.get_residues(): | |
if residue.parent.id in receptor_chains: | |
if residue.id[0] == ' ' and 'CA' in residue: | |
refe_atoms.append(residue['CA']) | |
elif residue.id[0] == ' ' and 'P' in residue: | |
refe_atoms.append(residue['P']) | |
if not refe_atoms: | |
msg = '\tNo "CA" or "P" atoms found in first model for the specified chain(s)' | |
print >>sys.stderr, msg | |
sys.exit(1) | |
# Create 'sampling' structure with receptor chains of first model | |
sb = StructureBuilder.StructureBuilder() | |
sb.init_structure('sampling') | |
sb.init_model(0) | |
sb.init_seg('') | |
for chain in refe_model.get_chains(): | |
if chain.id in receptor_chains: | |
sb.model.add(chain) | |
# Free space? | |
del refe_model | |
# Fit to reference | |
si = Superimposer() | |
print "= Fitting the remaining models" | |
ligand_atoms = [] | |
for model_path in lst_models: | |
model = pdb_parser.get_structure(os.path.basename(model_path), model_path) | |
mobi_atoms = [] | |
ligand_atoms.append([]) | |
for residue in model.get_residues(): | |
if residue.parent.id in receptor_chains: | |
if residue.id[0] == ' ' and 'CA' in residue: | |
mobi_atoms.append(residue['CA']) | |
elif residue.id[0] == ' ' and 'P' in residue: | |
mobi_atoms.append(residue['P']) | |
else: # collect ligand atoms | |
for atom in residue: | |
ligand_atoms[-1].append(atom) | |
if not mobi_atoms: | |
msg = '\tNo "CA" or "P" atoms found in "{0}" for the specified chain(s). Skipping.' | |
print >>sys.stderr, msg.format(model.id) | |
continue | |
si.set_atoms(refe_atoms, mobi_atoms) | |
si.apply(model.get_atoms()) | |
# Free Space? | |
del model | |
# C. Calculate rotated ligand center-of-mass | |
# D. Append COM as a residue of chain X for the sampling.pdb structure. | |
print "= Calculating ligand center-of-mass" | |
sb.init_chain('X') | |
ligand_coms = [] | |
modifier = 0 # id modifier for >9999 models | |
for imodel, atom_lst in enumerate(ligand_atoms, start=1): | |
coord_lst = [a.coord for a in atom_lst] | |
ligand_com = sum(coord_lst)/len(coord_lst) | |
# Create 'virtual' atom with COM coordinates | |
# Show as oxygen since red color is nice to visualize. | |
# Avoid numbering issues.. | |
# Im leaving this like this.. who wants to analyze more than 20k positions.. | |
if imodel >= 9999: | |
modifier = -9998 | |
sb.init_chain('Y') | |
sb.init_residue('COM', ' ', imodel+modifier, ' ') | |
sb.init_atom('XYZ', ligand_com, 0.0, 0.0, ' ', 'XYZ', 1, 'O') | |
ligand_coms.append(sb.atom) | |
# Analyze COM distances | |
print "= Analyzing COM distances" | |
nb = NeighborSearch(ligand_coms) | |
nneighbors = [] | |
for com in ligand_coms: | |
nn = nb.search(com.coord, ui.radius, level='R') | |
# minimum 1 (itself) | |
nneighbors.append(len(nn)) | |
# Normalize neighbor counts | |
max_nn = float(max(nneighbors)) | |
min_nn = min(nneighbors) | |
nneighbors = [(n-min_nn)/max_nn for n in nneighbors] | |
print '\tLarger Cluster: {0} elements'.format(int(max_nn)) | |
# Add to bfactor | |
for com, nn in zip(ligand_coms, nneighbors): | |
com.bfactor = nn | |
# Write sampling.pdb | |
print "= Writing analysis file to: {0}".format(ui.output) | |
io = PDBIO() | |
output_structure = sb.get_structure() | |
io.set_structure(output_structure) | |
io.save(ui.output) | |
print "= Writing Pymol visualization file" | |
with open('sampling.pml', 'w') as handle: | |
print >>handle, 'color white' | |
print >>handle, 'spectrum b, rainbow, chain X+Y' | |
print >>handle, 'as spheres, chain X+Y' | |
print >>handle, 'as surface, not chain X+Y' | |
print >>handle, 'center not chain X+Y' | |
print >>handle, 'zoom' | |
print "= End" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment