Created
February 21, 2014 17:13
-
-
Save JoaoRodrigues/9138606 to your computer and use it in GitHub Desktop.
CNS Topology to PDB file - loads charges into b-factor column and parses bonds to CONECT statements
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Replaces the bfactor column of a PDB file | |
with the charge information present in a | |
CNS topology file. | |
Atoms missing from the topology are automatically | |
assigned a set charge (default 0.0) | |
If reading a cns top file, parses bond information | |
to connect records at the end of the output file. | |
""" | |
DEFAULT_CHARGE=0.0 | |
## DO NOT EDIT BELOW ## | |
import sys | |
import os | |
import re | |
USAGE = "USAGE: %s file.pdb cns_file.top" %sys.argv[0] | |
def _write_error_message(message, show_usage=False): | |
"""Handles error messages""" | |
sys.stderr.write(message+"\n") | |
if show_usage: | |
sys.stderr.write('\n'+USAGE+'\n') | |
sys.exit(1) | |
try: | |
from Bio.PDB import PDBParser | |
from Bio.PDB import PDBIO | |
except ImportError: | |
_write_error_message("Biopython PDB module could not be imported. Check your PYTHONPATH.") | |
def _parse_cns_psf(psf_data): | |
charge_dict = {} | |
for line in psf_data: | |
fields = line.split() | |
if len(fields) == 8 and fields[0].isdigit(): | |
atom_name = fields[4][1:-1].strip() | |
atom_charge = float(fields[6]) | |
charge_dict[atom_name] = atom_charge | |
return charge_dict | |
def _parse_cns_topology(top_data): | |
""" | |
Parses a CNS topology file, retrieving only the lines | |
that contain CHARGE= fields and BONDs. | |
""" | |
charge_re = re.compile('CHARGE=([\-0-9\. ]+)') | |
charge_dict = {} | |
bonds_list = [] | |
for line in top_data: | |
line = line.upper() | |
if 'CHARGE=' in line: | |
atom_name = line.split()[1].strip() | |
s_atom_charge = re.search(charge_re, line).group(1) | |
atom_charge = float(s_atom_charge) | |
charge_dict[atom_name] = atom_charge | |
elif line.startswith('BOND'): | |
atom_pair = tuple(line.strip().split()[1:]) | |
bonds_list.append(atom_pair) | |
return (bonds_list, charge_dict) | |
def _replace_bfactor(structure, mapping): | |
""" | |
Uses an atom name -> number mapping to | |
replace the bfactor column with. | |
""" | |
for atom in structure.get_atoms(): | |
try: | |
atom.bfactor = mapping[atom.name] | |
except KeyError: | |
print "Atom not found in topology: %s" %atom.name | |
atom.bfactor = DEFAULT_CHARGE | |
return structure | |
if __name__ == '__main__': | |
args = sys.argv[1:] | |
if len(args) != 2: | |
_write_error_message('Incorrect number of arguments.', True) | |
for f in args: | |
if not os.path.exists(f): | |
_write_error_message('File not found: %s' %f) | |
pdb_path = args[0] | |
top_path = args[1] | |
try: | |
P = PDBParser(QUIET=1) | |
except AttributeError: | |
P = PDBParser() | |
top_file = open(top_path) | |
if top_path.endswith('psf'): | |
bonds = None # We could parse it from the PSF too.. | |
charges = _parse_cns_psf(top_file) | |
elif top_path.endswith('top') or top_path.endswith('TOP'): | |
bonds, charges = _parse_cns_topology(top_file) | |
else: | |
sys.exit(1) | |
top_file.close() | |
struct = P.get_structure('structure', pdb_path) | |
new_struct = _replace_bfactor(struct, charges) | |
nname = "%s.pdb" %pdb_path.replace('.pdb', '_top') | |
try: | |
io = PDBIO() | |
io.set_structure(new_struct) | |
io.save(nname, write_end=False) | |
except Exception, e: | |
_write_error_message('There was an error saving your output:\n\n%s\n\n' %e) | |
if bonds: | |
atom_to_serial = dict([ (atom.name, atom.serial_number) for atom in new_struct.get_atoms()]) | |
pdb_handle = open(nname, 'a') | |
for i, j in bonds: | |
if i in atom_to_serial and j in atom_to_serial: | |
pdb_handle.write('CONECT%5i%5i\n' %(atom_to_serial[i], atom_to_serial[j])) | |
pdb_handle.close() | |
print "File saved at: %s" %nname | |
print "Use 'spectrum b, red_white_blue' in Pymol to visualize the charges" | |
if bonds: | |
print "Use set connect_mode, 1 to visualize only the bonds from the topology" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment