Created
October 2, 2018 01:20
-
-
Save PatWalters/c046fee2760e6894ed13e19b8c99193b to your computer and use it in GitHub Desktop.
An improved script to extract a ligand from a protein-ligand complex and assign bond orders
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
from prody import * | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
from io import StringIO | |
import pypdb | |
def get_pdb_components(pdb_id): | |
""" | |
Split a protein-ligand pdb into protein and ligand components | |
:param pdb_id: | |
:return: | |
""" | |
pdb = parsePDB(pdb_id) | |
protein = pdb.select('protein') | |
ligand = pdb.select('not protein and not water') | |
return protein, ligand | |
def process_ligand(ligand, res_name): | |
""" | |
Add bond orders to a pdb ligand | |
1. Select the ligand component with name "res_name" | |
2. Get the corresponding SMILES from pypdb | |
3. Create a template molecule from the SMILES in step 2 | |
4. Write the PDB file to a stream | |
5. Read the stream into an RDKit molecule | |
6. Assign the bond orders from the template from step 3 | |
:param ligand: ligand as generated by prody | |
:param res_name: residue name of ligand to extract | |
:return: molecule with bond orders assigned | |
""" | |
output = StringIO() | |
sub_mol = ligand.select(f"resname {res_name}") | |
chem_desc = pypdb.describe_chemical(f"{res_name}") | |
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"] | |
template = AllChem.MolFromSmiles(sub_smiles) | |
writePDBStream(output, sub_mol) | |
pdb_string = output.getvalue() | |
rd_mol = AllChem.MolFromPDBBlock(pdb_string) | |
new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol) | |
return new_mol | |
def write_pdb(protein, pdb_name): | |
""" | |
Write a prody protein to a pdb file | |
:param protein: protein object from prody | |
:param pdb_name: base name for the pdb file | |
:return: None | |
""" | |
output_pdb_name = f"{pdb_name}_protein.pdb" | |
writePDB(f"{output_pdb_name}", protein) | |
print(f"wrote {output_pdb_name}") | |
def write_sdf(new_mol, pdb_name, res_name): | |
""" | |
Write an RDKit molecule to an SD file | |
:param new_mol: | |
:param pdb_name: | |
:param res_name: | |
:return: | |
""" | |
outfile_name = f"{pdb_name}_{res_name}_ligand.sdf" | |
writer = Chem.SDWriter(f"{outfile_name}") | |
writer.write(new_mol) | |
print(f"wrote {outfile_name}") | |
def main(pdb_name): | |
""" | |
Read Ligand Expo data, split pdb into protein and ligands, | |
write protein pdb, write ligand sdf files | |
:param pdb_name: id from the pdb, doesn't need to have an extension | |
:return: | |
""" | |
protein, ligand = get_pdb_components(pdb_name) | |
write_pdb(protein, pdb_name) | |
res_name_list = list(set(ligand.getResnames())) | |
for res in res_name_list: | |
new_mol = process_ligand(ligand, res) | |
write_sdf(new_mol, pdb_name, res) | |
if __name__ == "__main__": | |
if len(sys.argv) == 2: | |
main(sys.argv[1]) | |
else: | |
print("Usage: {sys.argv[1]} pdb_id", file=sys.stderr) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Dear Dr. Walters,
Thank you for sharing this script; it works like a charm! The only thing I had to update was due to the changes in pypdb. Instead of
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
I used