Last active
August 30, 2024 14:57
-
-
Save PatWalters/3c0a483c030a2c75cb22c4234f206973 to your computer and use it in GitHub Desktop.
Split a protein-ligand complex into protein and ligands and assign ligand bond orders using SMILES strings from Ligand Expo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Split a protein-ligand complex into protein and ligands and assign ligand bond orders using SMILES strings from Ligand Export | |
# Code requires Python 3.6 | |
import sys | |
from prody import * | |
import pandas as pd | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
from io import StringIO | |
import requests | |
def read_ligand_expo(): | |
""" | |
Read Ligand Expo data, try to find a file called | |
Components-smiles-stereo-oe.smi in the current directory. | |
If you can't find the file, grab it from the RCSB | |
:return: Ligand Expo as a dictionary with ligand id as the key | |
""" | |
file_name = "Components-smiles-stereo-oe.smi" | |
try: | |
df = pd.read_csv(file_name, sep="\t", | |
header=None, | |
names=["SMILES", "ID", "Name"]) | |
except FileNotFoundError: | |
url = f"http://ligand-expo.rcsb.org/dictionaries/{file_name}" | |
print(url) | |
r = requests.get(url, allow_redirects=True) | |
open('Components-smiles-stereo-oe.smi', 'wb').write(r.content) | |
df = pd.read_csv(file_name, sep="\t", | |
header=None, | |
names=["SMILES", "ID", "Name"]) | |
df.set_index("ID", inplace=True) | |
return df.to_dict() | |
def get_pdb_components(pdb_id): | |
""" | |
Split a protein-ligand pdb into protein and ligand components | |
:param pdb_id: | |
:return: | |
""" | |
pdb = parsePDB(pdb_id) | |
protein = pdb.select('protein') | |
ligand = pdb.select('not protein and not water') | |
return protein, ligand | |
def process_ligand(ligand, res_name, expo_dict): | |
""" | |
Add bond orders to a pdb ligand | |
1. Select the ligand component with name "res_name" | |
2. Get the corresponding SMILES from the Ligand Expo dictionary | |
3. Create a template molecule from the SMILES in step 2 | |
4. Write the PDB file to a stream | |
5. Read the stream into an RDKit molecule | |
6. Assign the bond orders from the template from step 3 | |
:param ligand: ligand as generated by prody | |
:param res_name: residue name of ligand to extract | |
:param expo_dict: dictionary with LigandExpo | |
:return: molecule with bond orders assigned | |
""" | |
output = StringIO() | |
sub_mol = ligand.select(f"resname {res_name}") | |
sub_smiles = expo_dict['SMILES'][res_name] | |
template = AllChem.MolFromSmiles(sub_smiles) | |
writePDBStream(output, sub_mol) | |
pdb_string = output.getvalue() | |
rd_mol = AllChem.MolFromPDBBlock(pdb_string) | |
new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol) | |
return new_mol | |
def write_pdb(protein, pdb_name): | |
""" | |
Write a prody protein to a pdb file | |
:param protein: protein object from prody | |
:param pdb_name: base name for the pdb file | |
:return: None | |
""" | |
output_pdb_name = f"{pdb_name}_protein.pdb" | |
writePDB(f"{output_pdb_name}", protein) | |
print(f"wrote {output_pdb_name}") | |
def write_sdf(new_mol, pdb_name, res_name): | |
""" | |
Write an RDKit molecule to an SD file | |
:param new_mol: | |
:param pdb_name: | |
:param res_name: | |
:return: | |
""" | |
outfile_name = f"{pdb_name}_{res_name}_ligand.sdf" | |
writer = Chem.SDWriter(f"{outfile_name}") | |
writer.write(new_mol) | |
print(f"wrote {outfile_name}") | |
def main(pdb_name): | |
""" | |
Read Ligand Expo data, split pdb into protein and ligands, | |
write protein pdb, write ligand sdf files | |
:param pdb_name: id from the pdb, doesn't need to have an extension | |
:return: | |
""" | |
df_dict = read_ligand_expo() | |
protein, ligand = get_pdb_components(pdb_name) | |
write_pdb(protein, pdb_name) | |
res_name_list = list(set(ligand.getResnames())) | |
for res in res_name_list: | |
new_mol = process_ligand(ligand, res, df_dict) | |
write_sdf(new_mol, pdb_name, res) | |
if __name__ == "__main__": | |
if len(sys.argv) == 2: | |
main(sys.argv[1]) | |
else: | |
print("Usage: {sys.argv[1]} pdb_id", file=sys.stderr) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am trying to identify the functional groups and separate some molecules into different parts. How do I go about it? thanks