Skip to content

Instantly share code, notes, and snippets.

@jonathanking
Last active March 8, 2023 20:07
Show Gist options
  • Save jonathanking/c301285f871c370e127782228ee9b258 to your computer and use it in GitHub Desktop.
Save jonathanking/c301285f871c370e127782228ee9b258 to your computer and use it in GitHub Desktop.
Loads protein from pdb into SCNProtein.
import prody
import sidechainnet as scn
from sidechainnet.utils.download import get_resolution_from_pdbid
def from_pdb(filename, pdbid="", include_resolution=False):
    """Create a SCNProtein from a PDB file.

    Warning: does not support gapped proteins!!

    Args:
        filename (str): Path to existing PDB file.
        pdbid (str): 4-letter string representing the PDB Identifier. It is stored
            as the protein's ``id`` and, when ``include_resolution`` is True, used
            to look up the resolution.
        include_resolution (bool, default=False): If True, query the PDB for the
            protein structure resolution based off of the given ``pdbid``.

    Returns:
        A SCNProtein object containing the coordinates, angles, and sequence parsed
        from the PDB file.

    Raises:
        AssertionError: If ``include_resolution`` is True but ``pdbid`` is empty.
    """
    # Validate the arguments before any parsing so a bad call fails fast.
    # The error is raised explicitly (rather than with an ``assert`` statement) so
    # the check is not stripped when Python runs with ``-O``; AssertionError is
    # kept as the exception type for backward compatibility with existing callers.
    if include_resolution and not pdbid:
        raise AssertionError("You must provide a PDB ID to look up the resolution.")

    # TODO: Raise an alarm if the user is working with files that have gaps
    # First, use Prody to parse the PDB file
    chain = prody.parsePDB(filename)

    # Next, use SidechainNet to make the relevant measurements given the Prody chain obj
    (dihedrals_np, coords_np, observed_sequence, unmodified_sequence,
     is_nonstd) = scn.utils.measure.get_seq_coords_and_angles(chain,
                                                              replace_nonstd=True)

    num_residues = len(observed_sequence)
    scndata = {
        # Regroup the coordinates into one (atoms, xyz) slab per residue.
        "coordinates": coords_np.reshape(num_residues, -1, 3),
        "angles": dihedrals_np,
        "sequence": observed_sequence,
        "unmodified_seq": unmodified_sequence,
        # No gap detection is performed, so every position gets the same '+' symbol.
        "mask": "+" * num_residues,
        "is_modified": is_nonstd,
        "id": pdbid,
    }

    # If requested, look up the resolution of the given PDB ID
    if include_resolution:
        scndata['resolution'] = get_resolution_from_pdbid(pdbid)

    return scn.SCNProtein(**scndata)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment