Skip to content

Instantly share code, notes, and snippets.

@0187773933
Last active October 21, 2022 20:16
Show Gist options
  • Save 0187773933/28ecf8d0308c2d93fbb36afc89b5e89a to your computer and use it in GitHub Desktop.
Save 0187773933/28ecf8d0308c2d93fbb36afc89b5e89a to your computer and use it in GitHub Desktop.
Calculates the Isoelectric Point, Neutral pH, and Net Charge at a Given pH for a Peptide Sequence
#!/usr/bin/env python3
import math
import numpy as np
from decimal import Decimal
# Per-residue reference data, keyed by one-letter amino-acid code.
# Fields of each entry:
#   name / 3letter : full name and three-letter abbreviation of the residue.
#   pka_1          : read by calculate_net_charge_at_pH() as the C-terminus
#                    ( "acid" ) pKa when the residue is last in the sequence.
#   pka_2          : read by calculate_net_charge_at_pH() as the N-terminus
#                    ( "base" ) pKa when the residue is first in the sequence.
#   pka_3          : side-chain pKa ; only present on R , D , C , E , H , K , Y.
#   sc_mass        : presumably the side-chain mass ( units not stated here ) -- TODO confirm.
#   sc_hphob       : presumably a side-chain hydrophobicity score -- TODO confirm which scale.
#   extco          : presumably an extinction coefficient ( only on C , W , Y ) -- TODO confirm.
# NOTE: sc_mass , sc_hphob and extco are not read by any function in this file.
AMINO_ACIDS = {
"A": {
"name": "Alanine" ,
"3letter": "Ala",
"sc_mass": 15.0234,
"pka_1": 2.34,
"pka_2": 9.60,
"sc_hphob": 0.5
},
"R": {
"name": "Arginine" ,
"3letter": "Arg",
"sc_mass": 100.0873,
"pka_1": 2.17,
"pka_2": 9.04,
"pka_3": 12.48,
"sc_hphob": 1.81
},
"N": {
"name": "Asparagine",
"3letter": "Asn",
"sc_mass": 58.0292,
"pka_1": 2.02,
"pka_2": 8.80,
"sc_hphob": 0.85
},
"D": {
"name": "Aspartate" ,
"3letter": "Asp",
"sc_mass": 59.0132,
"pka_1": 1.88,
"pka_2": 9.60,
"pka_3": 3.65,
"sc_hphob": 3.64
},
"C": {
"name": "Cysteine" ,
"3letter": "Cys",
"sc_mass": 46.9955,
"pka_1": 1.96,
"pka_2": 10.28,
"pka_3": 8.18,
"sc_hphob": -0.02,
"extco": 125
},
"Q": {
"name": "Glutamine" ,
"3letter": "Gln",
"sc_mass": 72.0448,
"pka_1": 2.17,
"pka_2": 9.13,
"sc_hphob": 0.77
},
"E": {
"name": "Glutamate" ,
"3letter": "Glu",
"sc_mass": 73.0288,
"pka_1": 2.19,
"pka_2": 9.67,
"pka_3": 4.25,
"sc_hphob": 3.63
},
"G": {
"name": "Glycine" ,
"3letter": "Gly",
"sc_mass": 1.0078,
"pka_1": 2.34,
"pka_2": 9.60,
"sc_hphob": 1.15
},
"H": {
"name": "Histidine" ,
"3letter": "His",
"sc_mass": 81.0452,
"pka_1": 1.82,
"pka_2": 9.17,
"pka_3": 6.0,
"sc_hphob": 2.33
},
"I": {
"name": "Isoleucine" ,
"3letter": "Ile",
"sc_mass": 57.0702,
"pka_1": 2.36,
"pka_2": 9.68,
"sc_hphob": -1.12
},
"L": {
"name": "Leucine" ,
"3letter": "Leu",
"sc_mass": 57.0702,
"pka_1": 2.36,
"pka_2": 9.60,
"sc_hphob": -1.25
},
"K": {
"name": "Lysine" ,
"3letter": "Lys",
"sc_mass": 72.0811,
"pka_1": 2.18,
"pka_2": 8.95,
"pka_3": 10.53,
"sc_hphob": 2.8
},
"M": {
"name": "Methionine" ,
"3letter": "Met",
"sc_mass": 75.0267,
"pka_1": 2.28,
"pka_2": 9.21,
"sc_hphob": -0.67
},
"F": {
"name": "Phenylalanine" ,
"3letter": "Phe",
"sc_mass": 91.0546,
"pka_1": 1.83,
"pka_2": 9.13,
"sc_hphob": -1.71
},
"P": {
"name": "Proline" ,
"3letter": "Pro",
"sc_mass": 41.039,
"pka_1": 1.99,
"pka_2": 9.68,
"sc_hphob": 0.14
},
"S": {
"name": "Serine" ,
"3letter": "Ser",
"sc_mass": 31.0183,
"pka_1": 2.21,
"pka_2": 9.15,
"sc_hphob": 0.46
},
"T": {
"name": "Threonine" ,
"3letter": "Thr",
"sc_mass": 45.0339,
"pka_1": 2.11,
"pka_2": 9.62,
"sc_hphob": 0.25
},
"W": {
"name": "Tryptophan" ,
"3letter": "Trp",
"sc_mass": 130.0655,
"pka_1": 2.38,
"pka_2": 9.38,
"sc_hphob": -2.09,
"extco": 5500
},
"Y": {
"name": "Tyrosine" ,
"3letter": "Tyr",
"sc_mass": 107.0495,
"pka_1": 2.20,
"pka_2": 9.11,
"pka_3": 10.07,
"sc_hphob": -0.71,
"extco": 1490
},
"V": {
"name": "Valine" ,
"3letter": "Val",
"sc_mass": 43.0546,
"pka_1": 2.32,
"pka_2": 9.62,
"sc_hphob": -0.46
}
}
# One-letter codes of residues whose side chain is scored as an acid
# ( neutral while protonated , negative once deprotonated ).
ACIDIC_SIDE_CHAINS = [
    "D" ,  # Aspartate
    "E" ,  # Glutamate
    "C" ,  # Cysteine
    "Y" ,  # Tyrosine
]
# One-letter codes of residues whose side chain is scored as a base
# ( positive while protonated , neutral once deprotonated ).
BASIC_SIDE_CHAINS = [
    "K" ,  # Lysine
    "R" ,  # Arginine
    "H" ,  # Histidine
]
def calculate_local_charge( pH=7.0 , pKa=3.3 , charge_type="acid" ):
    """Return the charge of a single ionizable group under a step-function model.

    The group is treated as fully protonated below its pKa , fully
    deprotonated above it , and half/half when pH and pKa coincide.

    Args:
        pH: pH of the solution.
        pKa: pKa of the ionizable group.
        charge_type: "base" ( +1 when protonated , 0 when deprotonated ) or
            "acid" ( 0 when protonated , -1 when deprotonated ).

    Returns:
        One of 1.0 , 0.5 , 0.0 , -0.5 , -1.0. Any other `charge_type` yields 0.0.
    """
    import math  # local import keeps this function self-contained

    charge_magnitude = 1.0
    # Compare with a tiny absolute tolerance instead of `==` : pH values that
    # come out of float arithmetic ( e.g. 0.1 + 0.2 , or a stepped pH grid )
    # can differ from the tabulated pKa by one ulp , which would otherwise
    # make the equilibrium branch unreachable.
    if math.isclose( pH , pKa , rel_tol=0.0 , abs_tol=1e-9 ):
        state = "equilibrium"
    elif pH < pKa:
        state = "protonated"
    elif pH > pKa:
        state = "deprotonated"
    else:
        # Only reachable for non-comparable values such as NaN.
        return 0.0
    charge_by_state = {
        "base": {
            "protonated": charge_magnitude ,
            "equilibrium": ( charge_magnitude / 2 ) ,
            "deprotonated": 0.0 ,
        } ,
        "acid": {
            "protonated": 0.0 ,
            "equilibrium": -1 * ( charge_magnitude / 2 ) ,
            "deprotonated": ( -1 * charge_magnitude ) ,
        } ,
    }
    # Unknown charge types fall through to a neutral contribution.
    return charge_by_state.get( charge_type , {} ).get( state , 0.0 )
def calculate_net_charge_at_pH( peptide_sequence , pH , printing=False ):
    """Sum the step-model charges of every ionizable group in a peptide.

    The sequence is read with the N-terminus first and the C-terminus last.
    Contributions , in order , are : the N-terminus of the first residue
    ( its "pka_2" , as a base ) , the C-terminus of the last residue ( its
    "pka_1" , as an acid ) , and the side chain ( "pka_3" ) of every residue
    listed in ACIDIC_SIDE_CHAINS or BASIC_SIDE_CHAINS.

    Args:
        peptide_sequence: sequence of one-letter codes that are keys of AMINO_ACIDS.
        pH: pH at which to evaluate the charge.
        printing: when True , print each contribution and the total.

    Returns:
        The net charge. An empty sequence has no ionizable groups and yields 0.

    Raises:
        KeyError: if a residue code is not a key of AMINO_ACIDS.
    """
    charges = []
    last_index = len( peptide_sequence ) - 1
    for index , amino_acid in enumerate( peptide_sequence ):
        properties = AMINO_ACIDS[ amino_acid ]
        # Special Case for First , N-Terminus
        if index == 0:
            n_terminus_charge = calculate_local_charge( pH , properties[ "pka_2" ] , "base" )
            if printing:
                print( f"N-Terminus = {amino_acid} = {n_terminus_charge}" )
            charges.append( n_terminus_charge )
        # Special Case for Last , C-Terminus.
        # Deliberately `if` , not `elif` : in a single-residue sequence the
        # same residue is both termini and must contribute both charges.
        if index == last_index:
            c_terminus_charge = calculate_local_charge( pH , properties[ "pka_1" ] , "acid" )
            if printing:
                print( f"C-Terminus = {amino_acid} = {c_terminus_charge}" )
            charges.append( c_terminus_charge )
        # Every residue , terminal or not , also contributes its side chain
        if amino_acid in ACIDIC_SIDE_CHAINS:
            side_chain_charge = calculate_local_charge( pH , properties[ "pka_3" ] , "acid" )
            if printing:
                print( f"Acidic-Side-Chain = {amino_acid} = {side_chain_charge}" )
            charges.append( side_chain_charge )
        elif amino_acid in BASIC_SIDE_CHAINS:
            side_chain_charge = calculate_local_charge( pH , properties[ "pka_3" ] , "base" )
            if printing:
                print( f"Basic-Side-Chain = {amino_acid} = {side_chain_charge}" )
            charges.append( side_chain_charge )
    net_charge = sum( charges )
    if printing:
        print( f"Net Charge = {net_charge}" )
    return net_charge
# This assumes the peptide sequence is written with the N-Terminus on the left , and the C-Terminus on the right
# Based on :
# https://www2.tulane.edu/~biochem/WW/pepdrawhelp.html#charge
# https://www2.tulane.edu/~biochem/WW/PepDraw/propertiescalculator.js#227
def calculate_isoelectric_point( peptide_sequence , pH_start=0 , pH_stop=14 , pH_step=0.01 , abs_tol=0.008 ):
    """Scan a pH grid and return the first pH at which the peptide is neutral.

    Args:
        peptide_sequence: one-letter residue codes , N-terminus first.
        pH_start: first pH of the scan ( inclusive ).
        pH_stop: end of the scan ( exclusive , as in `np.arange` ).
        pH_step: spacing between scanned pH values.
        abs_tol: a net charge within this distance of 0.0 counts as neutral.

    Returns:
        The first grid pH ( an element of the `np.arange` array ) whose net
        charge is within `abs_tol` of zero , or False when no grid point
        qualifies. Test the failure case with `is False` : a genuine result
        of 0.0 also compares equal to False.
    """
    for pH in np.arange( pH_start , pH_stop , pH_step ):
        net_charge = calculate_net_charge_at_pH( peptide_sequence , pH )
        if math.isclose( 0.0 , net_charge , abs_tol=abs_tol ):
            return pH
    return False
def calculate_pI( peptide_sequence ):
    """Estimate and print the isoelectric point ( pI ) of a peptide.

    The neutral pH found by calculate_isoelectric_point() is bracketed by the
    nearest pKa below it and the nearest pKa above it ; the pI is the mean of
    those two pKas. The candidate pKas are the same groups that
    calculate_net_charge_at_pH() scores : the N-terminus ( "pka_2" of the
    first residue ) , the C-terminus ( "pka_1" of the last residue ) and every
    side chain that has a "pka_3".

    Args:
        peptide_sequence: one-letter residue codes , N-terminus first.

    Returns:
        The pI ( mean of the two bracketing pKas ).

    Raises:
        ValueError: if the sequence is empty , no neutral pH is found , or the
            neutral pH is not bracketed by a pKa on both sides.
        KeyError: if a residue code is not a key of AMINO_ACIDS.
    """
    if not peptide_sequence:
        raise ValueError( "peptide_sequence must contain at least one amino acid" )
    iso_electric_point = calculate_isoelectric_point( peptide_sequence )
    # `is False` on purpose : a numeric result of 0.0 would also `== False`.
    if iso_electric_point is False:
        raise ValueError( f"no neutral pH found for peptide {peptide_sequence!r}" )
    print( f"Peptide is Neutral @ pH = {iso_electric_point}" )
    n_terminus_pka = AMINO_ACIDS[ peptide_sequence[ 0 ] ][ "pka_2" ]
    # The C-terminus is the carboxyl group , i.e. "pka_1" , matching what
    # calculate_net_charge_at_pH() uses for the last residue.
    c_terminus_pka = AMINO_ACIDS[ peptide_sequence[ -1 ] ][ "pka_1" ]
    side_chain_pkas = [
        AMINO_ACIDS[ amino_acid ][ "pka_3" ]
        for amino_acid in peptide_sequence
        if "pka_3" in AMINO_ACIDS[ amino_acid ]
    ]
    pkas = sorted( [ n_terminus_pka , *side_chain_pkas , c_terminus_pka ] )
    lower_split = [ x for x in pkas if x < iso_electric_point ]
    upper_split = [ x for x in pkas if x > iso_electric_point ]
    if not lower_split or not upper_split:
        raise ValueError(
            f"neutral pH {iso_electric_point} is not bracketed by the pKas {pkas}"
        )
    lower_bound_pka = lower_split[ -1 ]
    upper_bound_pka = upper_split[ 0 ]
    pI = ( ( lower_bound_pka + upper_bound_pka ) / 2 )
    print( "pKas =" , pkas )
    print( f"pI = ( ( {lower_bound_pka} + {upper_bound_pka} ) / 2 ) = {pI}" )
    return pI
if __name__ == "__main__":
    # Demo run : report the pI of a sample pentapeptide , then print the
    # per-group charge breakdown of the same peptide at pH 10.
    peptide_sequence = "KPEHD"
    calculate_pI( peptide_sequence )
    calculate_net_charge_at_pH( peptide_sequence , pH=10.0 , printing=True )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment