Last active
October 21, 2022 20:16
-
-
Save 0187773933/28ecf8d0308c2d93fbb36afc89b5e89a to your computer and use it in GitHub Desktop.
Calculates the isoelectric point, the neutral pH, and the net charge at a given pH for a peptide sequence.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import math | |
import numpy as np | |
from decimal import Decimal | |
# Per-residue property table keyed by one-letter amino-acid code.
#
# Fields read by the functions in this file:
#   "pka_1" - used as the C-terminus (acid) pKa of the last residue
#   "pka_2" - used as the N-terminus (base) pKa of the first residue
#   "pka_3" - side-chain pKa; present only on residues with an ionizable
#             side chain (R, D, C, E, H, K, Y)
# Fields not read by the visible code (meanings presumed from their names,
# units not stated here - TODO confirm against the data source):
#   "sc_mass"  - presumably side-chain mass
#   "sc_hphob" - presumably side-chain hydrophobicity
#   "extco"    - presumably extinction coefficient (C, W, Y only)
AMINO_ACIDS = {
    "A": {
        "name": "Alanine", "3letter": "Ala", "sc_mass": 15.0234,
        "pka_1": 2.34, "pka_2": 9.60,
        "sc_hphob": 0.5,
    },
    "R": {
        "name": "Arginine", "3letter": "Arg", "sc_mass": 100.0873,
        "pka_1": 2.17, "pka_2": 9.04, "pka_3": 12.48,
        "sc_hphob": 1.81,
    },
    "N": {
        "name": "Asparagine", "3letter": "Asn", "sc_mass": 58.0292,
        "pka_1": 2.02, "pka_2": 8.80,
        "sc_hphob": 0.85,
    },
    "D": {
        "name": "Aspartate", "3letter": "Asp", "sc_mass": 59.0132,
        "pka_1": 1.88, "pka_2": 9.60, "pka_3": 3.65,
        "sc_hphob": 3.64,
    },
    "C": {
        "name": "Cysteine", "3letter": "Cys", "sc_mass": 46.9955,
        "pka_1": 1.96, "pka_2": 10.28, "pka_3": 8.18,
        "sc_hphob": -0.02, "extco": 125,
    },
    "Q": {
        "name": "Glutamine", "3letter": "Gln", "sc_mass": 72.0448,
        "pka_1": 2.17, "pka_2": 9.13,
        "sc_hphob": 0.77,
    },
    "E": {
        "name": "Glutamate", "3letter": "Glu", "sc_mass": 73.0288,
        "pka_1": 2.19, "pka_2": 9.67, "pka_3": 4.25,
        "sc_hphob": 3.63,
    },
    "G": {
        "name": "Glycine", "3letter": "Gly", "sc_mass": 1.0078,
        "pka_1": 2.34, "pka_2": 9.60,
        "sc_hphob": 1.15,
    },
    "H": {
        "name": "Histidine", "3letter": "His", "sc_mass": 81.0452,
        "pka_1": 1.82, "pka_2": 9.17, "pka_3": 6.0,
        "sc_hphob": 2.33,
    },
    "I": {
        "name": "Isoleucine", "3letter": "Ile", "sc_mass": 57.0702,
        "pka_1": 2.36, "pka_2": 9.68,
        "sc_hphob": -1.12,
    },
    "L": {
        "name": "Leucine", "3letter": "Leu", "sc_mass": 57.0702,
        "pka_1": 2.36, "pka_2": 9.60,
        "sc_hphob": -1.25,
    },
    "K": {
        "name": "Lysine", "3letter": "Lys", "sc_mass": 72.0811,
        "pka_1": 2.18, "pka_2": 8.95, "pka_3": 10.53,
        "sc_hphob": 2.8,
    },
    "M": {
        "name": "Methionine", "3letter": "Met", "sc_mass": 75.0267,
        "pka_1": 2.28, "pka_2": 9.21,
        "sc_hphob": -0.67,
    },
    "F": {
        "name": "Phenylalanine", "3letter": "Phe", "sc_mass": 91.0546,
        "pka_1": 1.83, "pka_2": 9.13,
        "sc_hphob": -1.71,
    },
    "P": {
        "name": "Proline", "3letter": "Pro", "sc_mass": 41.039,
        "pka_1": 1.99, "pka_2": 9.68,
        "sc_hphob": 0.14,
    },
    "S": {
        "name": "Serine", "3letter": "Ser", "sc_mass": 31.0183,
        "pka_1": 2.21, "pka_2": 9.15,
        "sc_hphob": 0.46,
    },
    "T": {
        "name": "Threonine", "3letter": "Thr", "sc_mass": 45.0339,
        "pka_1": 2.11, "pka_2": 9.62,
        "sc_hphob": 0.25,
    },
    "W": {
        "name": "Tryptophan", "3letter": "Trp", "sc_mass": 130.0655,
        "pka_1": 2.38, "pka_2": 9.38,
        "sc_hphob": -2.09, "extco": 5500,
    },
    "Y": {
        "name": "Tyrosine", "3letter": "Tyr", "sc_mass": 107.0495,
        "pka_1": 2.20, "pka_2": 9.11, "pka_3": 10.07,
        "sc_hphob": -0.71, "extco": 1490,
    },
    "V": {
        "name": "Valine", "3letter": "Val", "sc_mass": 43.0546,
        "pka_1": 2.32, "pka_2": 9.62,
        "sc_hphob": -0.46,
    },
}
# One-letter codes whose side chain is scored as an acid (pka_3, charge 0 or -1):
# Aspartate (D), Glutamate (E), Cysteine (C), Tyrosine (Y).
ACIDIC_SIDE_CHAINS = ["D", "E", "C", "Y"]

# One-letter codes whose side chain is scored as a base (pka_3, charge +1 or 0):
# Lysine (K), Arginine (R), Histidine (H).
BASIC_SIDE_CHAINS = ["K", "R", "H"]
def calculate_local_charge(pH=7.0, pKa=3.3, charge_type="acid"):
    """Return the charge of one ionizable group using a step-function model.

    The group is treated as fully protonated below its pKa, fully
    deprotonated above it, and half-and-half when pH equals the pKa exactly.

    Args:
        pH: pH of the solution.
        pKa: pKa of the ionizable group.
        charge_type: "base" (charge +1 when protonated, 0 otherwise) or
            "acid" (charge 0 when protonated, -1 otherwise). Any other
            value yields 0.0.

    Returns:
        One of 1.0, 0.5, 0.0, -0.5 or -1.0.
    """
    # Charge for each protonation state: (pH < pKa, pH == pKa, pH > pKa).
    if charge_type == "base":
        protonated, half, deprotonated = 1.0, 0.5, 0.0
    elif charge_type == "acid":
        protonated, half, deprotonated = 0.0, -0.5, -1.0
    else:
        protonated = half = deprotonated = 0.0

    if pH < pKa:
        return protonated
    if pH == pKa:
        return half
    if pH > pKa:
        return deprotonated
    # No ordering holds (e.g. NaN input): no charge is assigned.
    return 0.0
def calculate_net_charge_at_pH(peptide_sequence, pH, printing=False):
    """Sum the charges of every ionizable group of a peptide at a given pH.

    The sequence is read N-terminus first. The contributions are:
      * the N-terminus, scored as a base with the first residue's "pka_2";
      * the C-terminus, scored as an acid with the last residue's "pka_1";
      * every side chain listed in ACIDIC_SIDE_CHAINS or BASIC_SIDE_CHAINS,
        scored with that residue's "pka_3".

    Args:
        peptide_sequence: String of one-letter amino-acid codes that are keys
            of AMINO_ACIDS.
        pH: pH at which to evaluate the charge.
        printing: When True, print each contribution and the net charge.

    Returns:
        The net charge as a float; 0.0 for an empty sequence.

    Raises:
        KeyError: If a terminal residue, or a residue with an ionizable side
            chain, is missing from AMINO_ACIDS.
    """
    charges = []
    last_index = len(peptide_sequence) - 1
    for index, amino_acid in enumerate(peptide_sequence):
        # Special case for the first residue: N-terminus.
        if index == 0:
            n_terminus_charge = calculate_local_charge(
                pH, AMINO_ACIDS[amino_acid]["pka_2"], "base"
            )
            if printing:
                print(f"N-Terminus = {amino_acid} = {n_terminus_charge}")
            charges.append(n_terminus_charge)
        # Special case for the last residue: C-terminus. This is a separate
        # `if` (not `elif`) so that a one-residue sequence, whose only residue
        # is both first and last, gets both terminal charges.
        if index == last_index:
            c_terminus_charge = calculate_local_charge(
                pH, AMINO_ACIDS[amino_acid]["pka_1"], "acid"
            )
            if printing:
                print(f"C-Terminus = {amino_acid} = {c_terminus_charge}")
            charges.append(c_terminus_charge)
        # Every residue, terminal or not, contributes its side chain.
        if amino_acid in ACIDIC_SIDE_CHAINS:
            side_chain_charge = calculate_local_charge(
                pH, AMINO_ACIDS[amino_acid]["pka_3"], "acid"
            )
            if printing:
                print(f"Acidic-Side-Chain = {amino_acid} = {side_chain_charge}")
            charges.append(side_chain_charge)
        elif amino_acid in BASIC_SIDE_CHAINS:
            side_chain_charge = calculate_local_charge(
                pH, AMINO_ACIDS[amino_acid]["pka_3"], "base"
            )
            if printing:
                print(f"Basic-Side-Chain = {amino_acid} = {side_chain_charge}")
            charges.append(side_chain_charge)
    # Float start value keeps the result a float even for an empty sequence.
    net_charge = sum(charges, 0.0)
    if printing:
        print(f"Net Charge = {net_charge}")
    return net_charge
# This assumes the peptide sequence is written with the N-terminus on the left and the C-terminus on the right.
# Based on : | |
# https://www2.tulane.edu/~biochem/WW/pepdrawhelp.html#charge | |
# https://www2.tulane.edu/~biochem/WW/PepDraw/propertiescalculator.js#227 | |
def calculate_isoelectric_point(peptide_sequence):
    """Find the lowest scanned pH at which the peptide is (nearly) neutral.

    The pH values of np.arange(0, 14, 0.01) are tried in increasing order and
    the first one whose net charge, as computed by calculate_net_charge_at_pH,
    is within 0.008 of zero is returned.

    Args:
        peptide_sequence: String of one-letter amino-acid codes.

    Returns:
        The matching pH (an element of the NumPy range), or False when no
        scanned pH gives a near-zero net charge.
    """
    scanned_pHs = np.arange(0, 14, 0.01)
    neutral_pHs = (
        candidate
        for candidate in scanned_pHs
        if math.isclose(
            0.0,
            calculate_net_charge_at_pH(peptide_sequence, candidate),
            abs_tol=0.008,
        )
    )
    # False is the "not found" sentinel that callers of this function see.
    return next(neutral_pHs, False)
def calculate_pI(peptide_sequence):
    """Estimate the pI as the mean of the two pKas bracketing the neutral pH.

    The neutral pH comes from calculate_isoelectric_point. The candidate pKas
    are the same ionizable groups that calculate_net_charge_at_pH scores: the
    N-terminus ("pka_2" of the first residue), the C-terminus ("pka_1" of the
    last residue) and every "pka_3" present in the sequence. The neutral pH,
    the sorted pKa list and the resulting pI are printed.

    Args:
        peptide_sequence: Non-empty string of one-letter amino-acid codes,
            N-terminus first.

    Returns:
        The pI: the average of the closest pKa below and the closest pKa
        above the neutral pH.

    Raises:
        IndexError: If the sequence is empty, or if no pKa lies strictly
            below or strictly above the neutral pH (this includes the case
            where no neutral pH was found).
        KeyError: If a residue is not a key of AMINO_ACIDS.
    """
    iso_electric_point = calculate_isoelectric_point(peptide_sequence)
    print(f"Peptide is Neutral @ pH = {iso_electric_point}")

    n_terminus_pka = AMINO_ACIDS[peptide_sequence[0]]["pka_2"]
    # The C-terminus is the carboxyl group, i.e. "pka_1" - the same key that
    # calculate_net_charge_at_pH uses for the C-terminal charge.
    c_terminus_pka = AMINO_ACIDS[peptide_sequence[-1]]["pka_1"]
    side_chain_pkas = [
        AMINO_ACIDS[amino_acid]["pka_3"]
        for amino_acid in peptide_sequence
        if "pka_3" in AMINO_ACIDS[amino_acid]
    ]
    pkas = sorted([n_terminus_pka, *side_chain_pkas, c_terminus_pka])

    lower_split = [x for x in pkas if x < iso_electric_point]
    upper_split = [x for x in pkas if x > iso_electric_point]
    if not lower_split or not upper_split:
        # Same exception type the bare list indexing would raise, but with
        # an explanation of what went wrong.
        raise IndexError(
            f"no pKa on both sides of neutral pH {iso_electric_point} "
            f"for peptide {peptide_sequence!r} (pKas = {pkas})"
        )
    lower_bound_pka = lower_split[-1]
    upper_bound_pka = upper_split[0]
    pI = (lower_bound_pka + upper_bound_pka) / 2

    print("pKas =", pkas)
    print(f"pI = ( ( {lower_bound_pka} + {upper_bound_pka} ) / 2 ) = {pI}")
    return pI
if __name__ == "__main__":
    # Demo run: report the pI of a sample peptide, then print the
    # per-group charge breakdown of the same peptide at pH 10.
    peptide_sequence = "KPEHD"
    calculate_pI(peptide_sequence)
    calculate_net_charge_at_pH(peptide_sequence, pH=10.0, printing=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment