Skip to content

Instantly share code, notes, and snippets.

@jagmit
Created February 25, 2020 16:11
Show Gist options
  • Save jagmit/56f9440edbd9f8b776e797ca626bd572 to your computer and use it in GitHub Desktop.
Save jagmit/56f9440edbd9f8b776e797ca626bd572 to your computer and use it in GitHub Desktop.
#pip install biopython
#1) Reading an PDB file:
#create a PDBParser object:
import os
import math
import glob
from Bio.PDB import *
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB import parse_pdb_header
parser = PDBParser(PERMISSIVE=1)
#Task 1: list the chains within a .pdb file:
#Task 2: how many chains are within a .pdb file:
def chain_name_number(file):
with open(file, "r") as f:
chains = set()
res_chain = []
for line in f:
splits = line.split()
if splits[0] == "ATOM" and splits[2] == "CA":
chains.add(splits[4][0])
res_chain.append(len(chains))
for i in chains:
res_chain.append(i)
return res_chain
#print(length_res)
#chain_name_number("5iz8.pdb")
#chains = chain_name_number("1drj.pdb")
#OUTPUT: ['A', 'B', 'C', 'D']
#4
#task3: for each chain: create residue number list and coordinate list and put it in a dictionary
import os
def lists_cords_red(file):
with open(file, "r") as f:
chains = {}
for line in f:
splits = line.split()
if splits[0] == "ATOM" and splits[2] == "CA":
chain = splits[4][0]
if chain not in chains.keys():
chains[chain] = {"residues" : [], "coords" : []}
residue = line[22:26]
if residue not in chains[chain]["residues"]:
chains[chain]["residues"].append(residue)
coord = [line[32:38], line[39:46], line[47:54]]
chains[chain]["coords"].append(coord)
return chains
#OUTPUT:
#{'A': {'residues': ['1', '2', '3', '4', '5', '6', '7'], 'coords': [['16.615', '74.620', '16.918'], ['20.299', '75.628', '17.465'], ['21.468', '79.236', '18.008'], ['21.167', '82.153', '15.594'], ['21.847', '85.867', '15.022'], ['18.998', '88.348', '15.314'], ['16.690', '88.744', '12.273']]}}
#task4: calculate distances
res_chain = chain_name_number("TNFRSF21.pdb")
chains = lists_cords_red("TNFRSF21.pdb")
def calc_dist(file,chains,res_chain):
res_chain = chain_name_number(file) #store:4,A,B,C,D
if res_chain[0] == 1: #wenn 4 != 1
chain = res_chain[1]
dist_dict = {}
residue_list = chains[chain]['residues']
coord_list = chains[chain]['coords']
for i in range(len(residue_list)):
for j in range(i+1,len(residue_list)):
if i!= j:
value1 = float(coord_list[i][0]) - float(coord_list[j][0])
value2 = float(coord_list[i][1]) - float(coord_list[j][1])
value3 = float(coord_list[i][2]) - float(coord_list[j][2])
distance = math.sqrt(((x)**2) +((value2)**2) + ((value3)**2))
mykey = str(residue_list[i]) + '_' + str(residue_list[j])
dist_dict[mykey] = distance
elif res_chain[0] >= 2:
chain = res_chain[1:]
dist_dict = {}
for i in range(len(chain)-1):
dicoInter = chains[chain[i]]
ListResidue = dicoInter["residues"]
ListCoords = dicoInter["coords"]
for j in range(i+1,len(chain)):
dicoInter2 = chains[chain[j]] #ab der 2.chain alle res+coord
ListResidue2 = dicoInter2["residues"]
ListCoords2 = dicoInter2["coords"]
for k in range(len(ListResidue)):
for l in range(len(ListResidue2)):
value1 = float(ListCoords[k][0]) - float(ListCoords2[l][0])
value2 = float(ListCoords[k][1]) - float(ListCoords2[l][1])
value3 = float(ListCoords[k][2]) - float(ListCoords2[l][2])
distance = math.sqrt(((value1)**2) +((value2)**2) + ((value3)**2))
mykey = str(chain[i]) + str(ListResidue[k]) + '_' + str(chain[j]) + str(ListResidue2[l])
dist_dict[mykey] = distance
return dist_dict
dist_dict = calc_dist("TNFRSF21.pdb",chains, res_chain)
def distance(chain_a, x, chain_b, y):
value1 = float(chain_a["coords"][x][0]) - float(chain_b["coords"][y][0])
value2 = float(chain_a["coords"][x][1]) - float(chain_b["coords"][y][1])
value3 = float(chain_a["coords"][x][2]) - float(chain_b["coords"][y][2])
return math.sqrt(((value1)**2) +((value2)**2) + ((value3)**2))
def calc_dist2(file):
header = chain_name_number(file)
chains = lists_cords_red(file)
dist_dict = {}
# wenn wir nur eine chain haben
if header[0] == 1 :
chain_name = list(chains.keys())[0]
chain = chains[chain_name] # get the single chain
for i in range(len(chain["residues"])):
for j in range(len(chain["residues"])):
if i > j:
key = chain_name + str(chain["residues"][i]) + '_' + chain_name + str(chain["residues"][j])
dist_dict[key] = distance(chain, i, chain, j)
# wenn wir 2chainz haben (hihi)
elif header[0] == 2 :
chain_a_name = list(chains.keys())[0]
chain_a = chains[chain_a_name]
chain_b_name = list(chains.keys())[1]
chain_b = chains[chain_b_name]
for i in range(len(chain_a["residues"])):
for j in range(len(chain_b["residues"])):
key = chain_a_name + str(chain_a["residues"][i]) + '_' + chain_b_name + str(chain_b["residues"][j])
dist_dict[key] = distance(chain_a, i, chain_b, j)
else :
print("more than 2 chains")
return dist_dict;
#task 5: take distances less than 5 Armstrong
def small_dist(dist_dict):
Lastdict = {}
for i in dist_dict.values():
if float(i) < 5.0:
Lastdict[i] = float(i)
return Lastdict
def small_dist2(file):
dist_dict = calc_dist2(file)
result_dict = {}
for k in dist_dict.keys():
if float(dist_dict[k]) < 5.0:
result_dict[k] = dist_dict[k]
return result_dict
print(small_dist2("TNFRSF21.pdb"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment