Created
February 25, 2020 16:11
-
-
Save jagmit/56f9440edbd9f8b776e797ca626bd572 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pip install biopython | |
#1) Reading an PDB file: | |
#create a PDBParser object: | |
import os | |
import math | |
import glob | |
from Bio.PDB import * | |
from Bio.PDB.PDBParser import PDBParser | |
from Bio.PDB import parse_pdb_header | |
parser = PDBParser(PERMISSIVE=1) | |
#Task 1: list the chains within a .pdb file: | |
#Task 2: how many chains are within a .pdb file: | |
def chain_name_number(file): | |
with open(file, "r") as f: | |
chains = set() | |
res_chain = [] | |
for line in f: | |
splits = line.split() | |
if splits[0] == "ATOM" and splits[2] == "CA": | |
chains.add(splits[4][0]) | |
res_chain.append(len(chains)) | |
for i in chains: | |
res_chain.append(i) | |
return res_chain | |
#print(length_res) | |
#chain_name_number("5iz8.pdb") | |
#chains = chain_name_number("1drj.pdb") | |
#OUTPUT: ['A', 'B', 'C', 'D'] | |
#4 | |
#task3: for each chain: create residue number list and coordinate list and put it in a dictionary | |
import os | |
def lists_cords_red(file): | |
with open(file, "r") as f: | |
chains = {} | |
for line in f: | |
splits = line.split() | |
if splits[0] == "ATOM" and splits[2] == "CA": | |
chain = splits[4][0] | |
if chain not in chains.keys(): | |
chains[chain] = {"residues" : [], "coords" : []} | |
residue = line[22:26] | |
if residue not in chains[chain]["residues"]: | |
chains[chain]["residues"].append(residue) | |
coord = [line[32:38], line[39:46], line[47:54]] | |
chains[chain]["coords"].append(coord) | |
return chains | |
#OUTPUT: | |
#{'A': {'residues': ['1', '2', '3', '4', '5', '6', '7'], 'coords': [['16.615', '74.620', '16.918'], ['20.299', '75.628', '17.465'], ['21.468', '79.236', '18.008'], ['21.167', '82.153', '15.594'], ['21.847', '85.867', '15.022'], ['18.998', '88.348', '15.314'], ['16.690', '88.744', '12.273']]}} | |
#task4: calculate distances | |
res_chain = chain_name_number("TNFRSF21.pdb") | |
chains = lists_cords_red("TNFRSF21.pdb") | |
def calc_dist(file,chains,res_chain): | |
res_chain = chain_name_number(file) #store:4,A,B,C,D | |
if res_chain[0] == 1: #wenn 4 != 1 | |
chain = res_chain[1] | |
dist_dict = {} | |
residue_list = chains[chain]['residues'] | |
coord_list = chains[chain]['coords'] | |
for i in range(len(residue_list)): | |
for j in range(i+1,len(residue_list)): | |
if i!= j: | |
value1 = float(coord_list[i][0]) - float(coord_list[j][0]) | |
value2 = float(coord_list[i][1]) - float(coord_list[j][1]) | |
value3 = float(coord_list[i][2]) - float(coord_list[j][2]) | |
distance = math.sqrt(((x)**2) +((value2)**2) + ((value3)**2)) | |
mykey = str(residue_list[i]) + '_' + str(residue_list[j]) | |
dist_dict[mykey] = distance | |
elif res_chain[0] >= 2: | |
chain = res_chain[1:] | |
dist_dict = {} | |
for i in range(len(chain)-1): | |
dicoInter = chains[chain[i]] | |
ListResidue = dicoInter["residues"] | |
ListCoords = dicoInter["coords"] | |
for j in range(i+1,len(chain)): | |
dicoInter2 = chains[chain[j]] #ab der 2.chain alle res+coord | |
ListResidue2 = dicoInter2["residues"] | |
ListCoords2 = dicoInter2["coords"] | |
for k in range(len(ListResidue)): | |
for l in range(len(ListResidue2)): | |
value1 = float(ListCoords[k][0]) - float(ListCoords2[l][0]) | |
value2 = float(ListCoords[k][1]) - float(ListCoords2[l][1]) | |
value3 = float(ListCoords[k][2]) - float(ListCoords2[l][2]) | |
distance = math.sqrt(((value1)**2) +((value2)**2) + ((value3)**2)) | |
mykey = str(chain[i]) + str(ListResidue[k]) + '_' + str(chain[j]) + str(ListResidue2[l]) | |
dist_dict[mykey] = distance | |
return dist_dict | |
dist_dict = calc_dist("TNFRSF21.pdb",chains, res_chain) | |
def distance(chain_a, x, chain_b, y): | |
value1 = float(chain_a["coords"][x][0]) - float(chain_b["coords"][y][0]) | |
value2 = float(chain_a["coords"][x][1]) - float(chain_b["coords"][y][1]) | |
value3 = float(chain_a["coords"][x][2]) - float(chain_b["coords"][y][2]) | |
return math.sqrt(((value1)**2) +((value2)**2) + ((value3)**2)) | |
def calc_dist2(file): | |
header = chain_name_number(file) | |
chains = lists_cords_red(file) | |
dist_dict = {} | |
# wenn wir nur eine chain haben | |
if header[0] == 1 : | |
chain_name = list(chains.keys())[0] | |
chain = chains[chain_name] # get the single chain | |
for i in range(len(chain["residues"])): | |
for j in range(len(chain["residues"])): | |
if i > j: | |
key = chain_name + str(chain["residues"][i]) + '_' + chain_name + str(chain["residues"][j]) | |
dist_dict[key] = distance(chain, i, chain, j) | |
# wenn wir 2chainz haben (hihi) | |
elif header[0] == 2 : | |
chain_a_name = list(chains.keys())[0] | |
chain_a = chains[chain_a_name] | |
chain_b_name = list(chains.keys())[1] | |
chain_b = chains[chain_b_name] | |
for i in range(len(chain_a["residues"])): | |
for j in range(len(chain_b["residues"])): | |
key = chain_a_name + str(chain_a["residues"][i]) + '_' + chain_b_name + str(chain_b["residues"][j]) | |
dist_dict[key] = distance(chain_a, i, chain_b, j) | |
else : | |
print("more than 2 chains") | |
return dist_dict; | |
#task 5: take distances less than 5 Armstrong | |
def small_dist(dist_dict): | |
Lastdict = {} | |
for i in dist_dict.values(): | |
if float(i) < 5.0: | |
Lastdict[i] = float(i) | |
return Lastdict | |
def small_dist2(file): | |
dist_dict = calc_dist2(file) | |
result_dict = {} | |
for k in dist_dict.keys(): | |
if float(dist_dict[k]) < 5.0: | |
result_dict[k] = dist_dict[k] | |
return result_dict | |
print(small_dist2("TNFRSF21.pdb")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment