Last active
November 8, 2019 14:59
-
-
Save larsbratholm/b825126c632fc73fee0360f7c2cbb2b6 to your computer and use it in GitHub Desktop.
nmr interpolation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sklearn.neighbors | |
import sklearn.ensemble | |
import itertools | |
def create_lookup_table(angles, cs, lookup_table_filename, granularity_degree=20): | |
""" | |
Creates lookup tables for procs15 | |
""" | |
# convert everything to numpy arrays | |
angles = np.asarray(angles) | |
cs = np.asarray(cs) | |
# Create the empty lookup table | |
lookup_table = np.zeros((3,) + (360//granularity_degree+1,) * angles.shape[1]) | |
# Create the data to do predictions on (just all the angles that go into the array) | |
# I haven't tested this fully, but seem to work | |
test_data = np.zeros(((360//granularity_degree+1) ** angles.shape[1], angles.shape[1])) | |
for i, idx in enumerate(itertools.product(list(range(360//granularity_degree+1)), repeat=angles.shape[1])): | |
idx = np.asarray(idx) * granularity_degree - 180 | |
test_data[i] = idx | |
# Add cos/sin to features | |
angles_trig = np.zeros((angles.shape[0],angles.shape[1]*3)) | |
test_data_trig = np.zeros((test_data.shape[0],test_data.shape[1]*3)) | |
for i in range(angles.shape[1]): | |
angles_trig[:,i*3] = np.cos(angles[:,i]) | |
angles_trig[:,i*3+1] = np.sin(angles[:,i]) | |
angles_trig[:,i*3+2] = angles[:,i] | |
test_data_trig[:,i*3] = np.cos(test_data[:,i]) | |
test_data_trig[:,i*3+1] = np.sin(test_data[:,i]) | |
test_data_trig[:,i*3+2] = test_data[:,i] | |
# Make predictions for all the atoms (previous, central, next) | |
for ang in range(3): | |
y = cs[:,ang] | |
# KNeighboursRegressor if ExtraTrees takes too much memory. | |
# Make sure the cross validation is automated | |
#estimator = sklearn.neighbors.KNeighborsRegressor() | |
#param_grid = {'n_neighbors': [1,2,3,4,5,6,7,8,9,10], 'weights': ['uniform', 'distance'], 'p': [1,2]} | |
#mod = sklearn.model_selection.GridSearchCV(estimator, param_grid=param_grid, scoring='neg_mean_squared_error', | |
# cv=sklearn.model_selection.KFold(n_splits=10, shuffle=True)) | |
# ExtraTrees Regressor. Increase min_samples_leaf or decrease n_estimators if having memory issues. | |
# Or use the KNeighboursRegressor instead. | |
estimator = sklearn.ensemble.ExtraTreesRegressor(n_estimators=50, min_samples_leaf=1) | |
param_grid = {} | |
mod = sklearn.model_selection.GridSearchCV(estimator, param_grid=param_grid, scoring='neg_mean_squared_error', | |
cv=sklearn.model_selection.KFold(n_splits=3, shuffle=True)) | |
# Fit the model | |
mod.fit(angles_trig, y) | |
#print("Best params", mod.best_params_) | |
print("Best score for angle %d (should ideally be smaller than 0.4 for N, 0.1 for C and 0.02 for H)" % ang, mod.best_score_) | |
# Make predictions on the test data | |
predictions = mod.predict(test_data_trig) | |
# store predictions in lookup table | |
lookup_table[ang] = predictions.reshape(lookup_table[ang].shape) | |
np.save(lookup_table_filename, lookup_table) | |
if __name__ == "__main__": | |
# Read the npy file as an alternative to the actual log files | |
# This has shape (3,361,361). The first axis is the three residues in the | |
# tripeptide. x[0] is the first residue, x[1] is the second residue, x[2] is the third residue. | |
# The second and third axis are the backbone angles. Angles of (90,90) correspond to index (270,270), | |
# so the counting starts at -180. | |
# x[:,360,360] is equal to x[:,0,0] to make the linear interpolation in procs easier to do. | |
x = np.load("ALA_cb.npy") | |
angles = [] | |
cs = [] | |
# Get angles and chemical shift array from the matrix | |
for ang1 in range(361): | |
for ang2 in range(361): | |
angles.append([ang1-180,ang2-180]) | |
cs.append(x[:,ang1, ang2]) | |
cs = np.asarray(cs) | |
""" | |
angles and cs should be exactly what you get with your script (in this case only cb cs), | |
so start from here when you do the interpolation yourself. | |
For Alanine, angles and cs need to have the following form: | |
angles = [[-180,-180], [-180, -160], ...] | |
cs = [[172.3, 175.8, 172.0], [173.7, 178.2, 172.5], ...] | |
""" | |
create_lookup_table(angles=angles, cs=cs, lookup_table_filename='ALA_cb_new.npy', granularity_degree=1) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment