Skip to content

Instantly share code, notes, and snippets.

@dvanatta
Forked from rmcgibbo/alg.txt
Created November 29, 2012 01:20
Show Gist options
  • Save dvanatta/4166092 to your computer and use it in GitHub Desktop.
Save dvanatta/4166092 to your computer and use it in GitHub Desktop.
make an empty list called medoids
for cluster i:
let t be a trajectory containing all of the structures assigned to cluster i
let D = the full len(t) x len(t) pairwise RMSD distance matrix
set medoid[i] = argmin( rowsums (D) )
Actual Code:
#!/home/dvanatta/epd-7.3-1-rh5-x86_64/bin/python
# This file is part of MSMBuilder.
#
# Copyright 2011 Stanford University
#
# MSMBuilder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import numpy as np
from msmbuilder import Trajectory, Project, Serializer
from msmbuilder.metrics import RMSD
from msmbuilder import arglib
import copy
def run(project, pdb, atom_indices, assignments):
Gens = Trajectory.LoadTrajectoryFile('Fake_Gens.lh5')
num_states = np.max(assignments['Data']) + 1
print "numstates=", num_states
for i in range(num_states):
# state_traj=Trajectory.LoadFromPDB('active.pdb')
state = np.where(assignments['Data']==i)
state = np.array(state).T
state_traj = Project.GetConformations(project,state)
#for j in range(len(state[0])):
#project = Project.LoadFromHDF(options.projectfn)
# print "state=", state
# traj_fn = "Trajectories/trj"+str(state[0][j])+".lh5"
# traj = Trajectory.LoadTrajectoryFile(traj_fn)
# if j==0:
# state_traj['XYZList']= traj['XYZList'][state[1][j]]
# if j>0:
# state_traj['XYZList']= np.vstack((state_traj['XYZList'],traj['XYZList'][state[1][j]]))
# print "len state_traj=", len(state_traj)
# you could replace this with your own metric if you like
metric = RMSD(atom_indices)
D=np.ones((len(state),1))
for j in range(len(state)):
ppdb = metric.prepare_trajectory(state_traj[j])
ptraj = metric.prepare_trajectory(state_traj)
distances = metric.one_to_all(ppdb, ptraj, 0)
D[j]=distances.sum()
np.savetxt("D", D)
centroid = state_traj['XYZList'][np.where(D==D.min())[0][0]]
Gens['XYZList'][i] = centroid
# return distances
Gens['XYZList'] = Gens['XYZList'][0:num_states]
Trajectory.SaveToLHDF(Gens, 'HGens.lh5')
if __name__ == '__main__':
parser = arglib.ArgumentParser("""Given an assignments file, calculates and writes what the generators would have been for that Data. Since I'm a numpy noob and can't append, requires dummy generator file named "Fake_Gens.lh5" which has at least as many clusters as input data. Also requires pdb and atomindices. output is called "HGens.lh5
""")
parser.add_argument('pdb', type=arglib.TrajectoryType)
# parser.add_argument('input', description='Path to a trajectory-like file')
parser.add_argument('assignments')
parser.add_argument('project')
parser.add_argument('atom_indices', description='Indices of atoms to compare',
type=arglib.LoadTxtType(dtype=int), default='AtomIndices.dat')
parser.add_argument('output', description='Flat text file for the output',
default='RMSD.dat')
args = parser.parse_args()
arglib.die_if_path_exists(args.output)
distances = run(args.project, args.pdb, args.atom_indices, args.assignments)
print 'Saving Output: %s' % args.output
# np.savetxt(args.output, distances)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment