dvanatta · November 29, 2012 01:20
diff --git a/alg.txt b/alg.txt
 make an empty list called medoids

 for cluster i:
  let t be a trajectory containing all of the structures assigned to cluster i
  let D = the full len(t) x len(t) pairwise RMSD distance matrix
  set medoid[i] = argmin( rowsums (D) )


 Actual Code:
 #!/home/dvanatta/epd-7.3-1-rh5-x86_64/bin/python
 # This file is part of MSMBuilder.
 #
 # Copyright 2011 Stanford University
 #
 # MSMBuilder is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 import numpy as np
 from msmbuilder import Trajectory, Project, Serializer
 from msmbuilder.metrics import RMSD
 from msmbuilder import arglib
 import copy
 def run(project, pdb, atom_indices, assignments):
    Gens = Trajectory.LoadTrajectoryFile('Fake_Gens.lh5')
    num_states = np.max(assignments['Data']) + 1 
    print "numstates=", num_states
    for i in range(num_states):
        
       # state_traj=Trajectory.LoadFromPDB('active.pdb')
         
        state = np.where(assignments['Data']==i)
        state = np.array(state).T
        state_traj = Project.GetConformations(project,state)
        #for j in range(len(state[0])):
        #project = Project.LoadFromHDF(options.projectfn)
         #   print "state=", state
          #  traj_fn = "Trajectories/trj"+str(state[0][j])+".lh5"
          #  traj = Trajectory.LoadTrajectoryFile(traj_fn)
          #  if j==0: 
          #      state_traj['XYZList']= traj['XYZList'][state[1][j]]
          #  if j>0:
          #      state_traj['XYZList']= np.vstack((state_traj['XYZList'],traj['XYZList'][state[1][j]]))
          #  print "len state_traj=", len(state_traj)   
    # you could replace this with your own metric if you like
        metric = RMSD(atom_indices)
        D=np.ones((len(state),1))
        for j in range(len(state)):

            ppdb = metric.prepare_trajectory(state_traj[j])
            ptraj = metric.prepare_trajectory(state_traj)
            distances = metric.one_to_all(ppdb, ptraj, 0)
            D[j]=distances.sum()
        np.savetxt("D", D)
        centroid = state_traj['XYZList'][np.where(D==D.min())[0][0]]
        Gens['XYZList'][i] = centroid
    #    return distances
    Gens['XYZList'] = Gens['XYZList'][0:num_states] 
    Trajectory.SaveToLHDF(Gens, 'HGens.lh5')
 if __name__ == '__main__':
    parser = arglib.ArgumentParser("""Given an assignments file, calculates and writes what the generators would have been for that Data.  Since I'm a numpy noob and can't append, requires dummy generator file named "Fake_Gens.lh5" which has at least as many clusters as input data.  Also requires pdb and atomindices.  output is called "HGens.lh5
 """)
    parser.add_argument('pdb', type=arglib.TrajectoryType)
 #    parser.add_argument('input', description='Path to a trajectory-like file')
    parser.add_argument('assignments')
    parser.add_argument('project')
    parser.add_argument('atom_indices', description='Indices of atoms to compare',
        type=arglib.LoadTxtType(dtype=int), default='AtomIndices.dat')
    parser.add_argument('output', description='Flat text file for the output',
        default='RMSD.dat')
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)
    
    distances = run(args.project, args.pdb, args.atom_indices, args.assignments)
    print 'Saving Output: %s' % args.output
 #    np.savetxt(args.output, distances)
	make an empty list called medoids

	for cluster i:
	let t be a trajectory containing all of the structures assigned to cluster i
	let D = the full len(t) x len(t) pairwise RMSD distance matrix
	set medoid[i] = argmin( rowsums (D) )


	Actual Code:
	#!/home/dvanatta/epd-7.3-1-rh5-x86_64/bin/python
	# This file is part of MSMBuilder.
	#
	# Copyright 2011 Stanford University
	#
	# MSMBuilder is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 2 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, write to the Free Software
	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

	import numpy as np
	from msmbuilder import Trajectory, Project, Serializer
	from msmbuilder.metrics import RMSD
	from msmbuilder import arglib
	import copy
	def run(project, pdb, atom_indices, assignments):
	Gens = Trajectory.LoadTrajectoryFile('Fake_Gens.lh5')
	num_states = np.max(assignments['Data']) + 1
	print "numstates=", num_states
	for i in range(num_states):

	# state_traj=Trajectory.LoadFromPDB('active.pdb')

	state = np.where(assignments['Data']==i)
	state = np.array(state).T
	state_traj = Project.GetConformations(project,state)
	#for j in range(len(state[0])):
	#project = Project.LoadFromHDF(options.projectfn)
	# print "state=", state
	# traj_fn = "Trajectories/trj"+str(state[0][j])+".lh5"
	# traj = Trajectory.LoadTrajectoryFile(traj_fn)
	# if j==0:
	# state_traj['XYZList']= traj['XYZList'][state[1][j]]
	# if j>0:
	# state_traj['XYZList']= np.vstack((state_traj['XYZList'],traj['XYZList'][state[1][j]]))
	# print "len state_traj=", len(state_traj)
	# you could replace this with your own metric if you like
	metric = RMSD(atom_indices)
	D=np.ones((len(state),1))
	for j in range(len(state)):

	ppdb = metric.prepare_trajectory(state_traj[j])
	ptraj = metric.prepare_trajectory(state_traj)
	distances = metric.one_to_all(ppdb, ptraj, 0)
	D[j]=distances.sum()
	np.savetxt("D", D)
	centroid = state_traj['XYZList'][np.where(D==D.min())[0][0]]
	Gens['XYZList'][i] = centroid
	# return distances
	Gens['XYZList'] = Gens['XYZList'][0:num_states]
	Trajectory.SaveToLHDF(Gens, 'HGens.lh5')
	if __name__ == '__main__':
	parser = arglib.ArgumentParser("""Given an assignments file, calculates and writes what the generators would have been for that Data. Since I'm a numpy noob and can't append, requires dummy generator file named "Fake_Gens.lh5" which has at least as many clusters as input data. Also requires pdb and atomindices. output is called "HGens.lh5
	""")
	parser.add_argument('pdb', type=arglib.TrajectoryType)
	# parser.add_argument('input', description='Path to a trajectory-like file')
	parser.add_argument('assignments')
	parser.add_argument('project')
	parser.add_argument('atom_indices', description='Indices of atoms to compare',
	type=arglib.LoadTxtType(dtype=int), default='AtomIndices.dat')
	parser.add_argument('output', description='Flat text file for the output',
	default='RMSD.dat')
	args = parser.parse_args()
	arglib.die_if_path_exists(args.output)

	distances = run(args.project, args.pdb, args.atom_indices, args.assignments)
	print 'Saving Output: %s' % args.output
	# np.savetxt(args.output, distances)