tjlane · July 8, 2013 23:51 · tjlane · Jul 8, 2013
diff --git a/doniach_hist.py b/doniach_hist.py

 import h5py
 import numpy as np
 from scipy import fftpack
 import matplotlib.pyplot as plt

 from odin import xray


 def PCA(A):
    """
    Performs principal components analysis (PCA) on the n-by-p data matrix A.

    Parameters
    ----------
    A : np.ndarray, float
        Rows of A correspond to observations, columns to variables.

    Returns
    -------
    pcs : np.ndarray
        is a p-by-p matrix, each column containing coefficients 
        for one principal component.
        
    latent : np.ndarray
        a vector containing the eigenvalues  of the covariance matrix of A.

    Sources
    -------
    ..[1] http://glowingpython.blogspot.sg/2011/07/principal-component-analysis
          -with-numpy.html
    """
    
    # computing eigenvalues and eigenvectors of covariance matrix
    M = ( A - np.mean(A.T, axis=1) ).T # subtract the mean (along columns)
    eigenvalues, pcs = np.linalg.eig( np.cov(M) )
    
    # sorting eigenvectors according to the sorted eigenvalues
    idx = np.argsort(eigenvalues)
    idx = idx[::-1] # in ascending order
    pcs = pcs[:,idx]
    eigenvalues = eigenvalues[idx]

    return pcs, eigenvalues
    
    
 def fft_pca_projections(intra_correlators, inter_correlators):
    """
    Compute the projections of the fourier transformed intra- and inter-
    correlators on the FFT/PCA basis.
    
    Specifically, this performs the following:
    
        1) Take the FFT of both the intra & inter correlators
        2) Take the mod sqd on the FFT
        3) PCA on the entire FFT'd intra/inter dataset
        4) Project the FFT'd intra/inter correlators on the first PC
    
    Parameters
    ----------
    intra_correlators, inter_correlators : np.ndarray
        A data matrix of correlation functions. These will be N x M 2d arrays,
        with N shots and M delta values. The intra/inter arrays can be different
        sizes.
        
    Returns
    -------
    intra_projections, inter_projections : np.ndarray
        Two arrays of the projections of the FFT'd correlators onto the first PC.
    """
    
    # fft each correlation
    intra_ffts = fftpack.fft( intra_correlators, axis=1 )
    inter_ffts = fftpack.fft( inter_correlators, axis=1 )

    # take the mod sqd and convert the data type to real
    intra_ffts = np.real( intra_ffts * np.conj( intra_ffts ) )
    inter_ffts = np.real( inter_ffts * np.conj( inter_ffts ) )
    
    # combine inter & intra correlators and perform PCA
    comb_pcs, comb_eigs = PCA( np.vstack(( inter_ffts, 
                                           intra_ffts )) )
                                             
    # project each correlator on the first PC
    intra_projections = np.real( np.dot( comb_pcs[:,0], intra_ffts.T ).flatten() )
    inter_projections = np.real( np.dot( comb_pcs[:,0], inter_ffts.T ).flatten() )
    
    # *** SEB *** I have been taking the abs value here -- could be a point of difference
    intra_projections = np.abs(intra_projections)
    inter_projections = np.abs(inter_projections)
    
    return intra_projections, inter_projections
    
    
 def plot_projection_histogram(intra_projections, inter_projections,
                              title='projection'):
    
    plt.figure()

    bins = 100

    plt.hist( intra_projections, bins, histtype='step', lw=2, log=False, color='b' )
    plt.hist( inter_projections, bins, histtype='step', lw=2, log=False, color='g' )

    plt.xlabel('Projection Magnitude')
    plt.ylabel('Frequency')
    plt.title(title)
    plt.legend(['intra', 'inter'])

    #plt.show()
    fn = title + '.png'
    plt.savefig(fn)
    print "  Saved: %s" % fn
    
    return
    
    
 def save_histogram_as_txt(intra_projections, inter_projections,
                          filename='projections.txt'):
    """
    Save the projections to a file. Will always save the same number of
    intra and inters. In the resulting file, the intra correlators are the
    left column, and inters are on the right.    
    """
    
    trunc = np.min([ len(intra_projections), len(inter_projections) ])
    data = np.array([intra_projections[:trunc], inter_projections[:trunc]]).T
    np.savetxt(filename, data, fmt='%.5e')
    print "  Saved: %s" % filename
    
    return


 def main():
    """
    Loads up different data sets and performs Seb's FFT/PCA analysis.
    """
    
    # load up all the data
    datasets = {} # keep the individual data sets in a dict
    
    # LCLS gold
    f = h5py.File('r0135_correlators.h5')
    intra = np.array( f['intra-2.67-2.67'] )
    inter = np.array( f['inter-2.67-2.67'] )
    f.close()
    
    datasets['lcls-Au'] = (intra, inter)
    
    # SSRL silver
    f = h5py.File('silver_all_correlators_4seb.h5')
    intra = np.array( f['intra-2.66-2.66-188mm'] )
    inter = np.array( f['inter-2.66-2.66-188mm'] )
    f.close()
    
    # throw away the 6 outliers that dominate the signal
    inds = np.loadtxt('corr_3std_from_mean-2.66auto.dat')
    intra = intra[ np.logical_not(inds.astype(np.bool)) ]
    
    datasets['ssrl-Ag'] = (intra, inter)
    
    # Gold simulations
    r = xray.load('combined_shot_1pht.ring')
    intra = r.correlate_intra(2.66, 2.66)
    inter = r.correlate_inter(2.66, 2.66, num_pairs=r.num_shots)
    
    datasets['sim-Au'] = (intra, inter)
    
    # iterate over datasets and plot histograms for each
    for k in datasets.keys():
        print "Analyzing dataset:", k
        intra_projections, inter_projections = fft_pca_projections( *datasets[k] )
        plot_projection_histogram(intra_projections, inter_projections, title=k)
        save_histogram_as_txt(intra_projections, inter_projections, 
                              filename=str(k+'_hist.txt'))

    return
    
    
 if __name__ == '__main__':
    main()

	import h5py
	import numpy as np
	from scipy import fftpack
	import matplotlib.pyplot as plt

	from odin import xray


	def PCA(A):
	"""
	Performs principal components analysis (PCA) on the n-by-p data matrix A.

	Parameters
	----------
	A : np.ndarray, float
	Rows of A correspond to observations, columns to variables.

	Returns
	-------
	pcs : np.ndarray
	is a p-by-p matrix, each column containing coefficients
	for one principal component.

	latent : np.ndarray
	a vector containing the eigenvalues of the covariance matrix of A.

	Sources
	-------
	..[1] http://glowingpython.blogspot.sg/2011/07/principal-component-analysis
	-with-numpy.html
	"""

	# computing eigenvalues and eigenvectors of covariance matrix
	M = ( A - np.mean(A.T, axis=1) ).T # subtract the mean (along columns)
	eigenvalues, pcs = np.linalg.eig( np.cov(M) )

	# sorting eigenvectors according to the sorted eigenvalues
	idx = np.argsort(eigenvalues)
	idx = idx[::-1] # in ascending order
	pcs = pcs[:,idx]
	eigenvalues = eigenvalues[idx]

	return pcs, eigenvalues


	def fft_pca_projections(intra_correlators, inter_correlators):
	"""
	Compute the projections of the fourier transformed intra- and inter-
	correlators on the FFT/PCA basis.

	Specifically, this performs the following:

	1) Take the FFT of both the intra & inter correlators
	2) Take the mod sqd on the FFT
	3) PCA on the entire FFT'd intra/inter dataset
	4) Project the FFT'd intra/inter correlators on the first PC

	Parameters
	----------
	intra_correlators, inter_correlators : np.ndarray
	A data matrix of correlation functions. These will be N x M 2d arrays,
	with N shots and M delta values. The intra/inter arrays can be different
	sizes.

	Returns
	-------
	intra_projections, inter_projections : np.ndarray
	Two arrays of the projections of the FFT'd correlators onto the first PC.
	"""

	# fft each correlation
	intra_ffts = fftpack.fft( intra_correlators, axis=1 )
	inter_ffts = fftpack.fft( inter_correlators, axis=1 )

	# take the mod sqd and convert the data type to real
	intra_ffts = np.real( intra_ffts * np.conj( intra_ffts ) )
	inter_ffts = np.real( inter_ffts * np.conj( inter_ffts ) )

	# combine inter & intra correlators and perform PCA
	comb_pcs, comb_eigs = PCA( np.vstack(( inter_ffts,
	intra_ffts )) )

	# project each correlator on the first PC
	intra_projections = np.real( np.dot( comb_pcs[:,0], intra_ffts.T ).flatten() )
	inter_projections = np.real( np.dot( comb_pcs[:,0], inter_ffts.T ).flatten() )

	# * SEB * I have been taking the abs value here -- could be a point of difference
	intra_projections = np.abs(intra_projections)
	inter_projections = np.abs(inter_projections)

	return intra_projections, inter_projections


	def plot_projection_histogram(intra_projections, inter_projections,
	title='projection'):

	plt.figure()

	bins = 100

	plt.hist( intra_projections, bins, histtype='step', lw=2, log=False, color='b' )
	plt.hist( inter_projections, bins, histtype='step', lw=2, log=False, color='g' )

	plt.xlabel('Projection Magnitude')
	plt.ylabel('Frequency')
	plt.title(title)
	plt.legend(['intra', 'inter'])

	#plt.show()
	fn = title + '.png'
	plt.savefig(fn)
	print " Saved: %s" % fn

	return


	def save_histogram_as_txt(intra_projections, inter_projections,
	filename='projections.txt'):
	"""
	Save the projections to a file. Will always save the same number of
	intra and inters. In the resulting file, the intra correlators are the
	left column, and inters are on the right.
	"""

	trunc = np.min([ len(intra_projections), len(inter_projections) ])
	data = np.array([intra_projections[:trunc], inter_projections[:trunc]]).T
	np.savetxt(filename, data, fmt='%.5e')
	print " Saved: %s" % filename

	return


	def main():
	"""
	Loads up different data sets and performs Seb's FFT/PCA analysis.
	"""

	# load up all the data
	datasets = {} # keep the individual data sets in a dict

	# LCLS gold
	f = h5py.File('r0135_correlators.h5')
	intra = np.array( f['intra-2.67-2.67'] )
	inter = np.array( f['inter-2.67-2.67'] )
	f.close()

	datasets['lcls-Au'] = (intra, inter)

	# SSRL silver
	f = h5py.File('silver_all_correlators_4seb.h5')
	intra = np.array( f['intra-2.66-2.66-188mm'] )
	inter = np.array( f['inter-2.66-2.66-188mm'] )
	f.close()

	# throw away the 6 outliers that dominate the signal
	inds = np.loadtxt('corr_3std_from_mean-2.66auto.dat')
	intra = intra[ np.logical_not(inds.astype(np.bool)) ]

	datasets['ssrl-Ag'] = (intra, inter)

	# Gold simulations
	r = xray.load('combined_shot_1pht.ring')
	intra = r.correlate_intra(2.66, 2.66)
	inter = r.correlate_inter(2.66, 2.66, num_pairs=r.num_shots)

	datasets['sim-Au'] = (intra, inter)

	# iterate over datasets and plot histograms for each
	for k in datasets.keys():
	print "Analyzing dataset:", k
	intra_projections, inter_projections = fft_pca_projections( *datasets[k] )
	plot_projection_histogram(intra_projections, inter_projections, title=k)
	save_histogram_as_txt(intra_projections, inter_projections,
	filename=str(k+'_hist.txt'))

	return


	if __name__ == '__main__':
	main()
No results found