Skip to content

Instantly share code, notes, and snippets.

@tjlane
Created July 8, 2013 23:51
Show Gist options
  • Select an option

  • Save tjlane/5953487 to your computer and use it in GitHub Desktop.

Select an option

Save tjlane/5953487 to your computer and use it in GitHub Desktop.
Perform the FFT-PCA-Projection histogram method on SSRL/LCLS/Simulations.
import h5py
import numpy as np
from scipy import fftpack
import matplotlib.pyplot as plt
from odin import xray
def PCA(A):
"""
Performs principal components analysis (PCA) on the n-by-p data matrix A.
Parameters
----------
A : np.ndarray, float
Rows of A correspond to observations, columns to variables.
Returns
-------
pcs : np.ndarray
is a p-by-p matrix, each column containing coefficients
for one principal component.
latent : np.ndarray
a vector containing the eigenvalues of the covariance matrix of A.
Sources
-------
..[1] http://glowingpython.blogspot.sg/2011/07/principal-component-analysis
-with-numpy.html
"""
# computing eigenvalues and eigenvectors of covariance matrix
M = ( A - np.mean(A.T, axis=1) ).T # subtract the mean (along columns)
eigenvalues, pcs = np.linalg.eig( np.cov(M) )
# sorting eigenvectors according to the sorted eigenvalues
idx = np.argsort(eigenvalues)
idx = idx[::-1] # in ascending order
pcs = pcs[:,idx]
eigenvalues = eigenvalues[idx]
return pcs, eigenvalues
def fft_pca_projections(intra_correlators, inter_correlators):
"""
Compute the projections of the fourier transformed intra- and inter-
correlators on the FFT/PCA basis.
Specifically, this performs the following:
1) Take the FFT of both the intra & inter correlators
2) Take the mod sqd on the FFT
3) PCA on the entire FFT'd intra/inter dataset
4) Project the FFT'd intra/inter correlators on the first PC
Parameters
----------
intra_correlators, inter_correlators : np.ndarray
A data matrix of correlation functions. These will be N x M 2d arrays,
with N shots and M delta values. The intra/inter arrays can be different
sizes.
Returns
-------
intra_projections, inter_projections : np.ndarray
Two arrays of the projections of the FFT'd correlators onto the first PC.
"""
# fft each correlation
intra_ffts = fftpack.fft( intra_correlators, axis=1 )
inter_ffts = fftpack.fft( inter_correlators, axis=1 )
# take the mod sqd and convert the data type to real
intra_ffts = np.real( intra_ffts * np.conj( intra_ffts ) )
inter_ffts = np.real( inter_ffts * np.conj( inter_ffts ) )
# combine inter & intra correlators and perform PCA
comb_pcs, comb_eigs = PCA( np.vstack(( inter_ffts,
intra_ffts )) )
# project each correlator on the first PC
intra_projections = np.real( np.dot( comb_pcs[:,0], intra_ffts.T ).flatten() )
inter_projections = np.real( np.dot( comb_pcs[:,0], inter_ffts.T ).flatten() )
# *** SEB *** I have been taking the abs value here -- could be a point of difference
intra_projections = np.abs(intra_projections)
inter_projections = np.abs(inter_projections)
return intra_projections, inter_projections
def plot_projection_histogram(intra_projections, inter_projections,
title='projection'):
plt.figure()
bins = 100
plt.hist( intra_projections, bins, histtype='step', lw=2, log=False, color='b' )
plt.hist( inter_projections, bins, histtype='step', lw=2, log=False, color='g' )
plt.xlabel('Projection Magnitude')
plt.ylabel('Frequency')
plt.title(title)
plt.legend(['intra', 'inter'])
#plt.show()
fn = title + '.png'
plt.savefig(fn)
print " Saved: %s" % fn
return
def save_histogram_as_txt(intra_projections, inter_projections,
filename='projections.txt'):
"""
Save the projections to a file. Will always save the same number of
intra and inters. In the resulting file, the intra correlators are the
left column, and inters are on the right.
"""
trunc = np.min([ len(intra_projections), len(inter_projections) ])
data = np.array([intra_projections[:trunc], inter_projections[:trunc]]).T
np.savetxt(filename, data, fmt='%.5e')
print " Saved: %s" % filename
return
def main():
"""
Loads up different data sets and performs Seb's FFT/PCA analysis.
"""
# load up all the data
datasets = {} # keep the individual data sets in a dict
# LCLS gold
f = h5py.File('r0135_correlators.h5')
intra = np.array( f['intra-2.67-2.67'] )
inter = np.array( f['inter-2.67-2.67'] )
f.close()
datasets['lcls-Au'] = (intra, inter)
# SSRL silver
f = h5py.File('silver_all_correlators_4seb.h5')
intra = np.array( f['intra-2.66-2.66-188mm'] )
inter = np.array( f['inter-2.66-2.66-188mm'] )
f.close()
# throw away the 6 outliers that dominate the signal
inds = np.loadtxt('corr_3std_from_mean-2.66auto.dat')
intra = intra[ np.logical_not(inds.astype(np.bool)) ]
datasets['ssrl-Ag'] = (intra, inter)
# Gold simulations
r = xray.load('combined_shot_1pht.ring')
intra = r.correlate_intra(2.66, 2.66)
inter = r.correlate_inter(2.66, 2.66, num_pairs=r.num_shots)
datasets['sim-Au'] = (intra, inter)
# iterate over datasets and plot histograms for each
for k in datasets.keys():
print "Analyzing dataset:", k
intra_projections, inter_projections = fft_pca_projections( *datasets[k] )
plot_projection_histogram(intra_projections, inter_projections, title=k)
save_histogram_as_txt(intra_projections, inter_projections,
filename=str(k+'_hist.txt'))
return
if __name__ == '__main__':
main()