Skip to content

Instantly share code, notes, and snippets.

@jiahao
Last active October 14, 2015 04:31
Show Gist options
  • Save jiahao/090e295f7a0cd8fbfd12 to your computer and use it in GitHub Desktop.
Save jiahao/090e295f7a0cd8fbfd12 to your computer and use it in GitHub Desktop.
"""
estimate_coeffs
August, 2015
Anna C. Gilbert
input: xs = samples of signal
Λ = list of (freq,coeff) pairs in current approx
Ω = list of (freq,coeff) pairs found in this iteration
k = length of short filter
ats = list of (offset, dilation) pairs for random arith. progs
N = signal length
"""
function estimate_coeffs(xs, Λ, Ω, k, ats, N)
reps = size(ats, 1)
L = length(Ω)
c = zeros(Complex128,reps,L)
for j = 1:reps
t = ats[j,1]
sig = ats[j,2]
u = sample_residual(vec(sub(xs,j,:)), Λ, t, sig, N)
for l = 1:L
θ = -2π*Ω[l]/N
for l2 = 0:k-1
c[j,l] += u[l2+1]*exp(-im*θ*l2*sig)
end
c[j,l]*= √N/k * exp(-im*θ*t)
end
end
median(real(c))
end
function eval_sig(x, pts, N)
p = length(pts)
s = zeros(Complex128, p)
for j = 1:p
for l in eachindex(x.inds)
s[j] += x.spx[l] * exp(2*π*im*pts[j]*(x.inds[l]-1)/N)
end
# to get total l^2 norm = ν, scale random variable by 1/√N
s[j] = 1/√N * (s[j] + x.ν * randn())
end
s
end
include("identify_frequencies.jl")
include("estimate_coeffs.jl")
include("sample_shattering.jl")
"""
function to implement simple version of AAFFT
Anna C. Gilbert
Input
xs1 : contains the samples of x required for all repetitions of the frequency
identification function, organized in a 3-d data structure
xs2 : contains the samples of x required for all repetitions of the estimation
function, organized in a 2-d data structure
ats1 : contains all the (t,s) pairs required for all repetions of frequency
identification function
ats2 : contains all the (t,s) pairs required for all repetions of
estimation
reps1, reps2, reps3 : number of repetitions for parts of the algorithm
N : signal length (should be a power of 2)
width : width of Dirichlet kernel (or bin) for hashing procedure (hash spectrum)
into bins of size width * m
Output : Λ = m x 2 array, the second column contains the m largest
fourier coefficients of x and the first column contains their
corresponding frequencies + 1, freqs belong to 0 to N-1, but the column
contains frequency + 1, therefore belong to 1 to N.
"""
function fourier_sampling(xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width)
k = width*m
Λ = Dict{Float64,Complex128}()
Ω = zeros(k)
c = Ω
list_length = 0
for j = 1:reps1
# identify frequencies, generate a list of no more than k unique candidate frequencies
Ω = identify_frequencies( sub(xs1,:,:,reps2*(j-1)+1:reps2*(j-1)+reps2),
Λ, k, sub(ats1, reps2*(j-1)+1 : reps2*(j-1)+reps2,:), N)
# estimate the coefficients of the frequencies identified above
c = estimate_coeffs( sub(xs2, reps3*(j-1)+1 : reps3*(j-1)+reps3,:),
Λ, Ω, k, sub(ats2, reps3*(j-1)+1 : reps3*(j-1)+reps3,:), N)
# adds 1 to deal with the Matlab indexing "bug"
Ω += 1
# merges list of frequencies found in this iteration with those (if any)
# found in previous iterations. If a frequency has already been identified
# in a previous iteration, we update the coefficient associated with it
# by adding it to the current estimation.
for q = 1:length(c)
Λ[Ω[q]] = get(Λ, Ω[q], zero(c[q])) + c[q]
end
# retain the top k = width*m frequencies (sorted by absolute value)
# one could change this to keep more/fewer frequencies (although m^2
# destroys the efficiency of the theoretical algorithm)
p = min(k,length(Λ))
Λ = Dict(sort(collect(Λ), by= x-> abs(x[1]), rev=true)[1:p])
end
# return the top m frequencies (sorted by absolute value)
# one could change this to keep the top c*m for some constant c.
p = min(m, length(Λ))
Λ = Dict(sort(collect(Λ), by=x->abs(x[1]), rev=true)[1:p])
#[collect(keys(Λ)) collect(values(Λ))]
end
include("eval_sig.jl")
"""
generates sample set for identification and for estimation.
"""
function generate_sample_set(x, N, m, ats1, ats2, width)
K = width*m
nr1 = size(ats1, 1)
α = ndigits(N, 2)
samp1 = zeros(α+1,K,nr1)
xs1 = zeros(Complex128,α+1,K,nr1)
for j = 1:nr1
t = ats1[j,1]
s = ats1[j,2]
final = t + s*(K-1)
aprog = t:s:final
for b = 0:α
geoprog = mod(aprog + (N/(2^b)),N)
xs1[b+1,:,j] = eval_sig(x, geoprog, N)
samp1[b+1,:,j] = geoprog
end
end
nr2 = size(ats2, 1)
samp2 = zeros(nr2,K)
xs2 = zeros(Complex128,nr2,K)
for j = 1:nr2
t = ats2[j,1]
s = ats2[j,2]
final = t + s*(K-1)
aprog = t:s:final
xs2[j,:] = eval_sig(x, mod(aprog,N), N)
samp2[j,:] = mod(aprog,N)
end
(xs1, xs2, samp1, samp2)
end
type Signal
inds
spx
ν
end
"""
generate_signal: randomly choose non-zero frequencies and give them
frequencies that are Gaussian random variables
inputs: sigsize: size (frequency bandwidth) of signal.
sparsity: number of non-zero frequencies
noise: the variance of the AWGN in the signal
outputs: x: physical-space signal
x.inds : location of non-zero frequencies (assuming we start with 1)
x.spx : fourier-space signal
x.nu : variance of the AWGN in signal
Anna C. Gilbert
"""
function generate_signal(sigsize, sparsity, noise)
a = randperm(sigsize) # permute frequencies
Signal(a[1:sparsity],
randn(sparsity), #Gaussian random coefficients
noise
)
end
"""
function to generate t and sigma pairs for running one instance of AAFFT.
Anna C. Gilbert
ats1 contains all the (t,s) pairs required for all repetions of frequency
identification routine
ats2 contains all the (t,s) pairs required for all repetions of estimation
routine
"""
function generate_tspairs(N,reps1,reps2,reps3)
ats1 = zeros(reps1*reps2,2)
ats2 = zeros(reps1*reps3,2)
α = log2(N)
for j = 0:reps1-1
s = rand(1:2:N-1) # draw s uniformly at random from the ODD integers [1,N-1]
for n = 1:reps2
t = rand(0:N-1) # draw t uniformly at random from integers [0,N-1]
ats1[j*(reps2)+n,1] = t
ats1[j*(reps2)+n,2] = s
end
for n = 1:reps3,
s = rand(1:2:N-1) # draw s uniformly at random from the ODD integers [1,N-1]
t = rand(0:N-1) # draw t uniformly at random from integers [0,N-1]
ats2[j*(reps3)+n,1] = t
ats2[j*(reps3)+n,2] = s
end
end
(ats1, ats2)
end
"""
Identify the k dominating frequencies in
residual = x - current approximation (given by Λ)
This function returns frequencies assuming that we start
indexing them at 0 (which is non-standard for Matlab)
Anna C. Gilbert
"""
function identify_frequencies(xs, Λ, k, ats, N)
reps = size(ats,1)
Ω = zeros(1,k)
α = ndigits(N, 2)
sig = ats[1,2]
for b = 0:(α - 1)
vote = zeros(k)
for j = 1:reps
t = ats[j,1]
u = sample_shattering(xs[1,:,j], Λ, t, sig, N)
v = sample_shattering(xs[2+b,:,j], Λ, t + (N/(2^(b+1))), sig, N)
for s = 1:k,
E0= u[s]+ v[s]*exp(-im*π*Ω[s]/(2^b))
E1= u[s]- v[s]*exp(-im*π*Ω[s]/(2^b))
if abs(E1) ≥ abs(E0)
vote[s] += 1
end
end
end
for s = 1:k
if vote[s] > reps/2
Ω[s] += 2^b
end
end
end
#return the list of unique frequencies found
unique(Ω)
end
"""
if the current repn is NOT empty, then draw samples from it.
current repn = sparse Fourier signal (without any noise)
Anna C. Gilbert
"""
function sample_residual(samples, Λ, t, sig, N)
if !isempty(Λ)
k = length(samples)
r = zeros(Complex128, k)
for q = 1:k
vq = 0.0+0.0im
for (freq, coef) in Λ
vq += coef * exp(2*π*im*(freq-1) *
(t + sig*(q-1))/N)
end
r[q] = samples[q] - vq/√N
end
else
r = samples
end
r
end
include("sample_residual.jl")
"""
Input
samples : a set of samples from the signal
Λ : data structure containing current frequencies and coefficients
t : one random offset (for arithmetic progression)
sig : one random step-size (for arithmetic progression)
N : signal length (should be a power of two)
Output : a vector z that is the FFT of samples from residual
Anna C. Gilbert
"""
function sample_shattering(samples, Λ, t, sig, N)
z = sample_residual(samples, Λ, t, sig, N)
n = length(z)
z = 1/√n*fft(z)
end
# test AAFFT with pre-sampling; i.e., don't take the IFFT to generate
# the signal explicitly + sample, just sample from the sparse signal.
#
# Anna C. Gilbert
include("generate_signal.jl")
include("generate_tspairs.jl")
include("generate_sample_set.jl")
include("fourier_sampling.jl")
# set up the parameters
N = 2^20 # N = signal length, must be a power of 2
m = 16 # νmber of total tones
ν = 0.01 # TOTAL norm of the additive white noise added to sparse signal
# generate the data structure that contains signal information
x = generate_signal(N,m,ν)
# reps1 = # repetitions in the main loop of AAFFT (in fourier_sampling.m)
# reps2 = # repetitions in identification of frequencies
# reps3 = # repetitions in estimation of coefficients
# width = width of Dirchlet kernel filter
# typical values are
# reps1 = 5
# reps2 = 5
# reps3 = 5
reps1 = 3
reps2 = 5
reps3 = 11
width = 15
# ats1 = frequency identification => reps1 * reps2 pairs of random seeds t,s
# (reps2 independent seeds for s and reps1*reps2 seeds for t)
# ats2 = coefficient estimation => reps1 * reps3 pairs of random seeds t,s (all independent)
# to visualize where the sampling set is:
# samp1 = all the sampling points for identification
# samp2 = all the sampling points for estimation
(ats1, ats2) = generate_tspairs(N,reps1,reps2,reps3)
(xs1, xs2, samp1, samp2) = generate_sample_set(x, N, m, ats1, ats2, width)
Λ = fourier_sampling( xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width )
@time Λ = fourier_sampling( xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width )
# Λ contains two columns: the first is the frequencies found and the second contains
# the estimated coefficients for each frequency.
# Calculate the relative l^2 error of the returned representation.
err_recov = 0.0
err_unrecov = 0.0
for (ω, c) in Λ
i = findfirst(_->_==ω, x.inds)
if i>0
err_recov += abs(c - x.spx[i])^2
else
err_unrecov += abs(c)^2
end
end
total_err = err_recov + err_unrecov
@printf("total rel. error = %f \n", total_err/norm(x.spx))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment