jiahao · October 14, 2015 04:31
diff --git a/estimate_coeffs.jl b/estimate_coeffs.jl
 """
 estimate_coeffs

 August, 2015
 Anna C. Gilbert

 input: xs = samples of signal
  	 Λ = list of (freq,coeff) pairs in current approx
  	 Ω = list of (freq,coeff) pairs found in this iteration
       k = length of short filter
       ats = list of (offset, dilation) pairs for random arith. progs
       N = signal length
 """
 function estimate_coeffs(xs, Λ, Ω, k, ats, N)

 	reps = size(ats, 1)
 	L = length(Ω)
 	c = zeros(Complex128,reps,L)

 	for j = 1:reps
 		t = ats[j,1]
 		sig = ats[j,2]
 		u = sample_residual(vec(sub(xs,j,:)), Λ, t, sig, N)
 		for l = 1:L
 			θ = -2π*Ω[l]/N
            for l2 = 0:k-1
                c[j,l] += u[l2+1]*exp(-im*θ*l2*sig)
            end
 			c[j,l]*= √N/k * exp(-im*θ*t)
 		end
    end

    median(real(c))
 end
diff --git a/eval_sig.jl b/eval_sig.jl
 function eval_sig(x, pts, N)
 	p = length(pts)
 	s = zeros(Complex128, p)

 	for j = 1:p
 		for l in eachindex(x.inds)
 			s[j] += x.spx[l] * exp(2*π*im*pts[j]*(x.inds[l]-1)/N)
        end
        # to get total l^2 norm = ν, scale random variable by 1/√N
 		s[j] = 1/√N * (s[j] + x.ν * randn())
 	end
    s
 end
diff --git a/fourier_sampling.jl b/fourier_sampling.jl
 include("identify_frequencies.jl")
 include("estimate_coeffs.jl")
 include("sample_shattering.jl")

 """
 function to implement simple version of AAFFT
 Anna C. Gilbert

 Input
 xs1 : contains the samples of x required for all repetitions of the frequency
      identification function, organized in a 3-d data structure
 xs2 : contains the samples of x required for all repetitions of the estimation
      function, organized in a 2-d data structure
 ats1 : contains all the (t,s) pairs required for all repetions of frequency
       identification function
 ats2 : contains all the (t,s) pairs required for all repetions of
       estimation
 reps1, reps2, reps3 : number of repetitions for parts of the algorithm
 N : signal length (should be a power of 2)
 width : width of Dirichlet kernel (or bin) for hashing procedure (hash spectrum)
        into bins of size width * m

 Output : Λ = m x 2 array, the second column contains the m largest
 fourier coefficients of x and the first column contains their
 corresponding frequencies + 1, freqs belong to 0 to N-1, but the column
 contains frequency + 1, therefore belong to 1 to N.
 """
 function fourier_sampling(xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width)
    k = width*m
    Λ = Dict{Float64,Complex128}()
    Ω = zeros(k)
    c = Ω
    list_length = 0

    for j = 1:reps1

        # identify frequencies, generate a list of no more than k unique candidate frequencies
        Ω = identify_frequencies( sub(xs1,:,:,reps2*(j-1)+1:reps2*(j-1)+reps2),
        	Λ, k, sub(ats1, reps2*(j-1)+1 : reps2*(j-1)+reps2,:), N)

        # estimate the coefficients of the frequencies identified above
        c = estimate_coeffs( sub(xs2, reps3*(j-1)+1 : reps3*(j-1)+reps3,:),
        	Λ, Ω, k, sub(ats2, reps3*(j-1)+1 : reps3*(j-1)+reps3,:), N)

        # adds 1 to deal with the Matlab indexing "bug"
        Ω += 1

        # merges list of frequencies found in this iteration with those (if any)
        # found in previous iterations. If a frequency has already been identified
        # in a previous iteration, we update the coefficient associated with it
        # by adding it to the current estimation.
        for q = 1:length(c)
            Λ[Ω[q]] = get(Λ, Ω[q], zero(c[q])) + c[q]
        end

        # retain the top k = width*m frequencies (sorted by absolute value)
        # one could change this to keep more/fewer frequencies (although m^2
        # destroys the efficiency of the theoretical algorithm)
        p = min(k,length(Λ))
        Λ = Dict(sort(collect(Λ), by= x-> abs(x[1]), rev=true)[1:p])
    end

    # return the top m frequencies (sorted by absolute value)
    # one could change this to keep the top c*m for some constant c.
    p = min(m, length(Λ))
    Λ = Dict(sort(collect(Λ), by=x->abs(x[1]), rev=true)[1:p])
    #[collect(keys(Λ)) collect(values(Λ))]
 end
diff --git a/generate_sample_set.jl b/generate_sample_set.jl
 include("eval_sig.jl")

 """
 generates sample set for identification and for estimation.
 """
 function generate_sample_set(x, N, m, ats1, ats2, width)

 	K = width*m

 	nr1 = size(ats1, 1)

    α = ndigits(N, 2)
 	samp1 = zeros(α+1,K,nr1)
 	xs1 = zeros(Complex128,α+1,K,nr1)
 	for j = 1:nr1
 			t = ats1[j,1]
 			s = ats1[j,2]
 	    	final = t + s*(K-1)
 	    	aprog = t:s:final
 			for b = 0:α
 				geoprog = mod(aprog + (N/(2^b)),N)
 				xs1[b+1,:,j] = eval_sig(x, geoprog, N)
 	        	samp1[b+1,:,j] = geoprog
 	    	end
 	end

 	nr2 = size(ats2, 1)
 	samp2 = zeros(nr2,K)
 	xs2 = zeros(Complex128,nr2,K)
 	for j = 1:nr2
 	    t = ats2[j,1]
 		s = ats2[j,2]
 		final = t + s*(K-1)
 		aprog = t:s:final
 		xs2[j,:] = eval_sig(x, mod(aprog,N), N)
 		samp2[j,:] = mod(aprog,N)
 	end
 	(xs1, xs2, samp1, samp2)
 end
diff --git a/generate_signal.jl b/generate_signal.jl
 type Signal
    inds
    spx
    ν
 end

 """
 generate_signal: randomly choose non-zero frequencies and give them
         frequencies that are Gaussian random variables

 inputs:  sigsize: size (frequency bandwidth) of signal.
         sparsity: number of non-zero frequencies
         noise: the variance of the AWGN in the signal

 outputs:  x: physical-space signal
          x.inds : location of non-zero frequencies (assuming we start with 1)
          x.spx : fourier-space signal
          x.nu : variance of the AWGN in signal

 Anna C. Gilbert
 """
 function generate_signal(sigsize, sparsity, noise)
 	a = randperm(sigsize)  # permute frequencies
 	Signal(a[1:sparsity],
        randn(sparsity), #Gaussian random coefficients
        noise
    )
 end
diff --git a/generate_tspairs.jl b/generate_tspairs.jl
 """
 function to generate t and sigma pairs for running one instance of AAFFT.
 Anna C. Gilbert

 ats1 contains all the (t,s) pairs required for all repetions of frequency
 identification routine
 ats2 contains all the (t,s) pairs required for all repetions of estimation
 routine
 """
 function generate_tspairs(N,reps1,reps2,reps3)
 	ats1 = zeros(reps1*reps2,2)
 	ats2 = zeros(reps1*reps3,2)
 	α = log2(N)

 	for j = 0:reps1-1
 		s = rand(1:2:N-1) # draw s uniformly at random from the ODD integers [1,N-1]

 		for n = 1:reps2
 			t = rand(0:N-1) # draw t uniformly at random from integers [0,N-1]
 			ats1[j*(reps2)+n,1] = t
 			ats1[j*(reps2)+n,2] = s
 		end

 		for n = 1:reps3,
 			s = rand(1:2:N-1) #     draw s uniformly at random from the ODD integers [1,N-1]
 			t = rand(0:N-1)   #     draw t uniformly at random from integers [0,N-1]
 			ats2[j*(reps3)+n,1] = t
 			ats2[j*(reps3)+n,2] = s
 		end
 	end
    (ats1, ats2)
 end
diff --git a/identify_frequencies.jl b/identify_frequencies.jl
 """
 Identify the k dominating frequencies in
 residual = x - current approximation (given by Λ)
 This function returns frequencies assuming that we start
 indexing them at 0 (which is non-standard for Matlab)

 Anna C. Gilbert
 """
 function identify_frequencies(xs, Λ, k, ats, N)
    reps = size(ats,1)
    Ω = zeros(1,k)
    α = ndigits(N, 2)

    sig = ats[1,2]
    for b = 0:(α - 1)
        vote = zeros(k)
        for j = 1:reps
            t = ats[j,1]
            u = sample_shattering(xs[1,:,j], Λ, t, sig, N)
            v = sample_shattering(xs[2+b,:,j], Λ, t + (N/(2^(b+1))), sig, N)

            for s = 1:k,
                E0= u[s]+ v[s]*exp(-im*π*Ω[s]/(2^b))
                E1= u[s]- v[s]*exp(-im*π*Ω[s]/(2^b))
                if abs(E1) ≥ abs(E0)
    				vote[s] += 1
    			end
            end
        end
        for s = 1:k
            if vote[s] > reps/2
    			Ω[s] += 2^b
    		end
        end
    end

    #return the list of unique frequencies found
    unique(Ω)
 end
diff --git a/sample_residual.jl b/sample_residual.jl
 """
 if the current repn is NOT empty, then draw samples from it.
 current repn = sparse Fourier signal (without any noise)

 Anna C. Gilbert
 """
 function sample_residual(samples, Λ, t, sig, N)
 	if !isempty(Λ)
 		k = length(samples)
 		r = zeros(Complex128, k)

 		for q = 1:k
    		vq = 0.0+0.0im
    		for (freq, coef) in Λ
                vq += coef * exp(2*π*im*(freq-1) *
                    (t + sig*(q-1))/N)
    		end
    		r[q] = samples[q] - vq/√N
 		end
 	else
 		r = samples
 	end
    r
 end
diff --git a/sample_shattering.jl b/sample_shattering.jl
 include("sample_residual.jl")

 """
 Input
 samples : a set of samples from the signal
 Λ : data structure containing current frequencies and coefficients
 t : one random offset (for arithmetic progression)
 sig : one random step-size (for arithmetic progression)
 N : signal length (should be a power of two)

 Output : a vector z that is the FFT of samples from residual

 Anna C. Gilbert
 """
 function sample_shattering(samples, Λ, t, sig, N)
 	z = sample_residual(samples, Λ, t, sig, N)
 	n = length(z)
 	z = 1/√n*fft(z)
 end
diff --git a/test_AAFFT_with_presampling.jl b/test_AAFFT_with_presampling.jl
 # test AAFFT with pre-sampling; i.e., don't take the IFFT to generate
 # the signal explicitly + sample, just sample from the sparse signal.
 #
 # Anna C. Gilbert

 include("generate_signal.jl")
 include("generate_tspairs.jl")
 include("generate_sample_set.jl")
 include("fourier_sampling.jl")

 # set up the parameters
 N = 2^20  # N = signal length, must be a power of 2
 m = 16     # νmber of total tones
 ν = 0.01 # TOTAL norm of the additive white noise added to sparse signal

 # generate the data structure that contains signal information
 x = generate_signal(N,m,ν)

 # reps1 = # repetitions in the main loop of AAFFT (in fourier_sampling.m)
 # reps2 = # repetitions in identification of frequencies
 # reps3 = # repetitions in estimation of coefficients
 # width = width of Dirchlet kernel filter
 # typical values are
 # reps1 = 5
 # reps2 = 5
 # reps3 = 5

 reps1 = 3
 reps2 = 5
 reps3 = 11
 width = 15

 # ats1 = frequency identification => reps1 * reps2 pairs of random seeds t,s
 #        (reps2 independent seeds for s and reps1*reps2 seeds for t)
 # ats2 = coefficient estimation => reps1 * reps3 pairs of random seeds t,s (all independent)
 # to visualize where the sampling set is:
 # samp1 = all the sampling points for identification
 # samp2 = all the sampling points for estimation

 (ats1, ats2) = generate_tspairs(N,reps1,reps2,reps3)

 (xs1, xs2, samp1, samp2) = generate_sample_set(x, N, m, ats1, ats2, width)

 Λ = fourier_sampling( xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width )

 @time Λ = fourier_sampling( xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width )

 # Λ contains two columns: the first is the frequencies found and the second contains
 # the estimated coefficients for each frequency.
 # Calculate the relative l^2 error of the returned representation.

 err_recov = 0.0
 err_unrecov = 0.0
 for (ω, c) in Λ
    i = findfirst(_->_==ω, x.inds)
    if i>0
        err_recov += abs(c - x.spx[i])^2
    else
        err_unrecov += abs(c)^2
    end
 end

 total_err = err_recov + err_unrecov

 @printf("total rel. error = %f \n", total_err/norm(x.spx))
	"""
	estimate_coeffs

	August, 2015
	Anna C. Gilbert

	input: xs = samples of signal
	Λ = list of (freq,coeff) pairs in current approx
	Ω = list of (freq,coeff) pairs found in this iteration
	k = length of short filter
	ats = list of (offset, dilation) pairs for random arith. progs
	N = signal length
	"""
	function estimate_coeffs(xs, Λ, Ω, k, ats, N)

	reps = size(ats, 1)
	L = length(Ω)
	c = zeros(Complex128,reps,L)

	for j = 1:reps
	t = ats[j,1]
	sig = ats[j,2]
	u = sample_residual(vec(sub(xs,j,:)), Λ, t, sig, N)
	for l = 1:L
	θ = -2π*Ω[l]/N
	for l2 = 0:k-1
	c[j,l] += u[l2+1]exp(-imθl2sig)
	end
	c[j,l]= √N/k exp(-imθt)
	end
	end

	median(real(c))
	end
	function eval_sig(x, pts, N)
	p = length(pts)
	s = zeros(Complex128, p)

	for j = 1:p
	for l in eachindex(x.inds)
	s[j] += x.spx[l] * exp(2πimpts[j](x.inds[l]-1)/N)
	end
	# to get total l^2 norm = ν, scale random variable by 1/√N
	s[j] = 1/√N * (s[j] + x.ν * randn())
	end
	s
	end
	include("identify_frequencies.jl")
	include("estimate_coeffs.jl")
	include("sample_shattering.jl")

	"""
	function to implement simple version of AAFFT
	Anna C. Gilbert

	Input
	xs1 : contains the samples of x required for all repetitions of the frequency
	identification function, organized in a 3-d data structure
	xs2 : contains the samples of x required for all repetitions of the estimation
	function, organized in a 2-d data structure
	ats1 : contains all the (t,s) pairs required for all repetions of frequency
	identification function
	ats2 : contains all the (t,s) pairs required for all repetions of
	estimation
	reps1, reps2, reps3 : number of repetitions for parts of the algorithm
	N : signal length (should be a power of 2)
	width : width of Dirichlet kernel (or bin) for hashing procedure (hash spectrum)
	into bins of size width * m

	Output : Λ = m x 2 array, the second column contains the m largest
	fourier coefficients of x and the first column contains their
	corresponding frequencies + 1, freqs belong to 0 to N-1, but the column
	contains frequency + 1, therefore belong to 1 to N.
	"""
	function fourier_sampling(xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width)
	k = width*m
	Λ = Dict{Float64,Complex128}()
	Ω = zeros(k)
	c = Ω
	list_length = 0

	for j = 1:reps1

	# identify frequencies, generate a list of no more than k unique candidate frequencies
	Ω = identify_frequencies( sub(xs1,:,:,reps2(j-1)+1:reps2(j-1)+reps2),
	Λ, k, sub(ats1, reps2(j-1)+1 : reps2(j-1)+reps2,:), N)

	# estimate the coefficients of the frequencies identified above
	c = estimate_coeffs( sub(xs2, reps3(j-1)+1 : reps3(j-1)+reps3,:),
	Λ, Ω, k, sub(ats2, reps3(j-1)+1 : reps3(j-1)+reps3,:), N)

	# adds 1 to deal with the Matlab indexing "bug"
	Ω += 1

	# merges list of frequencies found in this iteration with those (if any)
	# found in previous iterations. If a frequency has already been identified
	# in a previous iteration, we update the coefficient associated with it
	# by adding it to the current estimation.
	for q = 1:length(c)
	Λ[Ω[q]] = get(Λ, Ω[q], zero(c[q])) + c[q]
	end

	# retain the top k = width*m frequencies (sorted by absolute value)
	# one could change this to keep more/fewer frequencies (although m^2
	# destroys the efficiency of the theoretical algorithm)
	p = min(k,length(Λ))
	Λ = Dict(sort(collect(Λ), by= x-> abs(x[1]), rev=true)[1:p])
	end

	# return the top m frequencies (sorted by absolute value)
	# one could change this to keep the top c*m for some constant c.
	p = min(m, length(Λ))
	Λ = Dict(sort(collect(Λ), by=x->abs(x[1]), rev=true)[1:p])
	#[collect(keys(Λ)) collect(values(Λ))]
	end
	include("eval_sig.jl")

	"""
	generates sample set for identification and for estimation.
	"""
	function generate_sample_set(x, N, m, ats1, ats2, width)

	K = width*m

	nr1 = size(ats1, 1)

	α = ndigits(N, 2)
	samp1 = zeros(α+1,K,nr1)
	xs1 = zeros(Complex128,α+1,K,nr1)
	for j = 1:nr1
	t = ats1[j,1]
	s = ats1[j,2]
	final = t + s*(K-1)
	aprog = t:s:final
	for b = 0:α
	geoprog = mod(aprog + (N/(2^b)),N)
	xs1[b+1,:,j] = eval_sig(x, geoprog, N)
	samp1[b+1,:,j] = geoprog
	end
	end

	nr2 = size(ats2, 1)
	samp2 = zeros(nr2,K)
	xs2 = zeros(Complex128,nr2,K)
	for j = 1:nr2
	t = ats2[j,1]
	s = ats2[j,2]
	final = t + s*(K-1)
	aprog = t:s:final
	xs2[j,:] = eval_sig(x, mod(aprog,N), N)
	samp2[j,:] = mod(aprog,N)
	end
	(xs1, xs2, samp1, samp2)
	end
	type Signal
	inds
	spx
	ν
	end

	"""
	generate_signal: randomly choose non-zero frequencies and give them
	frequencies that are Gaussian random variables

	inputs: sigsize: size (frequency bandwidth) of signal.
	sparsity: number of non-zero frequencies
	noise: the variance of the AWGN in the signal

	outputs: x: physical-space signal
	x.inds : location of non-zero frequencies (assuming we start with 1)
	x.spx : fourier-space signal
	x.nu : variance of the AWGN in signal

	Anna C. Gilbert
	"""
	function generate_signal(sigsize, sparsity, noise)
	a = randperm(sigsize) # permute frequencies
	Signal(a[1:sparsity],
	randn(sparsity), #Gaussian random coefficients
	noise
	)
	end
	"""
	function to generate t and sigma pairs for running one instance of AAFFT.
	Anna C. Gilbert

	ats1 contains all the (t,s) pairs required for all repetions of frequency
	identification routine
	ats2 contains all the (t,s) pairs required for all repetions of estimation
	routine
	"""
	function generate_tspairs(N,reps1,reps2,reps3)
	ats1 = zeros(reps1*reps2,2)
	ats2 = zeros(reps1*reps3,2)
	α = log2(N)

	for j = 0:reps1-1
	s = rand(1:2:N-1) # draw s uniformly at random from the ODD integers [1,N-1]

	for n = 1:reps2
	t = rand(0:N-1) # draw t uniformly at random from integers [0,N-1]
	ats1[j*(reps2)+n,1] = t
	ats1[j*(reps2)+n,2] = s
	end

	for n = 1:reps3,
	s = rand(1:2:N-1) # draw s uniformly at random from the ODD integers [1,N-1]
	t = rand(0:N-1) # draw t uniformly at random from integers [0,N-1]
	ats2[j*(reps3)+n,1] = t
	ats2[j*(reps3)+n,2] = s
	end
	end
	(ats1, ats2)
	end
	"""
	Identify the k dominating frequencies in
	residual = x - current approximation (given by Λ)
	This function returns frequencies assuming that we start
	indexing them at 0 (which is non-standard for Matlab)

	Anna C. Gilbert
	"""
	function identify_frequencies(xs, Λ, k, ats, N)
	reps = size(ats,1)
	Ω = zeros(1,k)
	α = ndigits(N, 2)

	sig = ats[1,2]
	for b = 0:(α - 1)
	vote = zeros(k)
	for j = 1:reps
	t = ats[j,1]
	u = sample_shattering(xs[1,:,j], Λ, t, sig, N)
	v = sample_shattering(xs[2+b,:,j], Λ, t + (N/(2^(b+1))), sig, N)

	for s = 1:k,
	E0= u[s]+ v[s]exp(-imπ*Ω[s]/(2^b))
	E1= u[s]- v[s]exp(-imπ*Ω[s]/(2^b))
	if abs(E1) ≥ abs(E0)
	vote[s] += 1
	end
	end
	end
	for s = 1:k
	if vote[s] > reps/2
	Ω[s] += 2^b
	end
	end
	end

	#return the list of unique frequencies found
	unique(Ω)
	end
	"""
	if the current repn is NOT empty, then draw samples from it.
	current repn = sparse Fourier signal (without any noise)

	Anna C. Gilbert
	"""
	function sample_residual(samples, Λ, t, sig, N)
	if !isempty(Λ)
	k = length(samples)
	r = zeros(Complex128, k)

	for q = 1:k
	vq = 0.0+0.0im
	for (freq, coef) in Λ
	vq += coef * exp(2πim(freq-1)
	(t + sig*(q-1))/N)
	end
	r[q] = samples[q] - vq/√N
	end
	else
	r = samples
	end
	r
	end
	include("sample_residual.jl")

	"""
	Input
	samples : a set of samples from the signal
	Λ : data structure containing current frequencies and coefficients
	t : one random offset (for arithmetic progression)
	sig : one random step-size (for arithmetic progression)
	N : signal length (should be a power of two)

	Output : a vector z that is the FFT of samples from residual

	Anna C. Gilbert
	"""
	function sample_shattering(samples, Λ, t, sig, N)
	z = sample_residual(samples, Λ, t, sig, N)
	n = length(z)
	z = 1/√n*fft(z)
	end
	# test AAFFT with pre-sampling; i.e., don't take the IFFT to generate
	# the signal explicitly + sample, just sample from the sparse signal.
	#
	# Anna C. Gilbert

	include("generate_signal.jl")
	include("generate_tspairs.jl")
	include("generate_sample_set.jl")
	include("fourier_sampling.jl")

	# set up the parameters
	N = 2^20 # N = signal length, must be a power of 2
	m = 16 # νmber of total tones
	ν = 0.01 # TOTAL norm of the additive white noise added to sparse signal

	# generate the data structure that contains signal information
	x = generate_signal(N,m,ν)

	# reps1 = # repetitions in the main loop of AAFFT (in fourier_sampling.m)
	# reps2 = # repetitions in identification of frequencies
	# reps3 = # repetitions in estimation of coefficients
	# width = width of Dirchlet kernel filter
	# typical values are
	# reps1 = 5
	# reps2 = 5
	# reps3 = 5

	reps1 = 3
	reps2 = 5
	reps3 = 11
	width = 15

	# ats1 = frequency identification => reps1 * reps2 pairs of random seeds t,s
	# (reps2 independent seeds for s and reps1*reps2 seeds for t)
	# ats2 = coefficient estimation => reps1 * reps3 pairs of random seeds t,s (all independent)
	# to visualize where the sampling set is:
	# samp1 = all the sampling points for identification
	# samp2 = all the sampling points for estimation

	(ats1, ats2) = generate_tspairs(N,reps1,reps2,reps3)

	(xs1, xs2, samp1, samp2) = generate_sample_set(x, N, m, ats1, ats2, width)

	Λ = fourier_sampling( xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width )

	@time Λ = fourier_sampling( xs1, xs2, m, ats1, ats2, reps1, reps2, reps3, N, width )

	# Λ contains two columns: the first is the frequencies found and the second contains
	# the estimated coefficients for each frequency.
	# Calculate the relative l^2 error of the returned representation.

	err_recov = 0.0
	err_unrecov = 0.0
	for (ω, c) in Λ
	i = findfirst(_->_==ω, x.inds)
	if i>0
	err_recov += abs(c - x.spx[i])^2
	else
	err_unrecov += abs(c)^2
	end
	end

	total_err = err_recov + err_unrecov

	@printf("total rel. error = %f \n", total_err/norm(x.spx))