using Distributions

export Measure, DiscreteMeasure, Estimator, EstimatorQuality,
       blb, randsubset

abstract Measure

type DiscreteMeasure{S<:Number,T<:Number} <: Measure
  points :: Vector{S}
  weights :: Vector{T}
  function DiscreteMeasure{S<:Number, T<:Number}(points::Vector{S}, weights::Vector{T})
    length(points) == length(weights) ?
      new(points, weights)
    : error("number of point masses and weights don't add up") 
  end  
end

#If no explicit weights are given, assume uniform weight
DiscreteMeasure{S<:Number}(points::Vector{S}) = 
DiscreteMeasure(points, ones(length(points))/length(points))

typealias Estimator Function
typealias EstimatorQuality Function

#Inputs:
#  Data X1,...,Xn
#  θ̂: estimator
#  ξ : estimator quality assessment
#  b : subset size
#  s : number of sampled subsets
#  r : number of Monte Carlo iterations
#  Pn = sum([delta(X[i]) for i=1:n)/n #empirical distribution
function blb(Data, θ̂::Estimator, ξ::EstimatorQuality, b::Int, s::Int, r::Int)
  n = length(Data)
  Z = Multinomial(n, ones(b)/b)
  ξ = zeros(s)
  for j=1:s #Iterate over subsets
    #Subsample the data
    I = randsubset(n, b)
    #Approximate ξ(Qn(P(j)nb))
    θ̂⭒n = zeros(r)
    for k=1:r #Iterate over Monte Carlo iterations
      nn = rand(Z)
      P⭒nk = DiscreteMeasure([X[I[a]] for a=1:b], nn[a]/n)
      θ̂⭒n[k] = θ̂(P⭒nk)
    end
    Q⭒nj = DiscreteMeasure(θ̂⭒n)
    ξ⭒n[j] = ξ(Q⭒nj)
  end
  #Average values of ξ(Qn(P(j)nb)) computed for different data subsets
  return sum([ξ⭒n[j] for j=1:s])/s
end


#Uses Floyd's algorithm for determining a random subset of size M
#from 1:N
#Citation:
# J. Bentley, B. Floyd, "Programming pearls: a sample of brilliance",
# Commun. ACM 30 (9), 1987, 754-757
# doi:10.1145/30401.315746
function randsubset(N::Int, M::Int)
  M > N ? error("Subset cannot be larger than original set") : none
  S = Set{Int}()
  for J = (N-M+1):N
    T = 1 + int(rand()*(J-1))
    add!(S, (T in S) ? J : T)
  end
  S
end