using Distributions export Measure, DiscreteMeasure, Estimator, EstimatorQuality, blb, randsubset abstract Measure type DiscreteMeasure{S<:Number,T<:Number} <: Measure points :: Vector{S} weights :: Vector{T} function DiscreteMeasure{S<:Number, T<:Number}(points::Vector{S}, weights::Vector{T}) length(points) == length(weights) ? new(points, weights) : error("number of point masses and weights don't add up") end end #If no explicit weights are given, assume uniform weight DiscreteMeasure{S<:Number}(points::Vector{S}) = DiscreteMeasure(points, ones(length(points))/length(points)) typealias Estimator Function typealias EstimatorQuality Function #Inputs: # Data X1,...,Xn # θ̂: estimator # ξ : estimator quality assessment # b : subset size # s : number of sampled subsets # r : number of Monte Carlo iterations # Pn = sum([delta(X[i]) for i=1:n)/n #empirical distribution function blb(Data, θ̂::Estimator, ξ::EstimatorQuality, b::Int, s::Int, r::Int) n = length(Data) Z = Multinomial(n, ones(b)/b) ξ = zeros(s) for j=1:s #Iterate over subsets #Subsample the data I = randsubset(n, b) #Approximate ξ(Qn(P(j)nb)) θ̂⭒n = zeros(r) for k=1:r #Iterate over Monte Carlo iterations nn = rand(Z) P⭒nk = DiscreteMeasure([X[I[a]] for a=1:b], nn[a]/n) θ̂⭒n[k] = θ̂(P⭒nk) end Q⭒nj = DiscreteMeasure(θ̂⭒n) ξ⭒n[j] = ξ(Q⭒nj) end #Average values of ξ(Qn(P(j)nb)) computed for different data subsets return sum([ξ⭒n[j] for j=1:s])/s end #Uses Floyd's algorithm for determining a random subset of size M #from 1:N #Citation: # J. Bentley, B. Floyd, "Programming pearls: a sample of brilliance", # Commun. ACM 30 (9), 1987, 754-757 # doi:10.1145/30401.315746 function randsubset(N::Int, M::Int) M > N ? error("Subset cannot be larger than original set") : none S = Set{Int}() for J = (N-M+1):N T = 1 + int(rand()*(J-1)) add!(S, (T in S) ? J : T) end S end