Last active
August 29, 2015 14:03
-
-
Save vrld/1e4bd79921edac55d0d9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
    Crossvalidation

Small helpers for evaluating a model on resampled train/test splits:
[`KFold`](@ref), [`StratifiedKFold`](@ref), [`LOO`](@ref) and [`RandomSplit`](@ref).

Every function takes the user callback as its first argument so it can be written as
a `do` block. `X` holds one sample per row (matrix) or per entry (vector) and `y`
holds the matching labels.
"""
module Crossvalidation

using Random: AbstractRNG, default_rng, shuffle!

export KFold, StratifiedKFold, LOO, RandomSplit

# Select the samples at positions `idx`: rows of a matrix, entries of a vector.
_samples(X::AbstractMatrix, idx) = X[idx, :]
_samples(X::AbstractVector, idx) = X[idx]

# Ensure `X` and `y` describe the same set of samples.
function _checksamples(X, y)
    if axes(X, 1) != axes(y, 1)
        throw(DimensionMismatch(
            "X has $(size(X, 1)) samples but y has $(length(y)) labels"))
    end
    return nothing
end

# Ensure the number of folds `K` is usable for `n` samples (no empty test fold).
function _checkfolds(K, n)
    1 <= K <= n ||
        throw(ArgumentError("K must satisfy 1 <= K <= $n (number of samples), got $K"))
    return nothing
end

# Ensure the split fraction is a proportion.
function _checkfraction(fraction)
    0 <= fraction <= 1 ||
        throw(ArgumentError("fraction must lie in [0, 1], got $fraction"))
    return nothing
end

# Call `f(i, X_tr, y_tr, X_te, y_te)` once per fold, using fold `i` as the test set
# and all remaining folds as the training set; collect the results in fold order.
function _evalfolds(f, X, y, folds)
    return map(eachindex(folds)) do i
        te = folds[i]
        tr = Int[]
        for (k, fold) in enumerate(folds)
            k == i || append!(tr, fold)
        end
        return f(i, _samples(X, tr), y[tr], _samples(X, te), y[te])
    end
end

# Randomly divide the indices of `y` into a part holding `floor(fraction * n)`
# samples and a part holding the rest. Each part is sorted so that samples keep
# their original relative order.
function _splitindices(rng, y, fraction)
    idx = shuffle!(rng, collect(axes(y, 1)))
    nfirst = floor(Int, fraction * length(y))
    return sort!(idx[1:nfirst]), sort!(idx[(nfirst + 1):end])
end

"""
    KFold(f, X, y, K; rng=default_rng())

K-fold cross validation. The samples are shuffled and partitioned into `K` disjoint
folds whose sizes differ by at most one. For each fold `i` the callback is invoked as
`f(i, X_tr, y_tr, X_te, y_te)`, where fold `i` is the test set and the remaining
folds form the training set.

Returns a vector with the `K` callback results, in fold order.

# Throws
- `DimensionMismatch` if `X` and `y` do not contain the same number of samples.
- `ArgumentError` unless `1 <= K <= length(y)`.

# Example (5-fold CV)
```julia
KFold(X, y, 5) do fold, X_tr, y_tr, X_te, y_te
    train!(model, X_tr, y_tr)
    confusion = zeros(Int, 2, 2)
    for i in eachindex(y_te)
        pred = predict(model, X_te[i, :])
        confusion[y_te[i] + 1, pred + 1] += 1
    end
    confusion
end
```
"""
function KFold(f::Function, X::AbstractVecOrMat, y::AbstractVector, K::Integer;
               rng::AbstractRNG=default_rng())
    _checksamples(X, y)
    _checkfolds(K, length(y))
    nfolds = Int(K)
    idx = shuffle!(rng, collect(axes(y, 1)))
    # Deal the shuffled indices round-robin: every sample lands in exactly one fold,
    # also when K does not divide the number of samples.
    folds = [idx[k:nfolds:end] for k in 1:nfolds]
    return _evalfolds(f, X, y, folds)
end

"""
    StratifiedKFold(f, X, y, K; rng=default_rng())

Stratified K-fold cross validation: like [`KFold`](@ref), but every fold has
approximately the same distribution of labels. The number of samples of any given
label differs by at most one between folds, and so does the total fold size.

The callback is invoked as `f(i, X_tr, y_tr, X_te, y_te)`; the `K` results are
returned as a vector in fold order.

# Throws
- `DimensionMismatch` if `X` and `y` do not contain the same number of samples.
- `ArgumentError` unless `1 <= K <= length(y)`.
"""
function StratifiedKFold(f::Function, X::AbstractVecOrMat, y::AbstractVector,
                         K::Integer; rng::AbstractRNG=default_rng())
    _checksamples(X, y)
    _checkfolds(K, length(y))
    nfolds = Int(K)
    folds = [Int[] for _ in 1:nfolds]
    # Deal the samples label by label without resetting the slot counter between
    # labels, so that both the per-label counts and the fold sizes stay balanced.
    slot = 0
    for label in unique(y)
        members = shuffle!(rng, findall(isequal(label), y))
        for i in members
            slot += 1
            push!(folds[mod1(slot, nfolds)], i)
        end
    end
    return _evalfolds(f, X, y, folds)
end

"""
    LOO(f, X, y[, K])

Leave-one-out cross validation. For every sample, in order, the callback is invoked
as `f(X_tr, y_tr, X_te, y_te)` with that single sample as the test set and all other
samples (in their original order) as the training set. Note that, unlike the other
functions in this module, the callback does not receive an iteration index.

`K` is ignored; it is accepted only for backward compatibility.

Returns a vector with one callback result per sample.

# Throws
- `DimensionMismatch` if `X` and `y` do not contain the same number of samples.
"""
function LOO(f::Function, X::AbstractVecOrMat, y::AbstractVector, K::Integer=0)
    _checksamples(X, y)
    idx = collect(axes(y, 1))
    return map(idx) do i
        tr = filter(j -> j != i, idx)
        te = [i]  # index vector, so the test set keeps the container shape of X and y
        return f(_samples(X, tr), y[tr], _samples(X, te), y[te])
    end
end

"""
    RandomSplit(X, y, fraction=0.5; rng=default_rng())

Randomly split the samples in two sets. The first set receives
`floor(fraction * length(y))` samples, the second set the rest; within each set the
samples keep their original relative order.

Returns the tuple `(X_1, y_1, X_2, y_2)`.

# Throws
- `DimensionMismatch` if `X` and `y` do not contain the same number of samples.
- `ArgumentError` unless `0 <= fraction <= 1`.
"""
function RandomSplit(X::AbstractVecOrMat, y::AbstractVector, fraction::Real=0.5;
                     rng::AbstractRNG=default_rng())
    _checksamples(X, y)
    _checkfraction(fraction)
    first, second = _splitindices(rng, y, fraction)
    return _samples(X, first), y[first], _samples(X, second), y[second]
end

"""
    RandomSplit(f, X, y, N, fraction=0.5; rng=default_rng())

Draw `N` independent random splits (see the method without callback) and invoke
`f(i, X_1, y_1, X_2, y_2)` for the `i`-th split.

Returns a vector with the `N` callback results (empty if `N <= 0`).

# Throws
- `DimensionMismatch` if `X` and `y` do not contain the same number of samples.
- `ArgumentError` unless `0 <= fraction <= 1`.
"""
function RandomSplit(f::Function, X::AbstractVecOrMat, y::AbstractVector, N::Integer,
                     fraction::Real=0.5; rng::AbstractRNG=default_rng())
    _checksamples(X, y)
    _checkfraction(fraction)
    return map(1:Int(N)) do i
        first, second = _splitindices(rng, y, fraction)
        return f(i, _samples(X, first), y[first], _samples(X, second), y[second])
    end
end

end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment