@riveSunder
Created August 30, 2020 21:47
Code Snippets for XOR Tutorial
# network dimensions and training hyperparameters
dim_x = 3
dim_h = 4
dim_y = 1
l2_reg = 1e-4
lr = 1e-2
max_steps = 1400000

θ = init_weights(dim_x, dim_y, dim_h)

x, y = get_xor(1024, dim_x)
println(size(x))

# weight distributions before training
plt = violin([" "], reshape(θ[:wxh], dim_x * dim_h), label="wxh", title="Weights", alpha=0.5)
violin!([" "], reshape(θ[:why], dim_h * dim_y), label="why", alpha=0.5)
display(plt)

θ, losses, acc = train(x, θ, y, max_steps, lr, l2_reg)

# weight distributions after training
plt = violin([" "], reshape(θ[:wxh], dim_x * dim_h), label="wxh", title="Weights", alpha=0.5)
violin!([" "], reshape(θ[:why], dim_h * dim_y), label="why", alpha=0.5)
display(plt)

# training loss and accuracy curves
steps = 1:size(losses)[1]
plt = plot(steps, losses, title="Training XOR", label="loss")
plot!(steps, acc, label="accuracy")
display(plt)
# forward pass: input -> hidden -> output, with a sigmoid at each layer
f(x, θ) = σ(σ(x * θ[:wxh]) * θ[:why])

# fraction of thresholded predictions that match the labels
get_accuracy(y, pred, boundary=0.5) = mean(y .== (pred .> boundary))
log_loss = function(y, pred)
    return -(1 / size(y)[1]) .* sum(y .* log.(pred) .+ (1.0 .- y) .* log.(1.0 .- pred))
end
# binary cross-entropy loss plus an L2 penalty on both weight matrices
get_loss = function(x, θ, y, l2=6e-4)
    pred = f(x, θ)
    loss = log_loss(y, pred)
    loss = loss + l2 * (sum(abs.(θ[:wxh].^2)) + sum(abs.(θ[:why].^2)))
    return loss
end
# take a single (deliberately large) gradient descent step and compare the weights
lr = 1e1;
x, y = get_xor(64, 5);
θ = init_weights(5);

# flatten both weight matrices into one vector (vec + vcat copies, so θ is untouched)
old_weights = vcat(vec(θ[:wxh]), vec(θ[:why]))

dθ = gradient((θ) -> get_loss(x, θ, y), θ);

plt = scatter(old_weights, label="old_weights");

θ[:wxh], θ[:why] = θ[:wxh] .- lr .* dθ[1][:wxh], θ[:why] .- lr .* dθ[1][:why]

new_weights = vcat(vec(θ[:wxh]), vec(θ[:why]))

scatter!(new_weights, label="new weights")
display(plt)
using Zygote
using Statistics   # for mean
using Plots
using StatsPlots
# generate noisy XOR data: random Boolean inputs, label = xor reduced over each row
get_xor = function(num_samples=512, dim_x=3)
    x = rand(num_samples, dim_x) .> 0.5
    y = zeros(num_samples, 1)
    for ii = 1:size(y)[1]
        y[ii] = reduce(xor, x[ii, :])
    end
    # jitter the inputs away from exactly 0 and 1
    x = x + randn(num_samples, dim_x) / 10
    return x, y
end
init_weights = function(dim_in=2, dim_out=1, dim_hid=4)
    wxh = randn(dim_in, dim_hid) / 8
    why = randn(dim_hid, dim_out) / 4
    θ = Dict(:wxh => wxh, :why => why)
    return θ
end
σ(x) = 1 ./ (1 .+ exp.(-x))
# a single-layer logistic model (earlier example in the tutorial; note the different θ keys)
f(x, θ) = σ(x * θ[:w] .+ θ[:b])
θ = Dict(:w => randn(32,2)/10, :b => randn(1,2)/100)
x = randn(4,32)
f(x, θ)
4×2 Array{Float64,2}:
0.516507 0.482128
0.568403 0.639701
0.571232 0.416161
0.288268 0.546431
test_x, test_y = get_xor(512,3);
pred = f(test_x, θ);
test_accuracy = get_accuracy(test_y, pred);
test_loss = log_loss(test_y, pred);
println("Test loss and accuracy are $test_loss and $test_accuracy")
>>Test loss and accuracy are 0.03354685023541572 and 1.0
# full-batch gradient descent, with periodic validation on freshly generated XOR data
train = function(x, θ, y, max_steps=1000, lr=1e-2, l2_reg=1e-4)
    disp_every = div(max_steps, 100)
    losses = zeros(max_steps)
    acc = zeros(max_steps)
    for step = 1:max_steps
        pred = f(x, θ)
        loss = log_loss(y, pred)
        losses[step] = loss
        acc[step] = get_accuracy(y, pred)

        dθ = gradient((θ) -> get_loss(x, θ, y, l2_reg), θ)
        θ[:wxh], θ[:why] = θ[:wxh] .- lr .* dθ[1][:wxh], θ[:why] .- lr .* dθ[1][:why]

        if mod(step, disp_every) == 0
            val_x, val_y = get_xor(512, size(x)[2]);
            pred = f(val_x, θ)
            loss = log_loss(val_y, pred)
            accuracy = get_accuracy(val_y, pred)
            println("$step loss = $loss, accuracy = $accuracy")
            #save_frame(θ, step);
        end
    end
    return θ, losses, acc
end
# Separating OR with a straight line is easy; your eyes will pick out the answer automatically.
#
#   1 | x   x          1 | x  \  x
#     |                   |     \
#   0 | o   x          0 | o    \  x
#     +--------           +----------
#       0   1               0     1
#
# Separating XOR is not so simple; you'll need a curved line to do it.
#
#   1 | x  \  o
#     | ____\____
#     |     |
#   0 | o  \  x
#     |     |
#     +---------
#       0     1
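# To make the point above concrete, here is a small sketch (not part of the original
# gist) that fits a single linear unit -- the same σ(x*w .+ b) form as the one-layer
# model above -- to the four noise-free corner points of OR and of XOR by random
# search. The helper best_linear_accuracy and the corners/labels arrays are
# illustrative names introduced here, not from the tutorial. A single linear unit
# can reach 100% accuracy on OR but tops out around 75% on XOR, which is why the
# XOR network needs its hidden layer.

using Statistics

corners = [0 0; 0 1; 1 0; 1 1]        # the four Boolean input points
or_labels  = [0.0, 1.0, 1.0, 1.0]
xor_labels = [0.0, 1.0, 1.0, 0.0]

σ(z) = 1 ./ (1 .+ exp.(-z))           # same sigmoid as above

# best accuracy a single linear unit finds over many random weight draws
best_linear_accuracy = function(x, y, tries=10000)
    best = 0.0
    for _ in 1:tries
        w = randn(size(x)[2], 1)
        b = randn(1, 1)
        pred = σ(x * w .+ b)
        best = max(best, mean(y .== (pred[:] .> 0.5)))
    end
    return best
end

println("OR:  best single-unit accuracy = $(best_linear_accuracy(corners, or_labels))")
println("XOR: best single-unit accuracy = $(best_linear_accuracy(corners, xor_labels))")
# OR reaches 1.0; XOR stays near 0.75 no matter how many random weights are tried.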