@riveSunder
Created August 30, 2020 21:47
Code Snippets for XOR Tutorial
# network dimensions and training hyperparameters
dim_x = 3
dim_h = 4
dim_y = 1
l2_reg = 1e-4
lr = 1e-2
max_steps = 1400000

θ = init_weights(dim_x, dim_y, dim_h)

x, y = get_xor(1024, dim_x)
println(size(x))

# weight distributions before training
plt = violin([" "], reshape(θ[:wxh], dim_x * dim_h), label="wxh", title="Weights", alpha=0.5)
violin!([" "], reshape(θ[:why], dim_h * dim_y), label="why", alpha=0.5)
display(plt)

θ, losses, acc = train(x, θ, y, max_steps, lr, l2_reg)

# weight distributions after training
plt = violin([" "], reshape(θ[:wxh], dim_x * dim_h), label="wxh", title="Weights", alpha=0.5)
violin!([" "], reshape(θ[:why], dim_h * dim_y), label="why", alpha=0.5)
display(plt)

# training loss and accuracy curves
steps = 1:size(losses)[1]
plt = plot(steps, losses, title="Training XOR", label="loss")
plot!(steps, acc, label="accuracy")
display(plt)
# forward pass: input -> hidden -> output, with a sigmoid at each layer
f(x, θ) = σ(σ(x * θ[:wxh]) * θ[:why])

# fraction of thresholded predictions that match the labels
get_accuracy(y, pred, boundary=0.5) = mean(y .== (pred .> boundary))
log_loss = function(y, pred)
    return -(1 / size(y)[1]) .* sum(y .* log.(pred) .+ (1.0 .- y) .* log.(1.0 .- pred))
end
# binary cross-entropy loss plus an L2 penalty on both weight matrices
get_loss = function(x, θ, y, l2=6e-4)
    pred = f(x, θ)
    loss = log_loss(y, pred)
    loss = loss + l2 * (sum(abs.(θ[:wxh].^2)) + sum(abs.(θ[:why].^2)))
    return loss
end
# take a single (deliberately large) gradient descent step and compare the weights
lr = 1e1;
x, y = get_xor(64, 5);
θ = init_weights(5);

# flatten both weight matrices into one vector (vec + vcat copies, so θ is untouched)
old_weights = vcat(vec(θ[:wxh]), vec(θ[:why]))

dθ = gradient((θ) -> get_loss(x, θ, y), θ);

plt = scatter(old_weights, label="old_weights");

θ[:wxh], θ[:why] = θ[:wxh] .- lr .* dθ[1][:wxh], θ[:why] .- lr .* dθ[1][:why]

new_weights = vcat(vec(θ[:wxh]), vec(θ[:why]))

scatter!(new_weights, label="new weights")
display(plt)
using Zygote
using Statistics   # for mean
using Plots
using StatsPlots
# generate noisy XOR data: random Boolean inputs, label = xor reduced over each row
get_xor = function(num_samples=512, dim_x=3)
    x = rand(num_samples, dim_x) .> 0.5
    y = zeros(num_samples, 1)
    for ii = 1:size(y)[1]
        y[ii] = reduce(xor, x[ii, :])
    end
    # jitter the inputs away from exactly 0 and 1
    x = x + randn(num_samples, dim_x) / 10
    return x, y
end
init_weights = function(dim_in=2, dim_out=1, dim_hid=4)
    wxh = randn(dim_in, dim_hid) / 8
    why = randn(dim_hid, dim_out) / 4
    θ = Dict(:wxh => wxh, :why => why)
    return θ
end
σ(x) = 1 ./ (1 .+ exp.(-x))
# a single-layer logistic model (earlier example in the tutorial; note the different θ keys)
f(x, θ) = σ(x * θ[:w] .+ θ[:b])
θ = Dict(:w => randn(32,2)/10, :b => randn(1,2)/100)
x = randn(4,32)
f(x, θ)
4×2 Array{Float64,2}:
0.516507 0.482128
0.568403 0.639701
0.571232 0.416161
0.288268 0.546431
test_x, test_y = get_xor(512,3);
pred = f(test_x, θ);
test_accuracy = get_accuracy(test_y, pred);
test_loss = log_loss(test_y, pred);
println("Test loss and accuracy are $test_loss and $test_accuracy")
>>Test loss and accuracy are 0.03354685023541572 and 1.0
# full-batch gradient descent, with periodic validation on freshly generated XOR data
train = function(x, θ, y, max_steps=1000, lr=1e-2, l2_reg=1e-4)
    disp_every = div(max_steps, 100)
    losses = zeros(max_steps)
    acc = zeros(max_steps)
    for step = 1:max_steps
        pred = f(x, θ)
        loss = log_loss(y, pred)
        losses[step] = loss
        acc[step] = get_accuracy(y, pred)

        dθ = gradient((θ) -> get_loss(x, θ, y, l2_reg), θ)
        θ[:wxh], θ[:why] = θ[:wxh] .- lr .* dθ[1][:wxh], θ[:why] .- lr .* dθ[1][:why]

        if mod(step, disp_every) == 0
            val_x, val_y = get_xor(512, size(x)[2]);
            pred = f(val_x, θ)
            loss = log_loss(val_y, pred)
            accuracy = get_accuracy(val_y, pred)
            println("$step loss = $loss, accuracy = $accuracy")
            #save_frame(θ, step);
        end
    end
    return θ, losses, acc
end
# Separating OR with a straight line is easy; your eyes will pick out the answer automatically.
#
#   1 | x   x          1 | x  \  x
#     |                   |     \
#   0 | o   x          0 | o    \  x
#     +--------           +----------
#       0   1               0     1
#
# Separating XOR is not so simple; you'll need a curved line to do it.
#
#   1 | x  \  o
#     | ____\____
#     |     |
#   0 | o  \  x
#     |     |
#     +---------
#       0     1
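# To make the point above concrete, here is a small sketch (not part of the original
# gist) that fits a single linear unit -- the same σ(x*w .+ b) form as the one-layer
# model above -- to the four noise-free corner points of OR and of XOR by random
# search. The helper best_linear_accuracy and the corners/labels arrays are
# illustrative names introduced here, not from the tutorial. A single linear unit
# can reach 100% accuracy on OR but tops out around 75% on XOR, which is why the
# XOR network needs its hidden layer.

using Statistics

corners = [0 0; 0 1; 1 0; 1 1]        # the four Boolean input points
or_labels  = [0.0, 1.0, 1.0, 1.0]
xor_labels = [0.0, 1.0, 1.0, 0.0]

σ(z) = 1 ./ (1 .+ exp.(-z))           # same sigmoid as above

# best accuracy a single linear unit finds over many random weight draws
best_linear_accuracy = function(x, y, tries=10000)
    best = 0.0
    for _ in 1:tries
        w = randn(size(x)[2], 1)
        b = randn(1, 1)
        pred = σ(x * w .+ b)
        best = max(best, mean(y .== (pred[:] .> 0.5)))
    end
    return best
end

println("OR:  best single-unit accuracy = $(best_linear_accuracy(corners, or_labels))")
println("XOR: best single-unit accuracy = $(best_linear_accuracy(corners, xor_labels))")
# OR reaches 1.0; XOR stays near 0.75 no matter how many random weights are tried.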