Julia ReverseDiff example with gradient descent for linear regression
using ReverseDiff: GradientTape, gradient, gradient!, compile
# (On Julia >= 0.7 the @printf calls at the end would also need `using Printf`.)
#=
Here I use what I would consider the classical stats convention of storing
observations as rows and variables as columns. Julia arrays are column major,
so it is also common to see the opposite layout (observations as columns,
variables as rows); for certain kinds of operations this choice has major
performance implications. A small sketch of that layout follows this comment.

I was using Julia v0.5 at the time of writing.
=#
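
# A minimal sketch (not part of the original example) of the column-major layout
# mentioned above: variables stored as rows, observations as columns. The names
# X_cm, b_cm, and y_cm are hypothetical and unused by the rest of the script.
X_cm = randn(3, 10)          # 3 variables x 10 observations, one observation per column
b_cm = randn(3)
y_cm = X_cm' * b_cm          # transpose to get one prediction per observation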

N = 1000
nvar = 5
X = randn(N, nvar)                     # design matrix: one observation per row
actual_b = [0.5, 2.1, -1.3, 1.7, -0.6] # true weights
actual_a = [1.23]                      # true intercept
y = X * actual_b .+ actual_a           # noiseless targets

# Mean squared error of the linear model; closes over the globals X, y, and N.
loss(a, w) = sum(abs2.(y - (X * w .+ a))) / N

w = randn(nvar)    # initial weights
a = randn(1)       # initial intercept

# Record the operations in `loss` once and compile the tape for fast replays.
const f_tape = GradientTape(loss, (randn(1), randn(nvar)))
const compiled_f_tape = compile(f_tape)
# Preallocated buffers that gradient! fills with (dloss/da, dloss/dw).
results = (similar(a), similar(w))

function train(a, w, X, y; lr=0.1)
    # Replay the compiled tape to fill `results` with the gradients, then take
    # one gradient-descent step. (X and y are unused here; loss closes over them.)
    gradient!(results, compiled_f_tape, (a, w))
    a -= lr * results[1]
    w -= lr * results[2]
    return (a, w)
end

# On Julia >= 1.0 this loop would need `global a, w` when run as a script.
for _ in 1:100
    a, w = train(a, w, X, y)
    println(a)
    println(w)
end

println()
@printf("Actual Beta: %f, %f, %f, %f, %f \n", w[1], w[2], w[3], w[4], w[5]) | |
@printf("Actual Alpha: %f\n", a[1]) |