Recursive Neural Networks and Online Backpropagation Through Structure (BPTS)
(* recursiveNeuralNetwork.ml --- Recursive Neural Networks and
                                 Online Backpropagation Through Structure (BPTS)
   [MIT License] Copyright (C) 2015 Akinori ABE

   Compilation:
   $ ocamlfind ocamlopt -linkpkg -package slap recursiveNeuralNetwork.ml

   This program requires the Sized Linear Algebra Package (SLAP), a linear
   algebra library for OCaml with static size checking for matrix operations
   (see http://akabe.github.io/slap/ for details). *)
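(* SLAP is distributed through OPAM; assuming an OPAM environment is already
   set up, it can be installed with:
   $ opam install slap *)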
open Format
open Slap.Io
open Slap.D
open Slap.Common

module Size = Slap.Size
(** A tree node holds an [n]-dimensional feature vector and a list of child
    subtrees. After [feedforward], the vector of each non-leaf node contains
    that node's activation. *)
type 'n tree = Node of ('n, Slap.cnt) vec * 'n tree list

(** Parameters of a recursive neural network. The same record type also
    serves as a buffer for gradients (cf. [grads] in [main]). *)
type 'n params = {
  wleft  : ('n, 'n, Slap.cnt) mat;
  wright : ('n, 'n, Slap.cnt) mat;
  bias   : ('n, Slap.cnt) vec;
}
(** [make_params feature_dim] allocates a parameter set with randomly
    initialized weights and biases. *)
let make_params feature_dim =
  {
    wleft = Mat.random feature_dim feature_dim;
    wright = Mat.random feature_dim feature_dim;
    bias = Vec.random feature_dim;
  }
(* Activation function: the logistic sigmoid. *)
let sigm a = 1.0 /. (1.0 +. exp (~-. a))

let actv_f ?y x = Vec.map ?y sigm x

(* Derivative of the sigmoid, expressed in terms of its output:
   if y = sigm a, then the derivative at a is y * (1 - y), so [actv_f']
   takes the activation [y] rather than the pre-activation. *)
let actv_f' y =
  let ones = Vec.make1 (Vec.dim y) in
  Vec.mul y (Vec.sub ones y)
(** [feedforward params tree] returns the output of a recursive neural network. *)
let feedforward {wleft; wright; bias} tree =
  let rec calc_vec = function
    | Node (x, []) -> x (* a leaf *)
    | Node (y, children) -> (* a non-leaf node *)
      let last_i = List.length children - 1 in
      let add_child_vec i child =
        let x = calc_vec child in
        let cr = if last_i = 0 then 0.5 else (float i) /. (float last_i) in
        let cl = 1.0 -. cr in
        ignore (gemv ~trans:normal ~alpha:cl wleft x ~beta:1.0 ~y);
        ignore (gemv ~trans:normal ~alpha:cr wright x ~beta:1.0 ~y)
      in
      ignore (Vec.copy bias ~y);
      List.iteri add_child_vec children;
      actv_f ~y y
  in
  calc_vec tree
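(* What [feedforward] computes, written out: for a non-leaf node whose child
   vectors x_0, ..., x_{n-1} are obtained recursively, the mixing
   coefficients are
     c_i = i / (n - 1)        (c_0 = 1/2 when n = 1)
   and the node's activation is
     y = f(b + sum_i ((1 - c_i) * Wleft + c_i * Wright) x_i)
   where f is the elementwise sigmoid. Note that [y] is updated in place, so
   after [feedforward] every node of the tree stores its activation. *)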
(** [feedback grads params tree target] computes gradients by backpropagation
    through structure (BPTS) and stores them into [grads]. *)
let feedback grads {wleft; wright; bias} tree target =
  Mat.fill grads.wleft 0.0;
  Mat.fill grads.wright 0.0;
  Vec.fill grads.bias 0.0;
  let rec add_grads delta = function
    | [] -> () (* a leaf *)
    | children -> (* a non-leaf node *)
      let last_i = List.length children - 1 in
      let add_grads_child i (Node (x, c_children)) =
        let cr = if last_i = 0 then 0.5 else float i /. float last_i in
        let cl = 1.0 -. cr in
        (* Compute gradients of weights *)
        ignore (ger ~alpha:cl delta x grads.wleft);
        ignore (ger ~alpha:cr delta x grads.wright);
        (* Compute delta for a child *)
        let z = gemv ~trans:trans ~alpha:cl wleft delta in
        ignore (gemv ~trans:trans ~alpha:cr wright delta ~beta:1.0 ~y:z);
        let c_delta = Vec.mul z (actv_f' x) in
        add_grads c_delta c_children
      in
      List.iteri add_grads_child children;
      axpy ~alpha:1.0 ~x:delta grads.bias (* Compute gradients of biases *)
  in
  let Node (y, children) = tree in
  let root_delta = Vec.mul (Vec.sub y target) (actv_f' y) in
  add_grads root_delta children
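(* The BPTS recursion implemented above, written out. With squared error
   E = ||y_root - t||^2 / 2 (cf. [calc_error] below), the root delta is
     delta_root = (y_root - t) .* f'(y_root),   with f'(y) = y .* (1 - y).
   For child i of a node carrying [delta], the code accumulates
     dE/dWleft  += cl_i * delta * x_i^T
     dE/dWright += cr_i * delta * x_i^T
     dE/db      += delta            (once per non-leaf node)
   and propagates
     delta_i = ((cl_i * Wleft + cr_i * Wright)^T delta) .* f'(x_i)
   into non-leaf children, where x_i is the child's activation stored by
   [feedforward]. *)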
(** [check_gradient grads params tree target] checks whether the given
    gradients [grads] are correct by comparison with the results of naive
    numerical differentiation. This routine is only for checking the
    implementation: the numerical differentiation is much slower than
    backpropagation.
    cf. http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization *)
let check_gradient grads params tree target =
  let epsilon = 1e-4 in
  let check_digits dE1 dE2 = (* Check 4 significant digits *)
    let abs_dE1 = abs_float dE1 in
    if abs_dE1 < 1e-9
    then abs_float dE2 < 1e-9 (* true if both `dE1' and `dE2' are nearly zero *)
    else let diff = (dE1 -. dE2) *. (0.1 ** (floor (log10 abs_dE1) +. 1.0)) in
      abs_float diff < epsilon (* true if 4 significant digits are the same *)
  in
  let calc_error () = Vec.ssqr_diff (feedforward params tree) target /. 2.0 in
  let check_vec label x dx =
    let check i dE2 =
      let elm = Vec.get_dyn x i in
      Vec.set_dyn x i (elm +. epsilon);
      let pos_err = calc_error () in
      Vec.set_dyn x i (elm -. epsilon);
      let neg_err = calc_error () in
      Vec.set_dyn x i elm; (* restore *)
      let dE1 = (pos_err -. neg_err) /. (2.0 *. epsilon) in
      if not (check_digits dE1 dE2)
      then eprintf "WARNING: %s[%d] naive diff = %.6g, backprop = %.6g@."
          label i dE1 dE2
    in
    Vec.iteri check dx
  in
  let check_mat label a da =
    Mat.fold_topi (fun i () ai ->
        let label' = label ^ "[" ^ (string_of_int i) ^ "]" in
        check_vec label' ai (Mat.row_dyn da i)) () a
  in
  check_vec "dE/db" params.bias grads.bias;
  check_mat "dE/dWleft" params.wleft grads.wleft;
  check_mat "dE/dWright" params.wright grads.wright
(** [train ~eta grads params tree target] performs one step of online
    training (gradient descent with learning rate [eta]). *)
let train ~eta grads params tree target =
  ignore (feedforward params tree); (* feedforward *)
  feedback grads params tree target; (* feedback *)
  check_gradient grads params tree target; (* gradient checking *)
  (* Update parameters: theta <- theta - eta * dE/dtheta *)
  let alpha = ~-. eta in
  Mat.axpy ~alpha ~x:grads.wleft params.wleft;
  Mat.axpy ~alpha ~x:grads.wright params.wright;
  axpy ~alpha ~x:grads.bias params.bias
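(* A minimal sketch, not part of the original gist: [check_gradient] exists
   only to validate the implementation and re-runs [feedforward] twice per
   parameter, so a realistic training loop would use a step without it. A
   hypothetical variant, identical to [train] minus the check: *)
let train_fast ~eta grads params tree target =
  ignore (feedforward params tree);
  feedback grads params tree target;
  let alpha = ~-. eta in
  Mat.axpy ~alpha ~x:grads.wleft params.wleft;
  Mat.axpy ~alpha ~x:grads.wright params.wright;
  axpy ~alpha ~x:grads.bias params.bias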
let main () =
  Random.self_init ();
  let module N = (val Size.of_int_dyn 5 : Size.SIZE) in
  let node children = Node (Vec.create N.value, children) in
  let leaf feature_vec = Node (feature_vec, []) in
  let tree = node [
      node [
        leaf (Vec.make N.value 1.0);
        node [leaf (Vec.make N.value 2.0);
              leaf (Vec.make N.value 3.0)]];
      leaf (Vec.make N.value 4.0);
      node [leaf (Vec.make N.value 5.0);
            leaf (Vec.make N.value 6.0);
            leaf (Vec.make N.value 7.0)];
    ] in
  let target = Vec.make N.value 0.6 in
  let params = make_params N.value in
  let grads = make_params N.value in
  let eta = ref 0.02 in
  for i = 1 to 1000 do
    train ~eta:!eta grads params tree target;
    eta := !eta *. 0.99
  done;
  printf "target = [ %a]; prediction = [ %a]@."
    pp_rfvec target pp_rfvec (feedforward params tree)

let () = main ()
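(* Expected behavior, not output from an actual run: the network is fitted to
   map the fixed example tree to the constant target vector, so after the
   1000 iterations with the decaying step size the printed prediction should
   lie close to 0.6 in every component, up to the random initialization. *)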