#
#
# This file is a rewriting, in R, of the TensorFlow tutorial at https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767
# The objective is to train an RNN on an "echo" task: the target sequence is the input sequence shifted echo_step steps to the right.
# (Later parts of the same tutorial series swap the hand-built cell below for an LSTM, which allows for longer time dependencies.)
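# A small illustration of the echo target (example values only, not taken from the tutorial):
#   x: 1 0 1 1 0 0 1 ...
#   y: 0 0 0 1 0 1 1 ...   i.e. y[t] = x[t - echo_step], with the first echo_step targets set to 0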
library(tensorflow)   # R interface to TensorFlow (all tf$... calls below)
library(quantmod)     # assumed source of Lag(); Hmisc::Lag() would work equally well here

#MODEL PARAMETERS
num_epochs <- 100
total_series_length <- 75000
truncated_backprop_length <- 15
state_size <- 4
num_classes <- 2
echo_step <- 3
batch_size <- 5
num_batches <- 1000
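# Note: total_series_length = batch_size * num_batches * truncated_backprop_length
#                           = 5 * 1000 * 15 = 75000,
# so each epoch consumes the generated series exactly once.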
#CREATING DATA
createData <- function()
{
  x <- rbinom(total_series_length, 1, 0.5)   # random binary input sequence
  y <- Lag(x, echo_step)                     # target = input shifted by echo_step positions
  y[1:echo_step] <- 0                        # the first echo_step targets have no history
  x <- matrix(x, nrow = batch_size, byrow = TRUE)
  y <- matrix(y, nrow = batch_size, byrow = TRUE)
  return(list(xlab = x, ylab = y))
}
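# x and y come back as batch_size x (total_series_length / batch_size) matrices,
# i.e. 5 x 15000 with the parameters above.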
# BUILDING VARIABLES AND PLACEHOLDERS
batchY_placeholder <- tf$placeholder(tf$int32, c(batch_size, truncated_backprop_length))
batchX_placeholder <- tf$placeholder(tf$float32, c(batch_size, truncated_backprop_length))
init_state <- tf$placeholder(tf$float32, c(batch_size, state_size))
# BUILDING WEIGHTS AND BIASES
# W has state_size+1 rows because each time step's scalar input is concatenated with the previous state
W <- tf$Variable(matrix(runif((state_size+1)*state_size, 0, 1), state_size+1, state_size), dtype=tf$float32)
b <- tf$Variable(rep(0, state_size), dtype=tf$float32)
W2 <- tf$Variable(matrix(runif(num_classes*state_size, 0, 1), state_size, num_classes), dtype=tf$float32)
b2 <- tf$Variable(rep(0, num_classes), dtype=tf$float32)
#SLICING INPUTS
#SLICES EACH PLACEHOLDER ALONG THE TIME AXIS INTO A LIST OF truncated_backprop_length TENSORS, EACH OF SHAPE (batch_size)
inputs_series <- tf$unstack(batchX_placeholder, num=truncated_backprop_length, axis=1)
labels_series <- tf$unstack(batchY_placeholder, num=truncated_backprop_length, axis=1)
#UNROLLED FORWARD PASS (one plain tanh cell applied per time step)
current_state <- init_state
states_series <- list()
for(current_input in inputs_series)
{
  current_input <- tf$reshape(current_input, tf$cast(c(batch_size, 1), dtype=tf$int32))
  # concatenate the (batch_size x 1) input with the (batch_size x state_size) previous state
  input_and_state_concatenated <- tf$concat(c(current_input, current_state), axis=tf$cast(1, dtype=tf$int32))
  next_state <- tf$tanh(tf$matmul(input_and_state_concatenated, W) + b)
  states_series <- c(states_series, next_state)
  current_state <- next_state
}
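# states_series now holds truncated_backprop_length tensors, each of shape
# (batch_size, state_size): the hidden state after every unrolled time step.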
#ADDING THE TF OPTIMIZATION ENGINE
#---- AUXILIARY FUNCTIONS
logits <- function(state)
{
  return(tf$matmul(state, W2) + b2)
}
prediction <- function(logits_val)
{
  return(tf$nn$softmax(logits_val))
}
logits_series <- lapply(states_series, logits)
predictions_series <- lapply(logits_series, prediction)
# one sparse softmax cross-entropy loss tensor per unrolled time step
losses_series <- logits_series   # pre-sized list; every element is overwritten below
for(i in seq_along(losses_series))
{
  losses_series[[i]] <- tf$nn$sparse_softmax_cross_entropy_with_logits(labels = labels_series[[i]], logits = logits_series[[i]])
}
total_loss <- tf$reduce_mean(losses_series)
train_step <- tf$train$AdagradOptimizer(0.3)$minimize(total_loss)
#RUNNING SESSION
with(tf$Session() %as% sess, {
  sess$run(tf$global_variables_initializer())
  loss_list <- list()
  for(epoch_idx in (0:(num_epochs-1)))
  {
    #cat("Running epoch #",epoch_idx,"\n")
    Data <- createData()
    x <- Data$xlab
    y <- Data$ylab
    # zero initial state at the start of every epoch
    current_state_init <- matrix(rep(0, batch_size*state_size), nrow = batch_size, ncol = state_size)
    for(batch_idx in (0:(num_batches-1)))
    {
      #cat("Running batch #",batch_idx,"\n")
      start_idx <- batch_idx * truncated_backprop_length + 1
      end_idx <- start_idx + truncated_backprop_length - 1
      #cat("start_idx :",start_idx,"\n")
      #cat("end_idx :",end_idx,"\n")
      batchX <- x[, start_idx:end_idx]
      batchY <- y[, start_idx:end_idx]
      Results_list <- sess$run(fetches = list(total_loss, train_step, current_state, predictions_series), feed_dict=dict(batchX_placeholder=batchX, batchY_placeholder=batchY, init_state=current_state_init))
      # carry the final RNN state over to the next batch, as the original tutorial does
      current_state_init <- Results_list[[3]]
      loss_list <- c(loss_list, Results_list[[1]])
      if(batch_idx %% 100 == 0)
      {
        cat("Step :", batch_idx, "- Loss :", Results_list[[1]], "\n")
      }
    }
  }
})
For some reason the above code, which is a rewriting of the E. Hallstrom TensorFlow tutorial using the R tensorflow library, does not reach losses below 0.125, contrary to what the Medium page shows in TensorBoard. My guess is that it is related to some mistake I made when switching from Python indexing (which starts at 0) to R indexing (which starts at 1), but I can't seem to find it ...
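One way to test that indexing guess is to check that the 1-based batch windows tile each row of the generated series exactly once, with no gaps or overlaps. A minimal sketch, reusing truncated_backprop_length (15) and num_batches (1000) as defined above (this helper is only illustrative, not part of the original code):

batch_columns <- unlist(lapply(0:(num_batches - 1), function(batch_idx) {
  start_idx <- batch_idx * truncated_backprop_length + 1
  end_idx <- start_idx + truncated_backprop_length - 1
  start_idx:end_idx
}))
# passes silently if the windows cover columns 1..(num_batches * truncated_backprop_length) exactly once, in order
stopifnot(length(batch_columns) == num_batches * truncated_backprop_length,
          all(batch_columns == seq_len(num_batches * truncated_backprop_length)))

With the parameter values above this check passes, which suggests the batch slicing itself is not where an off-by-one would hide.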