@pjere
Forked from 3h4/1-10-vanilla-rnn.py
Last active June 18, 2017
#
# This file is an R rewrite (using the tensorflow R package) of the TensorFlow tutorial at
# https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767
# The task is to train a vanilla RNN, with truncated backpropagation through time, to echo a
# random binary input sequence shifted by echo_step (3) time steps.
#
library(tensorflow)   # R interface to TensorFlow; provides the tf module object
library(Hmisc)        # assumed here as the source of the Lag() helper used in createData()
#MODEL PARAMETERS
num_epochs <- 100
total_series_length <- 75000
truncated_backprop_length <- 15
state_size <- 4
num_classes <- 2
echo_step <- 3
batch_size <- 5
num_batches <- 1000
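# Sanity check (editor's note, not in the original gist): total_series_length / batch_size = 15000
# columns per row, which equals num_batches * truncated_backprop_length = 1000 * 15, so the batch
# loop below sweeps each epoch's data exactly once.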
#CREATING DATA
createData <- function()
{
  x <- rbinom(total_series_length, 1, 0.5)   # random binary input sequence
  y <- Lag(x, echo_step)                     # target = input delayed by echo_step
  y[1:echo_step] <- 0                        # replace the leading NAs introduced by the lag
  x <- matrix(x, nrow = batch_size, byrow = TRUE)
  y <- matrix(y, nrow = batch_size, byrow = TRUE)
  return(list(xlab = x, ylab = y))
}
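# Illustration (editor's addition, not in the original gist): with the parameters above,
# createData() returns two batch_size x 15000 matrices in which each row of ylab is the
# corresponding row of xlab shifted right by echo_step positions, e.g.
# d <- createData()
# dim(d$xlab)                                                            # 5 15000
# all(d$ylab[1, (echo_step + 1):20] == d$xlab[1, 1:(20 - echo_step)])    # TRUE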
# BUILDING VARIABLES AND PLACEHOLDERS
batchY_placeholder <- tf$placeholder(tf$int32,c(batch_size,truncated_backprop_length))
batchX_placeholder <- tf$placeholder(tf$float32,c(batch_size,truncated_backprop_length))
init_state <- tf$placeholder(tf$float32, c(batch_size, state_size))
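# Shape note (editor's addition): batchX_placeholder and batchY_placeholder each hold one
# (batch_size x truncated_backprop_length) window of the data; init_state receives the hidden
# state with which the unrolled network starts each window.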
# BUILDING WEIGHTS AND BIASES
W <- tf$Variable(matrix(runif((state_size+1)*state_size,0,1),state_size+1,state_size), dtype=tf$float32)
b <- tf$Variable(rep(0,state_size), dtype=tf$float32)
W2 <- tf$Variable(matrix(runif(num_classes*state_size,0,1),state_size,num_classes), dtype=tf$float32)
b2 <- tf$Variable(rep(0,num_classes), dtype=tf$float32)
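# Shape note (editor's addition): W has state_size + 1 rows because each step's 1-column input is
# concatenated with the state_size-column previous state; W2 and b2 map the hidden state to the
# num_classes output logits.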
# SLICING INPUTS
# Unstack along the time axis into a list of truncated_backprop_length tensors, each of shape (batch_size).
inputs_series <- tf$unstack(batchX_placeholder, num=truncated_backprop_length, axis=1)
labels_series <- tf$unstack(batchY_placeholder, num=truncated_backprop_length, axis=1)
current_state <- init_state
states_series <- list()
# Unroll the RNN: one set of graph nodes per time step in the truncated window
for(current_input in inputs_series)
{
  current_input <- tf$reshape(current_input, tf$cast(c(batch_size, 1), dtype = tf$int32))
  # Concatenate the current input column with the previous state along the feature axis
  input_and_state_concatenated <- tf$concat(c(current_input, current_state), axis = tf$cast(1, dtype = tf$int32))
  next_state <- tf$tanh(tf$matmul(input_and_state_concatenated, W) + b)   # new hidden state
  states_series <- c(states_series, next_state)
  current_state <- next_state
}
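# At this point states_series holds truncated_backprop_length tensors, each of shape
# (batch_size, state_size), and current_state refers to the state after the last unrolled step.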
#ADDING THE TF OPTIMIZATION ENGINE
#---- AUXILIARY FUNCTIONS
logits <- function(state)
{
  return(tf$matmul(state, W2) + b2)
}
prediction <- function(logits_val)
{
  return(tf$nn$softmax(logits_val))
}
logits_series <- lapply(states_series,logits)
predictions_series <- lapply(logits_series, prediction)
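# predictions_series holds the per-step softmax class probabilities; in this script it is only
# fetched in sess$run below, not otherwise used.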
# Per-time-step cross-entropy between the integer labels and the logits
losses_series <- logits_series   # copied only to obtain a list of the right length; overwritten below
for(i in 1:length(losses_series))
{
  losses_series[[i]] <- tf$nn$sparse_softmax_cross_entropy_with_logits(labels = labels_series[[i]], logits = logits_series[[i]])
}
total_loss <- tf$reduce_mean(losses_series)
train_step <- tf$train$AdagradOptimizer(0.3)$minimize(total_loss)
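# Note (editor's addition): total_loss averages the cross-entropy over all truncated_backprop_length
# time steps and all batch_size sequences; Adagrad with learning rate 0.3 matches the optimizer
# choice in the linked tutorial.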
#RUNNING SESSION
with(tf$Session() %as% sess, {
  sess$run(tf$global_variables_initializer())
  loss_list <- list()
  for(epoch_idx in (0:(num_epochs-1)))
  {
    #cat("Running epoch #",epoch_idx,"\n")
    Data <- createData()
    x <- Data$xlab
    y <- Data$ylab
    current_state_init <- matrix(rep(0, batch_size * state_size), nrow = batch_size, ncol = state_size)
    for(batch_idx in (0:(num_batches-1)))
    {
      #cat("Running batch #",batch_idx,"\n")
      start_idx <- batch_idx * truncated_backprop_length + 1
      end_idx <- start_idx + truncated_backprop_length - 1
      #cat("start_idx :",start_idx,"\n")
      #cat("end_idx :",end_idx,"\n")
      batchX <- x[, start_idx:end_idx]
      batchY <- y[, start_idx:end_idx]
      Results_list <- sess$run(fetches = list(total_loss, train_step, current_state, predictions_series),
                               feed_dict = dict(batchX_placeholder = batchX,
                                                batchY_placeholder = batchY,
                                                init_state = current_state_init))
      loss_list <- c(loss_list, Results_list[[1]])
      if(batch_idx %% 100 == 0)
      {
        cat("Step :", batch_idx, "- Loss :", Results_list[[1]], "\n")
      }
    }
  }
})
@pjere (Author) commented Jun 18, 2017
For some reason the above code, which is a rewrite of E. Hallström's TensorFlow tutorial using the R tensorflow package, does not reach losses below 0.125, unlike the results shown on the Medium page via TensorBoard. My guess is that it is related to some mistake I made in switching from Python's 0-based indexing to R's 1-based indexing, but I can't seem to find it ...
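Editor's note (a hedged guess, not a confirmed fix): the start_idx/end_idx arithmetic above looks correct for 1-based indexing. A more likely culprit is that the Python code in the linked tutorial feeds the hidden state returned by each sess.run back into init_state for the next batch, whereas the loop above feeds the same zero matrix current_state_init every time. Without that carry-over, the first echo_step positions of every 15-step window cannot be predicted, which puts a floor of roughly (3/15)·ln 2 ≈ 0.14 on the loss, close to the observed plateau. A minimal sketch of the change inside the batch loop, reusing the variable names above:

Results_list <- sess$run(fetches = list(total_loss, train_step, current_state, predictions_series),
                         feed_dict = dict(batchX_placeholder = batchX,
                                          batchY_placeholder = batchY,
                                          init_state = current_state_init))
current_state_init <- Results_list[[3]]   # reuse the final hidden state as the next batch's initial state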
