Instructions for installing CUDA 9.1 and cuDNN 7.1.2 on Ubuntu 16.04
A similar process can be followed for installing other versions (especially CUDA 7.5 onwards). Just download the official CUDA and cuDNN files from NVIDIA.
In this use case or pattern, we use multiple iterators: one for the training dataset and one each for the eval-train and eval-test datasets.
This pattern is slightly more complex, but it allows us to use the training and evaluation datasets simultaneously without them resetting each other.
It is useful if you want to evaluate your model in the middle of a training epoch. For large image datasets, a lot of learning takes place within a single epoch, so it is convenient to have access to evaluation loss and accuracy every 100 batches or so. Separate iterators for the training and evaluation datasets make this possible.
WARNING: The _check_accuracy function is quite useful, but it is not documented well enough.
Code Template
def _check_accuracy(sess, correct_prediction, dataset_init_op, merged_summary,
                    file_writer, global_step, is_training, use_dataset, use_dataset_str):
    """Check the accuracy of the model on either train or val (depending on dataset_init_op).

    Source: https://gist.github.com/omoindrot/dedc857cdc0e680dfb1be99762990c9c/

    Args:
        sess (tf.Session): Current session.
        correct_prediction (tf.Tensor): Boolean tensor of correct predictions of one batch,
            True at the indexes where the prediction was correct and False where it was wrong.
        dataset_init_op (tf.Dataset.Iterator.Initializer): The training or validation
            iterator initializer.
    """
    # Initialize the correct dataset
    sess.run([dataset_init_op], feed_dict={is_training: False, use_dataset: use_dataset_str})
    num_correct, num_samples = 0, 0
    while True:
        try:
            correct_pred, summary, step = sess.run(
                [correct_prediction, merged_summary, global_step],
                feed_dict={is_training: False, use_dataset: use_dataset_str})
            num_correct += correct_pred.sum()
            num_samples += correct_pred.shape[0]
        except tf.errors.OutOfRangeError:
            break
    file_writer.add_summary(summary, step)
    # Return the fraction of datapoints that were correctly classified
    acc = float(num_correct) / num_samples
    return acc


def _preprocess_function(image, label, config):
    """Preprocess the dataset item where the inputs are an image and a label."""
    if config['standardize']:
        image = tf.image.per_image_standardization(image)
    return image, label


def _parsing_function(filename, label, config):
    """A function that parses one item of a dataset."""
    file_string = tf.read_file(filename)
    image = tf.image.decode_jpeg(contents=file_string)
    image = tf.reshape(image, shape=config['image_shape'])
    image = tf.cast(image, tf.float32)
    return image, label


######################## Placeholders which help us decide which dataset iterator to use ########################
is_training = tf.placeholder(tf.bool)
use_dataset = tf.placeholder(tf.string)
use_dataset_train = tf.constant("train", dtype=tf.string)            # For comparison
use_dataset_eval_train = tf.constant("eval_train", dtype=tf.string)  # For comparison
use_dataset_eval_test = tf.constant("eval_test", dtype=tf.string)    # For comparison

######################## Dataset functions, partial fill ########################
config = read_config(config_filename)
from functools import partial
parsing_function = partial(_parsing_function, config=config)
preprocess_function = partial(_preprocess_function, config=config)
######################## Train tf.Dataset ########################
train_dataset = tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))
train_dataset = train_dataset.shuffle(10000)
train_dataset = train_dataset.map(parsing_function,
                                  num_parallel_calls=dataset_config['num_parallel_calls'])
train_dataset = train_dataset.map(preprocess_function,
                                  num_parallel_calls=dataset_config['num_parallel_calls'])
train_dataset = train_dataset.prefetch(buffer_size=2 * dataset_config['batch_size'])
batched_train_dataset = train_dataset.batch(batch_size=dataset_config['batch_size'])
logger.info("batched_train_dataset.shape: {}".format(batched_train_dataset.output_shapes))

######################## Eval Train tf.Dataset ########################
eval_train_dataset = tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))
eval_train_dataset = eval_train_dataset.map(parsing_function,
                                            num_parallel_calls=dataset_config['num_parallel_calls'])
eval_train_dataset = eval_train_dataset.map(preprocess_function,
                                            num_parallel_calls=dataset_config['num_parallel_calls'])
eval_train_dataset = eval_train_dataset.prefetch(buffer_size=2 * dataset_config['eval_batch_size'])
eval_train_dataset = eval_train_dataset.shuffle(buffer_size=2 * dataset_config['eval_batch_size'])
batched_eval_train_dataset = eval_train_dataset.batch(batch_size=dataset_config['eval_batch_size'])
logger.info("batched_eval_train_dataset.shape: {}".format(batched_eval_train_dataset.output_shapes))

######################## Eval Test tf.Dataset ########################
eval_test_dataset = tf.data.Dataset.from_tensor_slices((test_filenames, test_labels))
eval_test_dataset = eval_test_dataset.map(parsing_function,
                                          num_parallel_calls=dataset_config['num_parallel_calls'])
eval_test_dataset = eval_test_dataset.map(preprocess_function,
                                          num_parallel_calls=dataset_config['num_parallel_calls'])
eval_test_dataset = eval_test_dataset.prefetch(buffer_size=2 * dataset_config['eval_batch_size'])
eval_test_dataset = eval_test_dataset.shuffle(buffer_size=2 * dataset_config['eval_batch_size'])
batched_eval_test_dataset = eval_test_dataset.batch(batch_size=dataset_config['eval_batch_size'])
logger.info("batched_eval_test_dataset.shape: {}".format(batched_eval_test_dataset.output_shapes))
######################## Dataset Iterators ########################
iterator_train = tf.data.Iterator.from_structure(output_types=batched_train_dataset.output_types,
                                                 output_shapes=batched_train_dataset.output_shapes)
train_iterator_init_op = iterator_train.make_initializer(batched_train_dataset,
                                                          name='train_iterator')

iterator_eval_train = tf.data.Iterator.from_structure(output_types=batched_train_dataset.output_types,
                                                      output_shapes=batched_train_dataset.output_shapes)
eval_train_iterator_init_op = iterator_eval_train.make_initializer(batched_eval_train_dataset,
                                                                   name='eval_train_iterator')

iterator_eval_test = tf.data.Iterator.from_structure(output_types=batched_train_dataset.output_types,
                                                     output_shapes=batched_train_dataset.output_shapes)
eval_test_iterator_init_op = iterator_eval_test.make_initializer(batched_eval_test_dataset,
                                                                 name='eval_test_iterator')


def f_train(): return iterator_train.get_next()


def f_eval_train(): return iterator_eval_train.get_next()


def f_eval_test(): return iterator_eval_test.get_next()


######################## Get batches from the iterators ########################
# Depending on the string fed to `use_dataset`, the matching iterator supplies the batch
_batch_x, _batch_y = tf.case(pred_fn_pairs={
    tf.equal(use_dataset, use_dataset_train): f_train,
    tf.equal(use_dataset, use_dataset_eval_train): f_eval_train,
    tf.equal(use_dataset, use_dataset_eval_test): f_eval_test
}, exclusive=True)
logger.info("batch_x.shape: {}, batch_y.shape: {}".format(_batch_x.shape, _batch_y.shape))
## ... use batch_x and batch_y
# ... your architecture
# ... your loss and optimization operations

############################ Start session ############################
with tf.Session(graph=graph) as sess:
    ######################## Learning ########################
    n_epochs = 10
    start_epoch = last_epoch + 1  # e.g. when resuming from a checkpoint
    for epoch in range(n_epochs):
        #################### Per-epoch setup ####################
        start_time = time.time()
        # Initialize the training and evaluation iterators
        sess.run(train_iterator_init_op)
        sess.run(eval_train_iterator_init_op)
        sess.run(eval_test_iterator_init_op)

        #################### Training: run until the training iterator has ended ####################
        step = 0
        while True:
            ################ Summary section, occurs only once in a while (is_training=False) ################
            if step % admin_config['train_summary_step'] == 0:
                # Eval Train
                batch_loss, batch_accuracy, merged_summary, global_step = sess.run(
                    [_loss, _accuracy, _merged_summary, _global_step],
                    feed_dict={is_training: False, use_dataset: 'eval_train'})
                logger.info("Eval-Train: Epoch {}/{} ::: step {:5} ::: global_step {:5} ::: batch_loss {:10.4f} ::: batch_accuracy {:10.2f}"
                            .format(epoch, n_epochs, step, global_step, float(batch_loss), float(batch_accuracy)))

                # Eval Test
                batch_loss, batch_accuracy, merged_summary, global_step = sess.run(
                    [_loss, _accuracy, _merged_summary, _global_step],
                    feed_dict={is_training: False, use_dataset: 'eval_test'})
                logger.info("Eval-Test : Epoch {}/{} ::: step {:5} ::: global_step {:5} ::: batch_loss {:10.4f} ::: batch_accuracy {:10.2f}"
                            .format(epoch, n_epochs, step, global_step, float(batch_loss), float(batch_accuracy)))

                # Re-initialize the eval iterators so they are ready for the next summary step
                sess.run(eval_train_iterator_init_op)
                sess.run(eval_test_iterator_init_op)

            ################ Training section, occurs every step, even on summary steps (is_training=True) ################
            try:
                sess.run(_train, feed_dict={is_training: True, use_dataset: 'train'})
            except tf.errors.OutOfRangeError:
                end_time = time.time()
                logger.info("Training epoch ended: {:<5d}, Time taken: {:0.2f}"
                            .format(epoch, end_time - start_time))
                break
            step = step + 1

        #################### Validation: check overall training and test accuracy after every epoch ####################
        train_acc = _check_accuracy(sess, _y_correct, eval_train_iterator_init_op, _merged_summary,
                                    train_writer, _global_step, is_training, use_dataset, 'eval_train')
        test_acc = _check_accuracy(sess, _y_correct, eval_test_iterator_init_op, _merged_summary,
                                   test_writer, _global_step, is_training, use_dataset, 'eval_test')
        logger.error("Epoch {}/{} ::: END OF EPOCH ::: train_acc {:0.4f} ::: test_acc {:0.4f}"
                     .format(epoch, n_epochs, train_acc, test_acc))
In this use case or pattern, we use a single iterator to generate both the training and testing datasets.
This pattern is simple to use and easy to understand.
One drawback of this pattern is that you cannot use the training and testing datasets simultaneously.
This pattern is useful only if you intend to train your model on the entire training dataset once (i.e. one full epoch), then run the evaluation pipeline on the entire test dataset once, and then repeat this process for several epochs.
Code Template
def _check_accuracy(sess, correct_prediction, dataset_init_op):
    """Check the accuracy of the model on either train or val (depending on dataset_init_op).

    Source: https://gist.github.com/omoindrot/dedc857cdc0e680dfb1be99762990c9c/

    Args:
        sess (tf.Session): Current session.
        correct_prediction (tf.Tensor): Boolean tensor of correct predictions of one batch,
            True at the indexes where the prediction was correct and False where it was wrong.
        dataset_init_op (tf.Dataset.Iterator.Initializer): The training or validation
            iterator initializer.
    """
    # Initialize the correct dataset
    sess.run(dataset_init_op)
    num_correct, num_samples = 0, 0
    while True:
        try:
            correct_pred = sess.run(correct_prediction)
            num_correct += correct_pred.sum()
            num_samples += correct_pred.shape[0]
        except tf.errors.OutOfRangeError:
            break
    # Return the fraction of datapoints that were correctly classified
    acc = float(num_correct) / num_samples
    return acc


def _preprocess_function(image, label, config):
    """Preprocess the dataset item where the inputs are an image and a label."""
    if config['standardize']:
        image = tf.image.per_image_standardization(image)
    return image, label


def _parsing_function(filename, label, config):
    """A function that parses one item of a dataset."""
    file_string = tf.read_file(filename)
    image = tf.image.decode_jpeg(contents=file_string)
    image = tf.reshape(image, shape=config['image_shape'])
    image = tf.cast(image, tf.float32)
    return image, label


######################## Dataset functions, partial fill ########################
config = read_config(config_filename)
from functools import partial
parsing_function = partial(_parsing_function, config=config)
preprocess_function = partial(_preprocess_function, config=config)
######################## Train tf.Dataset ########################
train_dataset = tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))
train_dataset = train_dataset.map(parsing_function, num_parallel_calls=config['num_parallel_calls'])
train_dataset = train_dataset.map(preprocess_function, num_parallel_calls=config['num_parallel_calls'])
train_dataset = train_dataset.prefetch(buffer_size=2 * config['batch_size'])
batched_train_dataset = train_dataset.batch(batch_size=config['batch_size'])

######################## Test tf.Dataset ########################
test_dataset = tf.data.Dataset.from_tensor_slices((test_filenames, test_labels))
test_dataset = test_dataset.map(parsing_function, num_parallel_calls=config['num_parallel_calls'])
test_dataset = test_dataset.map(preprocess_function, num_parallel_calls=config['num_parallel_calls'])
test_dataset = test_dataset.prefetch(buffer_size=2 * config['batch_size'])
batched_test_dataset = test_dataset.batch(batch_size=config['batch_size'])
######################## Dataset Iterators ########################
iterator = tf.data.Iterator.from_structure(output_types=batched_train_dataset.output_types,
                                           output_shapes=batched_train_dataset.output_shapes)
train_iterator_init_op = iterator.make_initializer(batched_train_dataset, name='train_iterator')
test_iterator_init_op = iterator.make_initializer(batched_test_dataset, name='test_iterator')

######################## Get batches from the iterator ########################
# Depending on which initializer was last run with `sess.run`, that dataset's data is generated
batch_x, batch_y = iterator.get_next()
logger.info("batch_x.shape: {}, batch_y.shape: {}".format(batch_x.shape, batch_y.shape))
## ... use batch_x and batch_y
# ... your architecture
# ... your loss and optimization operations

############################ Start session ############################
with tf.Session(graph=graph) as sess:
    ######################## Learning ########################
    n_epochs = 10
    for epoch in range(n_epochs):
        #################### Training: run until the training iterator has ended ####################
        # Initialize training iterator
        sess.run(train_iterator_init_op)
        step = 0
        while True:
            try:
                sess.run(training_step)
            except tf.errors.OutOfRangeError:
                logger.info("Training epoch ended: {}".format(epoch + 1))
                break
            step = step + 1

        #################### Validation: check overall training and test accuracy after every epoch ####################
        train_acc = _check_accuracy(sess, _y_correct, train_iterator_init_op)
        test_acc = _check_accuracy(sess, _y_correct, test_iterator_init_op)
        logger.info("Epoch {}/{} ::: END OF EPOCH ::: train_acc {:0.2f} ::: test_acc {:0.2f}"
                    .format(epoch + 1, n_epochs, train_acc, test_acc))
Not training or optimizing shared variables: In this pattern we comment the shared variables out of the optimization scope, so the shared variables are not trained. Run the code snippet below and you will notice that the values of the shared variables do not change between two training steps. Keep in mind that the output of the shared layer will still change if you change the input, even though the shared variables are not updated: the outputs of the layers are just activations, and they depend on the input.
Training or optimizing shared variables: In this pattern we optimize the shared variables as well, and we notice that their values change between training steps.
Copying variables from model one to model two: In this pattern we optimize the variables of model one and later transfer them to model two.
Optimizing original variables that have been updated using tf.assign: In this pattern we try to optimize variables whose values are assigned from another set of variables. tf.assign returns updated references to the original variables, so I first tried to optimize those updated references. Unfortunately, the optimizer would not accept them, so I had to optimize the original variable references instead, with the hope that this would optimize the updated values. The script shows that this works: using the original variable references is fine as long as you use control_dependencies to make sure the optimization step happens only after the update (the tf.assign) step.
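A minimal sketch of that control-dependency arrangement (the name `source_W` and the toy loss are illustrative assumptions, not taken from the original script):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 1])
y = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.get_variable('W', shape=[1, 1], dtype=tf.float32)
source_W = tf.get_variable('source_W', shape=[1, 1], dtype=tf.float32)

loss = tf.reduce_mean(tf.losses.mean_squared_error(y, tf.matmul(x, W)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05)

W_updated = tf.assign(W, source_W)  # an updated reference, not a tf.Variable
# optimizer.minimize(loss, var_list=[W_updated])  # the optimizer rejects this reference
with tf.control_dependencies([W_updated]):
    # Optimize the ORIGINAL variable reference; the control dependency guarantees
    # that the tf.assign update runs before the optimization step touches W.
    optimize = optimizer.minimize(loss, var_list=[W])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(optimize, feed_dict={x: [[1.0]], y: [[2.0]]})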
Script: Not training or optimizing shared variables
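The following is a minimal sketch of this pattern, assuming a toy model where a `shared` scope feeds a `head` scope (the names are illustrative, not from the original script). The shared scope is simply left out of (commented out of) var_list, so the optimizer never touches the shared variables:

def do_stuff():
    from pprint import pprint
    import tensorflow as tf

    x = tf.placeholder(tf.float32, shape=[None, 1])
    y = tf.placeholder(tf.float32, shape=[None, 1])
    with tf.variable_scope('shared'):
        shared_W = tf.get_variable('W', shape=[1, 1], dtype=tf.float32)
        shared_b = tf.get_variable('b', shape=[1], dtype=tf.float32)
    with tf.variable_scope('head'):
        head_W = tf.get_variable('W', shape=[1, 1], dtype=tf.float32)
        head_b = tf.get_variable('b', shape=[1], dtype=tf.float32)

    shared_out = tf.add(tf.matmul(x, shared_W), shared_b)  # activations still depend on the input
    pred = tf.add(tf.matmul(shared_out, head_W), head_b)
    loss = tf.reduce_mean(tf.losses.mean_squared_error(y, pred))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05)
    # The shared scope is left out of var_list ("commented out" of the optimization scope)
    opt_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'head')
    # opt_vars += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'shared')
    optimize = optimizer.minimize(loss=loss, var_list=opt_vars)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(3):
            sess.run(optimize, feed_dict={x: [[1], [2]], y: [[1], [2]]})
            pprint(sess.run([shared_W, shared_b]))  # unchanged between training steps
            pprint(sess.run([head_W, head_b]))      # these change every step

Uncommenting the second tf.get_collection line turns this into the second pattern, where the shared variables are optimized as well.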
In these patterns we explore how the graph behaves when we ask to run only some portions of it: how dependency nodes automatically run when required, and what happens in corner cases.
How variables are updated on update operations: This pattern shows that variables and tensors take on new values only when their update operations are explicitly called, unless there is a natural dependency in the graph or tf.control_dependencies() is used.
Is feed_dict required for placeholder-independent parts of the graph? Apparently not. This pattern shows that we can execute parts of the graph that do not directly rely on placeholders without using feed_dict. It also shows that you can update a variable via an assign operation in one sess.run() using a feed_dict, and then in another sess.run() use that updated variable without providing a feed_dict.
Computing gradients in one sess.run and then applying gradients in another sess.run: This pattern shows how to compute gradients in one sess.run and, within the same sess.run, save them to backup variables. In the next sess.run, we use the saved gradients in the backup variables to apply them and optimize the weights. This has the useful property of sharing gradients across sess.run calls without needing a feed_dict at the end.
Script: How variables are updated when update operations are called explicitly and implicitly
def do_stuff():
    from pprint import pprint
    import tensorflow as tf
    import numpy as np

    # Allocate only 20% of GPU memory (remember to add this to the session ConfigProto)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)

    x = tf.placeholder(tf.float32, shape=[None, 1])
    y = tf.placeholder(tf.float32, shape=[None, 1])
    W = tf.get_variable('W', shape=[1, 1], dtype=tf.float32, initializer=None)
    b = tf.get_variable('b', shape=[1], dtype=tf.float32, initializer=None)
    pred = tf.add(tf.matmul(x, W), b)
    loss = tf.reduce_mean(tf.losses.mean_squared_error(y, pred))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05)
    opt_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'W')
    opt_vars += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'b')
    optimize = optimizer.minimize(loss=loss, var_list=opt_vars, name='optimize')

    copy_W = tf.get_variable('copy_W', shape=[1, 1], dtype=tf.float32, initializer=None)
    copy_b = tf.get_variable('copy_b', shape=[1], dtype=tf.float32, initializer=None)
    with tf.control_dependencies([optimize]):
        assign_op_W = copy_W.assign(W)
        assign_op_b = copy_b.assign(b)
        assign_op = tf.group([assign_op_W, assign_op_b], name='assign_op')
    copy_pred = tf.add(tf.matmul(x, copy_W), copy_b)
    copy_loss = tf.reduce_mean(tf.losses.mean_squared_error(y, copy_pred))
pprint("global_vars-----------------------------------------------")
pprint(tf.global_variables())
withtf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) assess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
foriinrange(5):
print("#####################################################")
print("---------------------------------------------------- no-op")
# No change in any losses, tf.no_op()sess.run([tf.no_op()], feed_dict={x: [[1], [2]], y: [[1], [2]]})
pprint(sess.run([loss, copy_loss],
feed_dict={x: [[1], [2]], y: [[1], [2]]}))
# Change only in loss and not in copy_loss, optimizeprint("---------------------------------------------------- optimize")
sess.run([optimize], feed_dict={x: [[1], [2]], y: [[1], [2]]})
pprint(sess.run([loss, copy_loss],
feed_dict={x: [[1], [2]], y: [[1], [2]]}))
print("---------------------------------------------------- no-op --> get current values")
# No change in any losses, tf.no_op()sess.run([tf.no_op()], feed_dict={x: [[1], [2]], y: [[1], [2]]})
pprint(sess.run([loss, copy_loss],
feed_dict={x: [[1], [2]], y: [[1], [2]]}))
# Change in both loss and copy_loss, assign_opprint("---------------------------------------------------- optimize, assign-op")
sess.run([assign_op], feed_dict={x: [[1], [2]], y: [[1], [2]]})
pprint(sess.run([loss, copy_loss],
feed_dict={x: [[1], [2]], y: [[1], [2]]}))
print("---------------------------------------------------- no-op --> get current values")
# No change in any losses, tf.no_op()sess.run([tf.no_op()], feed_dict={x: [[1], [2]], y: [[1], [2]]})
pprint(sess.run([loss, copy_loss],
feed_dict={x: [[1], [2]], y: [[1], [2]]}))
Script: Is feed_dict required for placeholder-independent parts of the graph? Apparently not
def do_stuff():
    from pprint import pprint
    import tensorflow as tf
    import numpy as np

    # Allocate only 20% of GPU memory (remember to add this to the session ConfigProto)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)

    is_training = tf.placeholder(tf.bool, shape=[1])
    x = tf.placeholder(tf.float32, shape=[None, 1])
    y = tf.placeholder(tf.float32, shape=[None, 1])
    W = tf.get_variable('W', shape=[1, 1], dtype=tf.float32, initializer=None)
    b = tf.get_variable('b', shape=[1], dtype=tf.float32, initializer=None)
    dependent = tf.add(tf.matmul(x, W), b)

    x_independent = tf.get_variable('independent', shape=[2, 1], dtype=tf.float32, initializer=None)
    x_independent_updated = tf.assign(x_independent, dependent)
    independent = tf.add(tf.matmul(x_independent, W), b)

    pprint("global_vars-----------------------------------------------")
    pprint(tf.global_variables())
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        for i in range(1):
            print("#####################################################")
            print("---------------------------------------------------- no-op with feed_dict")
            pprint(sess.run([tf.no_op()], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("---------------------------------------------------- dependent with feed_dict")
            pprint(sess.run([dependent], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("---------------------------------------------------- x_independent, independent with no feed_dict")
            pprint(sess.run([x_independent, independent]))
            print("---------------------------------------------------- x_independent_updated with feed_dict")
            pprint(sess.run([x_independent_updated], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("---------------------------------------------------- x_independent, independent with no feed_dict")
            pprint(sess.run([x_independent, independent]))
Script: Computing gradients in one sess.run and then applying gradients in another sess.run
def do_stuff():
    from pprint import pprint
    import tensorflow as tf
    import numpy as np

    # Allocate only 20% of GPU memory (remember to add this to the session ConfigProto)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)

    # is_training = tf.placeholder(tf.bool, shape=[1])
    x = tf.placeholder(tf.float32, shape=[None, 1])
    y = tf.placeholder(tf.float32, shape=[None, 1])
    W = tf.get_variable('W', shape=[1, 1], dtype=tf.float32, initializer=None)
    b = tf.get_variable('b', shape=[1], dtype=tf.float32, initializer=None)
    pred = tf.add(tf.matmul(x, W), b)
    loss = tf.reduce_mean(tf.losses.mean_squared_error(y, pred))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05)
    opt_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'W')
    opt_vars += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'b')

    # After model variables have been created:
    # Create gradient variables
    with tf.control_dependencies(opt_vars):
        grad_vars = []
        print("#######################################################################")
        for i, var in enumerate(opt_vars):
            print('----------------------------------------------------------------------- creating gradient variable from variable')
            print("var.name: {} ::: var.shape: {}".format(var.name, var.shape))
            grad_var = tf.get_variable(name='grad_var_{}'.format(i),
                                       shape=var.shape,
                                       trainable=False,
                                       dtype=var.dtype,
                                       initializer=None)
            grad_vars.append(grad_var)

    # After gradient variables have been created:
    # Compute gradients
    with tf.control_dependencies(grad_vars):
        grads_and_vars = optimizer.compute_gradients(loss=loss, var_list=[opt_vars])
        grads = [g_and_v[0] for g_and_v in grads_and_vars]
    # If you have gradients & variables after compute_gradients:
    # Backup
    with tf.control_dependencies([g_or_v for g_and_v in grads_and_vars for g_or_v in g_and_v]):
        grad_vars_backup = []
        print("#######################################################################")
        for i, ((grad, var), grad_var) in enumerate(zip(grads_and_vars, grad_vars)):
            print('----------------------------------------------------------------------- assigning grad_var the value of grad')
            print("var.name: {:50s} ::: var.shape: {}".format(var.name, var.shape))
            print("grad.name: {:50s} ::: grad.shape: {}".format(grad.name, grad.shape))
            print("grad_var.name: {:50s} ::: grad_var.shape: {}".format(grad_var.name, grad_var.shape))
            grad_var_backup = tf.assign(grad_var, grad)
            grad_vars_backup.append(grad_var_backup)

    # After you have taken a backup of gradients into gradient variables:
    # Restore
    with tf.control_dependencies(grad_vars_backup):
        restore_grads_and_vars = []
        for i, (grad_var, var) in enumerate(zip(grad_vars, opt_vars)):
            restore_grads_and_vars.append([grad_var, var])

    # After you have restored gradients from gradient variables:
    # Optimize
    with tf.control_dependencies([g_or_v for g_and_v in restore_grads_and_vars for g_or_v in g_and_v]):
        optimize = optimizer.apply_gradients(grads_and_vars=restore_grads_and_vars)

    print("#######################################################################")
    pprint("global_vars-----------------------------------------------")
    pprint(tf.global_variables())
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())  # ????????
        for i in range(1):
            print("#######################################################################")
            print("----------------------------------------------------------------------- loss")
            pprint(sess.run([loss], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("----------------------------------------------------------------------- loss, NO CHANGE")
            pprint(sess.run([loss], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("----------------------------------------------------------------------- grads")
            pprint(sess.run([grads], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("----------------------------------------------------------------------- grad_vars, NO FEED_DICT")
            pprint(sess.run([grad_vars], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("----------------------------------------------------------------------- grad_vars_backup")
            pprint(sess.run([grad_vars_backup], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("----------------------------------------------------------------------- grad_vars, NO FEED_DICT, grad_vars == grad_vars_backup")
            pprint(sess.run([grad_vars]))
            print("----------------------------------------------------------------------- restore_grads_and_vars, NO FEED_DICT")
            pprint(sess.run([restore_grads_and_vars]))
            print("----------------------------------------------------------------------- optimize, NO FEED_DICT")
            pprint(sess.run([optimize]))
            print("----------------------------------------------------------------------- loss, UPDATED")
            pprint(sess.run([loss], feed_dict={x: [[1], [2]], y: [[1], [2]]}))
            print("----------------------------------------------------------------------- loss, NO CHANGE")
            pprint(sess.run([loss], feed_dict={x: [[1], [2]], y: [[1], [2]]}))