# Keras and related code samples
""" | |
Training | |
Validation on a holdout set generated from the original training data | |
Evaluation on the test data | |
- correct and test batch generation | |
- Normalize input by 255? | |
- add batchnorm layers? use model(x, training=False) then | |
- tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size) ? dataset = dataset.cache()? | |
- get_compiled_model() | |
- test last batch having non-dividing batch-size aka residual batch issue | |
- model.evaluate(test_dataset) -> setup command | |
- https://keras.io/api/models/model_training_apis/#evaluate-method | |
- Try Y-channel only | |
- tf.data.dataset.prefetch(buffer_size) | |
- tf.one_hot(y,num_classes) to get tensor form | |
- NN model for tabular data | |
- Checkpoint | |
""" | |
# ======================================================DATA
# Preprocess the data (these are NumPy arrays)
x_train = x_train.reshape(60000, 784).astype("float32") / 255
x_test = x_test.reshape(10000, 784).astype("float32") / 255
y_train = y_train.astype("float32")
y_test = y_test.astype("float32")
# Reserve 10,000 samples for validation
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]
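# Hedged sketch (not in the original notes): a tf.data input pipeline over the arrays
# above, covering the from_tensor_slices / cache / prefetch items from the TODO list.
# batch_size is an assumed value; use tf.data.experimental.AUTOTUNE on TF < 2.4.
batch_size = 64
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .cache()
    .shuffle(buffer_size=1024)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size)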
# ======================================================DATASET
# NOTE: assumes a simple `count(stop)` generator (yields 0..stop-1), as in the tf.data
# guide; a different, 3-argument `count` is defined in the BATCHING section below.
ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes=())
# ======================================================COMPILE
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
## OPTIMIZERS: SGD() (with or without momentum) - RMSprop() - Adam()
## LOSSES: SparseCategoricalCrossentropy() - CategoricalCrossentropy()
## METRICS: AUC() - Precision() - Recall()
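# Hedged sketch (not in the original notes): compiling a binary classifier with the
# metrics listed above; assumes `model` ends in a single sigmoid unit.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.AUC(), keras.metrics.Precision(), keras.metrics.Recall()],
)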
# ======================================================LR DECAY
initial_learning_rate = 0.1
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True)
optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)
## Static LR decays: ExponentialDecay, PiecewiseConstantDecay, PolynomialDecay, and InverseTimeDecay
## Dynamic LR decays (react to training signals):
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                              patience=5, min_lr=0.0)
model.fit(X_train, Y_train, callbacks=[reduce_lr])
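# Hedged sketch (not in the original notes): a custom per-epoch schedule via the
# LearningRateScheduler callback, as an alternative to ReduceLROnPlateau. The
# halve-every-10-epochs rule is an arbitrary example.
def scheduler(epoch, lr):
    return lr * 0.5 if epoch > 0 and epoch % 10 == 0 else lr

lr_callback = keras.callbacks.LearningRateScheduler(scheduler, verbose=1)
model.fit(X_train, Y_train, callbacks=[lr_callback])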
# ======================================================CALLBACKS
# -- General --
# global callback syntax
on_(train|test|predict)_(begin|end)(self, logs=None)
# batch-level
on_(train|test|predict)_batch_(begin|end)(self, batch, logs=None)  # for *_batch_end, logs is a dict of metric results
# epoch-level (training only)
on_epoch_(begin|end)(self, epoch, logs=None)
# -- Early stopping --
tf.keras.callbacks.EarlyStopping(patience=1)
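# Hedged sketch (not in the original notes): a minimal custom callback built on the
# hooks above; prints the current learning rate and metric logs at each epoch end
# (see the "print lr" TODO near the bottom). Assumes a plain (non-schedule) LR.
class LogLRCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        lr = float(keras.backend.get_value(self.model.optimizer.learning_rate))
        print(f"epoch {epoch}: lr={lr:.6g}, logs={logs}")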
# -- Checkpoint --
# Prepare a directory to store all the checkpoints.
checkpoint_dir = "./ckpt"
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

def make_or_restore_model():
    # Either restore the latest model, or create a fresh one
    # if there is no checkpoint available.
    checkpoints = [checkpoint_dir + "/" + name for name in os.listdir(checkpoint_dir)]
    if checkpoints:
        latest_checkpoint = max(checkpoints, key=os.path.getctime)
        print("Restoring from", latest_checkpoint)
        return keras.models.load_model(latest_checkpoint)
    print("Creating a new model")
    return get_compiled_model()

model = make_or_restore_model()
callbacks = [
    # This callback saves a SavedModel every epoch.
    # We include the current epoch in the folder name.
    keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_dir + "/model_3dcnn_<HP-values>-{epoch}",
        save_freq="epoch")]  # save_freq accepts "epoch" or an integer number of batches (e.g. 100)
callbacks = [
    keras.callbacks.ModelCheckpoint(
        # Path where to save the model. The two parameters below mean that we will overwrite
        # the current checkpoint if and only if the `val_loss` score has improved.
        # The saved model name will include the current epoch.
        filepath="mymodel_{epoch}",
        save_best_only=True,  # Only save a model if `val_loss` has improved.
        monitor="val_loss",
        verbose=1)]
# -- Lambda: print logs after each batch --
from keras.callbacks import LambdaCallback
callbacks = [LambdaCallback(on_batch_end=lambda batch, logs: print(logs))]
# ======================================================FIT/TRAIN
print("Fit model on training data")
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=2,
    # We pass some validation data for monitoring
    # validation loss and metrics at the end of each epoch.
    validation_data=(x_val, y_val),
)
history.history  # dict of per-epoch loss/metric values
# ======================================================EVALUATE/PREDICT
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(x_test, y_test, batch_size=128)
print("test loss, test acc:", results)
dict(zip(model.metrics_names, results))
# Generate predictions (probabilities -- the output of the last layer)
# on new data using `predict`
print("Generate predictions for 3 samples")
predictions = model.predict(x_test[:3])
print("predictions shape:", predictions.shape)
# ====================================================== Function/Class model
## Create a function for model definition and compilation, for repeated calls.
## CAN PARAMETRIZE to customize things during a search (see the sketch after this block).
## DO IT FOR THE DATA GENERATOR AS WELL
def get_uncompiled_model():
    inputs = keras.Input(shape=(784,), name="digits")
    x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
    x = layers.Dense(64, activation="relu", name="dense_2")(x)
    outputs = layers.Dense(10, activation="softmax", name="predictions")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

def get_compiled_model():
    model = get_uncompiled_model()
    model.compile(
        optimizer="rmsprop",
        loss="sparse_categorical_crossentropy",
        metrics=["sparse_categorical_accuracy"],
    )
    return model
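# Hedged sketch of the parametrization suggested above; the argument names and
# defaults are assumptions, not from the original notes.
def get_compiled_model_hp(units=64, n_layers=2, learning_rate=1e-3):
    inputs = keras.Input(shape=(784,), name="digits")
    x = inputs
    for i in range(n_layers):
        x = layers.Dense(units, activation="relu", name=f"dense_{i + 1}")(x)
    outputs = layers.Dense(10, activation="softmax", name="predictions")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.RMSprop(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["sparse_categorical_accuracy"],
    )
    return model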
# ======================================================BATCHING
"""
If using `steps_per_epoch`, pass an infinitely-looping Dataset (e.g. dataset.repeat());
see the sketch at the end of this section.
"""
padded_batch(batch_size, padded_shapes=None, padding_values=None, drop_remainder=False)
dataset = dataset.batch(5).shuffle(3, reshuffle_each_iteration=True).repeat(4)
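# Hedged sketch (not in the original notes): one possible `gen_series` matching the
# output_shapes declared below, ((), (None,)) -- a scalar index plus a variable-length
# float vector, as in the tf.data guide.
def gen_series():
    i = 0
    while True:
        size = np.random.randint(0, 10)
        yield i, np.random.normal(size=(size,))
        i += 1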
ds_series = tf.data.Dataset.from_generator(
    gen_series,
    output_types=(tf.int32, tf.float32),
    output_shapes=((), (None,)))
# NOTE: again assumes the simple `count(stop)` generator, not the 3-arg version below.
ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes=())
import random  # needed by the padding logic below

# Batch generator that pads the final (residual) batch by re-sampling earlier indices,
# so every yielded batch has exactly batch_size elements.
def count(start, end, batch_size):
    sample_count = end - start
    n_batches = int(sample_count // batch_size)
    remainder_samples = sample_count % batch_size
    if remainder_samples > 0:
        n_batches = n_batches + 1
    for idx in range(0, n_batches):
        if idx == n_batches - 1:
            pad = random.choices(range(start + idx * batch_size, end),
                                 k=(start + batch_size * (idx + 1) - end))
            batch = list(range(start + idx * batch_size, end)) + pad
        else:
            batch = list(range(start + idx * batch_size, start + idx * batch_size + batch_size))
        yield batch

ds_counter = tf.data.Dataset.from_generator(count, args=[6, 100, 5], output_types=tf.int32, output_shapes=(5,))
for count_batch in ds_counter.repeat().batch(10, drop_remainder=False).take(10):
    print(count_batch.numpy())

# NOTE: assumes the simple `count(stop)` generator again, not the 3-arg version above.
ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes=())
ds_counter = ds_counter.padded_batch(5, padded_shapes=None, drop_remainder=True).shuffle(100)
for count_batch in ds_counter:
    print(count_batch.numpy())
dataset2 = dataset.padded_batch(2,
                                padded_shapes=([4], [None]),
                                padding_values=(-1, 100))
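# Hedged sketch (not in the original notes): the infinitely-looping Dataset mentioned
# at the top of this section; repeat() with no argument loops forever, so fit() needs
# an explicit steps_per_epoch to delimit an epoch. Assumes `dataset` yields (x, y) batches.
model.fit(dataset.repeat(), steps_per_epoch=500, epochs=10)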
## ========================================================EVALUATE/ANALYSE
# Generate generalization metrics
score = model.evaluate(X_test, targets_test, verbose=0)
print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')
# Plot history: categorical crossentropy & accuracy
plt.plot(history.history['loss'], label='Categorical crossentropy (training data)')
plt.plot(history.history['val_loss'], label='Categorical crossentropy (validation data)')
plt.plot(history.history['accuracy'], label='Accuracy (training data)')
plt.plot(history.history['val_accuracy'], label='Accuracy (validation data)')
plt.title('Model performance for 3D MNIST Keras Conv3D example')
plt.ylabel('Loss value')
plt.xlabel('Epoch')
plt.legend(loc="upper left")
plt.show()
##
fig, ax = plt.subplots(1, 2, figsize=(20, 3))
ax = ax.ravel()
for i, metric in enumerate(["acc", "loss"]):
    ax[i].plot(model.history.history[metric])
    ax[i].plot(model.history.history["val_" + metric])
    ax[i].set_title("Model {}".format(metric))
    ax[i].set_xlabel("epochs")
    ax[i].set_ylabel(metric)
    ax[i].legend(["train", "val"])
#======================== Tensorboard
bucket = sagemaker_session.default_bucket()
prefix = 'tensorboard_keras_cifar10'
tensorflow_logs_path = "s3://{}/{}/logs".format(bucket, prefix)
print('Bucket: {}'.format(bucket))
print('SageMaker ver: ' + sagemaker.__version__)
print('Tensorflow ver: ' + tf.__version__)
writer = tf.io.TFRecordWriter(filename)  # NOTE: `filename` is an undefined placeholder; TFRecord-writing aside
aws_region = sagemaker_session.boto_region_name
!AWS_REGION={aws_region} tensorboard --logdir {tensorflow_logs_path}
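# Hedged sketch (not in the original notes): wiring TensorBoard logging into training
# with the built-in callback, pointed at the log path defined above.
tb_callback = keras.callbacks.TensorBoard(log_dir=tensorflow_logs_path, histogram_freq=1)
model.fit(x_train, y_train, epochs=2, callbacks=[tb_callback])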
#========================
# TODO: print number of batches; print epoch number at every 10th epoch; save-model check;
# print lr; save stdout => checkpoints and logs
# Sanity check: a restored model should reproduce the original model's predictions.
np.testing.assert_allclose(model.predict(test_input), reconstructed_model.predict(test_input))
## TODO: do analysis on source files -> feature generation etc.