The skeleton of a deep convolutional network
import tensorflow as tf
from IPython.display import Markdown, display
import numpy as np
import matplotlib.pyplot as plt  # used by the visualization section below

# Load MNIST and scale pixel values to [0, 1]
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train = tf.one_hot(y_train, 10)
y_test = tf.one_hot(y_test, 10)

width = 28             # width of the image in pixels
height = 28            # height of the image in pixels
flat = width * height  # number of pixels in one image
class_output = 10      # number of possible classifications for the problem

# Reshape to NHWC tensors so they match conv2d's expected input
x_image_train = tf.reshape(x_train, [-1, 28, 28, 1])
x_image_train = tf.cast(x_image_train, 'float32')
x_image_test = tf.reshape(x_test, [-1, 28, 28, 1])
x_image_test = tf.cast(x_image_test, 'float32')

# Create datasets from the reshaped inputs, batched 50 examples at a time
train_ds2 = tf.data.Dataset.from_tensor_slices((x_image_train, y_train)).batch(50)
test_ds2 = tf.data.Dataset.from_tensor_slices((x_image_test, y_test)).batch(50)
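# Quick sanity check (an addition, not in the original gist): confirm one batch
# has the shapes the layers below expect -- images (50, 28, 28, 1), labels (50, 10).
for xb, yb in train_ds2.take(1):
    print("batch images:", xb.shape, "batch labels:", yb.shape)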
# 1st convolutional layer: 32 filters of 5x5 over 1 input channel
W_conv1 = tf.Variable(tf.random.truncated_normal([5, 5, 1, 32], stddev=0.1, seed=0))
b_conv1 = tf.Variable(tf.constant(0.1, shape=[32]))  # need 32 biases for 32 outputs
def convolve1(x): return tf.nn.conv2d(x, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1
def h_conv1(x): return tf.nn.relu(convolve1(x))
# Apply 2x2 max pooling, halving the spatial size to 14x14
def conv1(x): return tf.nn.max_pool(h_conv1(x), ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# 2nd convolutional layer: 64 filters of 5x5 over the 32 channels from conv1
W_conv2 = tf.Variable(tf.random.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=1))
b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]))  # need 64 biases for 64 outputs
def convolve2(x): return tf.nn.conv2d(conv1(x), W_conv2, strides=[1, 1, 1, 1], padding='SAME') + b_conv2
def h_conv2(x): return tf.nn.relu(convolve2(x))
# Apply 2x2 max pooling again, halving the spatial size to 7x7
def conv2(x): return tf.nn.max_pool(h_conv2(x), ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
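# Shape check (an addition, not in the original gist): each 2x2 max-pool halves
# the spatial size, 28 -> 14 -> 7, so conv2 emits [batch, 7, 7, 64] -- which is
# why the fully connected layer below expects 7 * 7 * 64 = 3136 inputs per image.
print("conv2 output:", conv2(x_image_test[:1]).shape)  # expected (1, 7, 7, 64)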
# Fully connected layer: flatten the 7x7x64 conv2 output, then map to 1024 features
W_fc1 = tf.Variable(tf.random.truncated_normal([7 * 7 * 64, 1024], stddev=0.1, seed=2))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))  # need 1024 biases for 1024 outputs
def layer2_matrix(x): return tf.reshape(conv2(x), [-1, 7 * 7 * 64])
def fcl(x): return tf.matmul(layer2_matrix(x), W_fc1) + b_fc1
def h_fc1(x): return tf.nn.relu(fcl(x))
# Final readout layer: dropout, then a dense layer followed by softmax
W_fc2 = tf.Variable(tf.random.truncated_normal([1024, 10], stddev=0.1, seed=2))  # 1024 neurons in
b_fc2 = tf.Variable(tf.constant(0.1, shape=[10]))  # 10 possibilities for digits [0,1,2,3,4,5,6,7,8,9]
drop_rate = 0.5  # note: in TF2, tf.nn.dropout takes the *drop* rate, not a keep probability
def layer_drop(x): return tf.nn.dropout(h_fc1(x), rate=drop_rate)
def fc(x): return tf.matmul(layer_drop(x), W_fc2) + b_fc2
# Final model
def model(x): return tf.nn.softmax(fc(x))
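# A training-aware variant (a sketch, not part of the original gist): model()
# above applies dropout unconditionally, so it also randomizes predictions at
# evaluation time. Passing an explicit flag keeps dropout training-only.
def model_v2(x, training=True):
    h = h_fc1(x)
    if training:
        h = tf.nn.dropout(h, rate=drop_rate)
    return tf.nn.softmax(tf.matmul(h, W_fc2) + b_fc2)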
# Loss, optimizer, and accuracy
# Initial accuracy of the untrained network over the full training set
correct_prediction = tf.equal(tf.argmax(model(x_image_train), axis=1), tf.argmax(y_train, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float32'))
optimizer = tf.keras.optimizers.Adam(1e-4)
# Summed cross-entropy; the 1e-10 guards against log(0)
def cross_entropy(y_label, y_pred): return -tf.reduce_sum(y_label * tf.math.log(y_pred + 1.e-10))
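# Numerically stabler alternative (an assumption, not in the original gist):
# let TensorFlow fuse softmax and log instead of computing log(softmax + 1e-10).
def cross_entropy_from_logits(y_label, x):
    return tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(labels=y_label, logits=fc(x)))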
variables = [W_conv1, b_conv1,
             W_conv2, b_conv2,
             W_fc1, b_fc1,
             W_fc2, b_fc2]
def train_step(x, y):
    with tf.GradientTape() as tape:
        current_loss = cross_entropy(y, model(x))
    grads = tape.gradient(current_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return current_loss.numpy()
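# Optional speed-up (a sketch, not in the original gist): tracing the step with
# tf.function compiles it into a graph; .numpy() must then stay outside the
# compiled function, so the loss is converted at the call site instead.
@tf.function
def compiled_train_step(x, y):
    with tf.GradientTape() as tape:
        current_loss = cross_entropy(y, model(x))
    grads = tape.gradient(current_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return current_loss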
# Training loop
loss_values = []
accuracies = []
epochs = 10
for i in range(epochs):
    j = 0
    # each batch has 50 examples
    for x_train_batch, y_train_batch in train_ds2:
        j += 1
        current_loss = train_step(x_train_batch, y_train_batch)
        if j % 50 == 0:  # report intermittent batch statistics
            correct_prediction = tf.equal(tf.argmax(model(x_train_batch), axis=1),
                                          tf.argmax(y_train_batch, axis=1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)).numpy()
            print("epoch", i+1, "batch", j, "loss:", current_loss, "accuracy", accuracy)
    # end-of-epoch loss and accuracy over the full training set
    current_loss = cross_entropy(y_train, model(x_image_train)).numpy()
    loss_values.append(current_loss)
    correct_prediction = tf.equal(tf.argmax(model(x_image_train), axis=1), tf.argmax(y_train, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)).numpy()
    accuracies.append(accuracy)
    print("end of epoch", i+1, "loss", current_loss, "accuracy", accuracy)
# Evaluation
j = 0
accuracies = []  # reset so the average covers the test set only
# evaluate accuracy by batch and average ... reporting every 100th batch
for x_test_batch, y_test_batch in test_ds2:
    j += 1
    correct_prediction = tf.equal(tf.argmax(model(x_test_batch), axis=1), tf.argmax(y_test_batch, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)).numpy()
    accuracies.append(accuracy)
    if j % 100 == 0:
        print("batch", j, "accuracy", accuracy)
print("accuracy of entire test set", np.mean(accuracies))
###################################################
## Check the model with a sample digit
###################################################
plt.rcParams['figure.figsize'] = (5.0, 5.0)
sampleimage = np.array([x_image_train[0]])
plt.imshow(np.reshape(sampleimage, [28, 28]), cmap="gray")
# Result of the 1st conv layer
ActivatedUnits = convolve1(sampleimage)
filters = ActivatedUnits.shape[3]
plt.figure(1, figsize=(20, 20))
n_columns = 6
n_rows = int(np.ceil(filters / n_columns)) + 1
for i in range(filters):
    plt.subplot(n_rows, n_columns, i+1)
    plt.title('Filter ' + str(i))
    plt.imshow(ActivatedUnits[0, :, :, i], interpolation="nearest", cmap="gray")
# Result of the 2nd conv layer
ActivatedUnits = convolve2(sampleimage)
filters = ActivatedUnits.shape[3]
plt.figure(2, figsize=(20, 20))  # separate figure so the first grid is kept
n_columns = 8
n_rows = int(np.ceil(filters / n_columns)) + 1
for i in range(filters):
    plt.subplot(n_rows, n_columns, i+1)
    plt.title('Filter ' + str(i))
    plt.imshow(ActivatedUnits[0, :, :, i], interpolation="nearest", cmap="gray")