The skeleton of a deep convolutional network
import tensorflow as tf
from IPython.display import Markdown, display
import numpy as np
import matplotlib.pyplot as plt  # used by the visualization section below

# Load MNIST and scale pixel values to [0, 1]
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train = tf.one_hot(y_train, 10)
y_test = tf.one_hot(y_test, 10)

width = 28             # width of the image in pixels
height = 28            # height of the image in pixels
flat = width * height  # number of pixels in one image
class_output = 10      # number of possible classifications for the problem

# Reshape to NHWC tensors so they match conv2d's expected input
x_image_train = tf.reshape(x_train, [-1, 28, 28, 1])
x_image_train = tf.cast(x_image_train, 'float32')
x_image_test = tf.reshape(x_test, [-1, 28, 28, 1])
x_image_test = tf.cast(x_image_test, 'float32')

# Create datasets from the reshaped inputs, batched 50 examples at a time
train_ds2 = tf.data.Dataset.from_tensor_slices((x_image_train, y_train)).batch(50)
test_ds2 = tf.data.Dataset.from_tensor_slices((x_image_test, y_test)).batch(50)
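# Quick sanity check (an addition, not in the original gist): confirm one batch
# has the shapes the layers below expect -- images (50, 28, 28, 1), labels (50, 10).
for xb, yb in train_ds2.take(1):
    print("batch images:", xb.shape, "batch labels:", yb.shape)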
# 1st convolutional layer: 32 filters of 5x5 over 1 input channel
W_conv1 = tf.Variable(tf.random.truncated_normal([5, 5, 1, 32], stddev=0.1, seed=0))
b_conv1 = tf.Variable(tf.constant(0.1, shape=[32]))  # need 32 biases for 32 outputs
def convolve1(x): return tf.nn.conv2d(x, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1
def h_conv1(x): return tf.nn.relu(convolve1(x))
# Apply 2x2 max pooling, halving the spatial size to 14x14
def conv1(x): return tf.nn.max_pool(h_conv1(x), ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# 2nd convolutional layer: 64 filters of 5x5 over the 32 channels from conv1
W_conv2 = tf.Variable(tf.random.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=1))
b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]))  # need 64 biases for 64 outputs
def convolve2(x): return tf.nn.conv2d(conv1(x), W_conv2, strides=[1, 1, 1, 1], padding='SAME') + b_conv2
def h_conv2(x): return tf.nn.relu(convolve2(x))
# Apply 2x2 max pooling again, halving the spatial size to 7x7
def conv2(x): return tf.nn.max_pool(h_conv2(x), ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
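# Shape check (an addition, not in the original gist): each 2x2 max-pool halves
# the spatial size, 28 -> 14 -> 7, so conv2 emits [batch, 7, 7, 64] -- which is
# why the fully connected layer below expects 7 * 7 * 64 = 3136 inputs per image.
print("conv2 output:", conv2(x_image_test[:1]).shape)  # expected (1, 7, 7, 64)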
# Fully connected layer: flatten the 7x7x64 conv2 output, then map to 1024 features
W_fc1 = tf.Variable(tf.random.truncated_normal([7 * 7 * 64, 1024], stddev=0.1, seed=2))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))  # need 1024 biases for 1024 outputs
def layer2_matrix(x): return tf.reshape(conv2(x), [-1, 7 * 7 * 64])
def fcl(x): return tf.matmul(layer2_matrix(x), W_fc1) + b_fc1
def h_fc1(x): return tf.nn.relu(fcl(x))
# Final readout layer: dropout, then a dense layer followed by softmax
W_fc2 = tf.Variable(tf.random.truncated_normal([1024, 10], stddev=0.1, seed=2))  # 1024 neurons in
b_fc2 = tf.Variable(tf.constant(0.1, shape=[10]))  # 10 possibilities for digits [0,1,2,3,4,5,6,7,8,9]
drop_rate = 0.5  # note: in TF2, tf.nn.dropout takes the *drop* rate, not a keep probability
def layer_drop(x): return tf.nn.dropout(h_fc1(x), rate=drop_rate)
def fc(x): return tf.matmul(layer_drop(x), W_fc2) + b_fc2
# Final model
def model(x): return tf.nn.softmax(fc(x))
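# A training-aware variant (a sketch, not part of the original gist): model()
# above applies dropout unconditionally, so it also randomizes predictions at
# evaluation time. Passing an explicit flag keeps dropout training-only.
def model_v2(x, training=True):
    h = h_fc1(x)
    if training:
        h = tf.nn.dropout(h, rate=drop_rate)
    return tf.nn.softmax(tf.matmul(h, W_fc2) + b_fc2)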
# Loss, optimizer, and accuracy
# Initial accuracy of the untrained network over the full training set
correct_prediction = tf.equal(tf.argmax(model(x_image_train), axis=1), tf.argmax(y_train, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float32'))
optimizer = tf.keras.optimizers.Adam(1e-4)
# Summed cross-entropy; the 1e-10 guards against log(0)
def cross_entropy(y_label, y_pred): return -tf.reduce_sum(y_label * tf.math.log(y_pred + 1.e-10))
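# Numerically stabler alternative (an assumption, not in the original gist):
# let TensorFlow fuse softmax and log instead of computing log(softmax + 1e-10).
def cross_entropy_from_logits(y_label, x):
    return tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(labels=y_label, logits=fc(x)))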
variables = [W_conv1, b_conv1,
             W_conv2, b_conv2,
             W_fc1, b_fc1,
             W_fc2, b_fc2]
def train_step(x, y):
    with tf.GradientTape() as tape:
        current_loss = cross_entropy(y, model(x))
    grads = tape.gradient(current_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return current_loss.numpy()
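# Optional speed-up (a sketch, not in the original gist): tracing the step with
# tf.function compiles it into a graph; .numpy() must then stay outside the
# compiled function, so the loss is converted at the call site instead.
@tf.function
def compiled_train_step(x, y):
    with tf.GradientTape() as tape:
        current_loss = cross_entropy(y, model(x))
    grads = tape.gradient(current_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return current_loss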
# Training loop
loss_values = []
accuracies = []
epochs = 10
for i in range(epochs):
    j = 0
    # each batch has 50 examples
    for x_train_batch, y_train_batch in train_ds2:
        j += 1
        current_loss = train_step(x_train_batch, y_train_batch)
        if j % 50 == 0:  # report intermittent batch statistics
            correct_prediction = tf.equal(tf.argmax(model(x_train_batch), axis=1),
                                          tf.argmax(y_train_batch, axis=1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)).numpy()
            print("epoch", i+1, "batch", j, "loss:", current_loss, "accuracy", accuracy)
    # end-of-epoch loss and accuracy over the full training set
    current_loss = cross_entropy(y_train, model(x_image_train)).numpy()
    loss_values.append(current_loss)
    correct_prediction = tf.equal(tf.argmax(model(x_image_train), axis=1), tf.argmax(y_train, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)).numpy()
    accuracies.append(accuracy)
    print("end of epoch", i+1, "loss", current_loss, "accuracy", accuracy)
# Evaluation
j = 0
accuracies = []  # reset so the average covers the test set only
# evaluate accuracy by batch and average ... reporting every 100th batch
for x_test_batch, y_test_batch in test_ds2:
    j += 1
    correct_prediction = tf.equal(tf.argmax(model(x_test_batch), axis=1), tf.argmax(y_test_batch, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)).numpy()
    accuracies.append(accuracy)
    if j % 100 == 0:
        print("batch", j, "accuracy", accuracy)
print("accuracy of entire test set", np.mean(accuracies))
###################################################
## Check the model with a sample digit
###################################################
plt.rcParams['figure.figsize'] = (5.0, 5.0)
sampleimage = np.array([x_image_train[0]])
plt.imshow(np.reshape(sampleimage, [28, 28]), cmap="gray")
# Result of the 1st conv layer
ActivatedUnits = convolve1(sampleimage)
filters = ActivatedUnits.shape[3]
plt.figure(1, figsize=(20, 20))
n_columns = 6
n_rows = int(np.ceil(filters / n_columns)) + 1
for i in range(filters):
    plt.subplot(n_rows, n_columns, i+1)
    plt.title('Filter ' + str(i))
    plt.imshow(ActivatedUnits[0, :, :, i], interpolation="nearest", cmap="gray")
# Result of the 2nd conv layer
ActivatedUnits = convolve2(sampleimage)
filters = ActivatedUnits.shape[3]
plt.figure(2, figsize=(20, 20))  # separate figure so the first grid is kept
n_columns = 8
n_rows = int(np.ceil(filters / n_columns)) + 1
for i in range(filters):
    plt.subplot(n_rows, n_columns, i+1)
    plt.title('Filter ' + str(i))
    plt.imshow(ActivatedUnits[0, :, :, i], interpolation="nearest", cmap="gray")