NiloyPurkait · October 20, 2021 05:26 · msris108 · Nov 9, 2019 · Ben-Epstein · May 14, 2020
diff --git a/MNIST_Keras2DML.py b/MNIST_Keras2DML.py
 ################################### Keras2DML: Training a neural network in parallel with SystemML ####################################### 
 import tensorflow as tf
 import keras
 from keras.models import Sequential
 from keras.layers import Input, Dense, Conv1D, Conv2D, MaxPooling2D, Dropout,Flatten
 from keras import backend as K
 from keras.models import Model
 import numpy as np
 import matplotlib.pyplot as plt


 from keras.datasets import mnist
 # Fetch the MNIST train/test splits; each split unpacks into (images, labels).
 (X_train, y_train), (X_test, y_test) = mnist.load_data()

 # Notebook-style inspection: the training images are expected to be a
 # numpy n-dimensional array of shape (60000, 28, 28).
 type(X_train), X_train.shape, type(X_train)

 # This time we flatten every 28 x 28 image into a single row of 784 values,
 # applying the same reshape to both splits.
 X_train, X_test = (images.reshape(-1, 784) for images in (X_train, X_test))

 # Expect (60000, 784) for the training data and (10000, 784) for the test data.
 type(X_train), X_train.shape, X_test.shape


 #We also use sklearn's MinMaxScaler for normalizing

 from sklearn.preprocessing import MinMaxScaler
 def scaleData(data):
    """Rescale ``data`` with a freshly fitted ``MinMaxScaler``.

    The scaler targets the (0, 1) feature range and is fitted on ``data``
    itself, so the result depends only on the array passed in.

    Returns the output of ``MinMaxScaler.fit_transform`` for ``data``.
    """
    return MinMaxScaler(feature_range=(0, 1)).fit_transform(data)

 # NOTE(review): every call fits its own scaler, so the train and test splits
 # are scaled independently of each other -- confirm this is intended.
 X_train, X_test = scaleData(X_train), scaleData(X_test)


 # We define the same Keras model as earlier

 # Pick the per-sample input shape that matches the backend's channel ordering.
 if K.image_data_format() == 'channels_first':
     input_shape = (1, 28, 28)
 else:
     input_shape = (28, 28, 1)

 # Two convolution + max-pooling stages, then a dense classifier that ends in
 # a 10-way softmax. Layers are listed in the order they are stacked.
 keras_model = Sequential([
     Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=input_shape, padding='same'),
     MaxPooling2D(pool_size=(2, 2)),
     Conv2D(64, (5, 5), activation='relu', padding='same'),
     MaxPooling2D(pool_size=(2, 2)),
     Flatten(),
     Dense(512, activation='relu'),
     Dropout(0.5),
     Dense(10, activation='softmax'),
 ])
 keras_model.summary()


 # Import the Keras to DML wrapper and define some basic variables

 from systemml.mllearn import Keras2DML
 # math.ceil is needed below, but this script never imported math (the
 # original line raised NameError), so bring it into scope here.
 import math

 epochs = 5
 batch_size = 100
 samples = 60000
 # Total iterations = epochs x number of mini-batches per epoch. The division
 # is forced to float so the ceiling is also correct under Python 2 integer
 # division when samples is not a multiple of batch_size.
 max_iter = int(epochs * math.ceil(samples / float(batch_size)))

 # Create a SystemML model by calling Keras2DML with the Spark session, the
 # Keras model, its input shape and the variables defined above. display=10
 # asks for the training results to be shown every 10 iterations.
 sysml_model = Keras2DML(
     spark,
     keras_model,
     input_shape=(1, 28, 28),
     weights='weights_dir',
     batch_size=batch_size,
     max_iter=max_iter,
     test_interval=0,
     display=10,
 )

 # Start training. More Spark workers and a better machine configuration mean
 # faster training!
 sysml_model.fit(X_train, y_train)

 # Score the model on the held-out test set, and iterate again if required.
 sysml_model.score(X_test, y_test)
diff --git a/MNIST_Keras_only.py b/MNIST_Keras_only.py
 ################################### Keras Only: Training the same Neural network without SystemML####################################### 

 import tensorflow as tf
 import keras
 from keras.models import Sequential
 from keras.layers import Input, Dense, Conv1D, Conv2D, MaxPooling2D, Dropout,Flatten
 from keras import backend as K
 from keras.models import Model
 import numpy as np
 import matplotlib.pyplot as plt

 # Fetch the MNIST train/test splits through the Keras datasets module.
 mnist = keras.datasets.mnist
 (X_train, y_train), (X_test, y_test) = mnist.load_data()

 # Notebook-style inspection of the sample type and the shape of every array.
 type(X_train[0]), X_train.shape, y_train.shape, X_test.shape, y_test.shape

 # Plot the first training image to see the digit
 # (pass cmap=plt.cm.binary for a black-and-white rendering).
 plt.imshow(X_train[0])

 # Conv2D expects a channel axis, so append a trailing axis of length 1 to
 # both splits (see keras.io/layers for details).
 X_train, X_test = (images[..., np.newaxis] for images in (X_train, X_test))

 # Check the shapes again. Ideally you should see:
 # (numpy.ndarray, (60000, 28, 28, 1), (10000, 28, 28, 1))
 type(X_train[0]), X_train.shape, X_test.shape



 # Scale the pixel values, which are usually between 0 and 255, down to values
 # between 0 and 1. tf.keras.utils.normalize does this by L2-normalizing along
 # the given axis (axis=1 here) rather than by dividing by 255.
 # Normalization makes it easier for the network to learn; experiment without
 # it and you'll see the difference in accuracy.
 x_train, x_test = (
     tf.keras.utils.normalize(images, axis=1) for images in (X_train, X_test)
 )




 # Design your neural network using a Keras Model, and pay attention to the input shape of your data. In our case, we are feeding our network 28X28 pixel vectors

 # Pick the per-sample input shape that matches the backend's channel ordering.
 if K.image_data_format() == 'channels_first':
     input_shape = (1, 28, 28)
 else:
     input_shape = (28, 28, 1)

 # Two convolution + max-pooling stages, then a dense classifier that ends in
 # a 10-way softmax. Layers are listed in the order they are stacked.
 keras_model = Sequential([
     Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=input_shape, padding='same'),
     MaxPooling2D(pool_size=(2, 2)),
     Conv2D(64, (5, 5), activation='relu', padding='same'),
     MaxPooling2D(pool_size=(2, 2)),
     Flatten(),
     Dense(512, activation='relu'),
     Dropout(0.5),
     Dense(10, activation='softmax'),
 ])
 keras_model.summary()


 # Compile your model, choose an appropriate optimizer, loss function and metric to track.

 # Labels are integer class ids, hence the sparse categorical cross-entropy.
 keras_model.compile(
     optimizer='adam',  # 'sgd' is an alternative to experiment with
     loss='sparse_categorical_crossentropy',
     metrics=['accuracy'],
 )

 # Train the model, adjusting batch size and epochs iteratively.
 # The fit call is timed so the run can be compared with other setups.
 import time

 start = time.time()
 keras_model.fit(x_train, y_train, batch_size=100, epochs=5)
 end = time.time()
 print("training time:", end - start)

 # Evaluate the trained model on the held-out test set.
 keras_model.evaluate(x_test, y_test)

 # Make predictions with the model trained above. The original called an
 # undefined name (`load_model`), which raised NameError; the trained model in
 # this script is `keras_model`. The input array is passed directly instead of
 # being wrapped in a one-element list.
 predictions = keras_model.predict(x_test)

 # Drop the length-1 axes so the images can be plotted.
 x_test = np.squeeze(x_test)
 x_test.shape

 # Print the class with the highest predicted probability for the first image.
 print(np.argmax(predictions[0]))

 # Plot the first image and check whether the network got it right.
 plt.imshow(x_test[0])
	################################### Keras2DML: Training a neural network in parallel with SystemML #######################################
	import tensorflow as tf
	import keras
	from keras.models import Sequential
	from keras.layers import Input, Dense, Conv1D, Conv2D, MaxPooling2D, Dropout,Flatten
	from keras import backend as K
	from keras.models import Model
	import numpy as np
	import matplotlib.pyplot as plt


	from keras.datasets import mnist
	# Fetch the MNIST train/test splits; each split unpacks into (images, labels).
	(X_train, y_train), (X_test, y_test) = mnist.load_data()

	# Notebook-style inspection: the training images are expected to be a
	# numpy n-dimensional array of shape (60000, 28, 28).
	type(X_train), X_train.shape, type(X_train)

	# This time we flatten every 28 x 28 image into a single row of 784 values,
	# applying the same reshape to both splits.
	X_train, X_test = (images.reshape(-1, 784) for images in (X_train, X_test))

	# Expect (60000, 784) for the training data and (10000, 784) for the test data.
	type(X_train), X_train.shape, X_test.shape


	#We also use sklearn's MinMaxScaler for normalizing

	from sklearn.preprocessing import MinMaxScaler
	def scaleData(data):
		"""Min-max scale ``data`` into the (0, 1) feature range.

		A new ``MinMaxScaler`` is fitted on ``data`` itself on every call.
		The body is indented under the ``def``; it was previously at the same
		level as the ``def`` line, which is an IndentationError.

		Returns the output of ``MinMaxScaler.fit_transform`` for ``data``.
		"""
		# normalize features
		scaler = MinMaxScaler(feature_range=(0, 1))
		return scaler.fit_transform(data)

	# NOTE(review): every call fits its own scaler, so the train and test splits
	# are scaled independently of each other -- confirm this is intended.
	X_train = scaleData(X_train)
	X_test = scaleData(X_test)


	# We define the same Keras model as earlier

	# Pick the per-sample input shape that matches the backend's channel ordering.
	if K.image_data_format() == 'channels_first':
		input_shape = (1, 28, 28)
	else:
		input_shape = (28, 28, 1)

	# Two convolution + max-pooling stages, then a dense classifier that ends in
	# a 10-way softmax. Layers are listed in the order they are stacked.
	keras_model = Sequential([
		Conv2D(32, kernel_size=(5, 5), activation='relu', input_shape=input_shape, padding='same'),
		MaxPooling2D(pool_size=(2, 2)),
		Conv2D(64, (5, 5), activation='relu', padding='same'),
		MaxPooling2D(pool_size=(2, 2)),
		Flatten(),
		Dense(512, activation='relu'),
		Dropout(0.5),
		Dense(10, activation='softmax'),
	])
	keras_model.summary()


	# Import the Keras to DML wrapper and define some basic variables

	from systemml.mllearn import Keras2DML
	# math.ceil is needed below, but this script never imported math (the
	# original line raised NameError), so bring it into scope here.
	import math

	epochs = 5
	batch_size = 100
	samples = 60000
	# Total iterations = epochs x number of mini-batches per epoch. The division
	# is forced to float so the ceiling is also correct under Python 2 integer
	# division when samples is not a multiple of batch_size.
	max_iter = int(epochs * math.ceil(samples / float(batch_size)))

	# Create a SystemML model by calling Keras2DML with the Spark session, the
	# Keras model, its input shape and the variables defined above. display=10
	# asks for the training results to be shown every 10 iterations.
	sysml_model = Keras2DML(
		spark,
		keras_model,
		input_shape=(1, 28, 28),
		weights='weights_dir',
		batch_size=batch_size,
		max_iter=max_iter,
		test_interval=0,
		display=10,
	)

	# Start training. More Spark workers and a better machine configuration mean
	# faster training!
	sysml_model.fit(X_train, y_train)

	# Score the model on the held-out test set, and iterate again if required.
	sysml_model.score(X_test, y_test)