saisumit · November 28, 2017 10:06
diff --git a/train.py b/train.py
 # import libraries
 from __future__ import print_function
 from __future__ import division

 import numpy as np 
 import pandas as pd
 import os
 import re

 import tensorflow as tf
 import tflearn
 from tflearn.layers.conv import conv_2d, max_pool_2d
 from tflearn.layers.core import input_data, dropout, fully_connected
 from tflearn.layers.estimator import regression
 from tflearn.layers.normalization import local_response_normalization

 import random
 from timeit import default_timer as timer

 import tsahelper as tsa


 #---------------------------------------------------------------------------------------
 # Constants
 #
 # INPUT_FOLDER:                 The folder that contains the source data
 #
 # PREPROCESSED_DATA_FOLDER:     The folder that contains preprocessed .npy files 
 # 
 # STAGE1_LABELS:                The CSV file containing the labels by subject
 #
 # THREAT_ZONE:                  Threat Zone to train on (actual number not 0 based)
 #
 # BATCH_SIZE:                   Number of Subjects per batch
 #
 # EXAMPLES_PER_SUBJECT          Number of examples generated per subject
 #
 # FILE_LIST:                    A list of the preprocessed .npy files to batch
 # 
 # TRAIN_TEST_SPLIT_RATIO:       Ratio to split the FILE_LIST between train and test
 #
 # TRAIN_SET_FILE_LIST:          The list of .npy files to be used for training
 #
 # TEST_SET_FILE_LIST:           The list of .npy files to be used for testing
 #
 # IMAGE_DIM:                    The height and width of the images in pixels
 #
 # LEARNING_RATE                 Learning rate for the neural network
 #
 # N_TRAIN_STEPS                 The number of train steps (epochs) to run
 #
 # TRAIN_PATH                    Place to store the tensorboard logs
 #
 # MODEL_PATH                    Path where model files are stored
 #
 # MODEL_NAME                    Name of the model files
 #
 #----------------------------------------------------------------------------------------
 # --- data locations ---------------------------------------------------------
 INPUT_FOLDER = 'tsa_datasets/stage1/aps'                  # raw scan files
 PREPROCESSED_DATA_FOLDER = 'tsa_datasets/preprocessed/'   # .npy batch files
 STAGE1_LABELS = 'tsa_datasets/stage1_labels.csv'          # labels by subject

 # --- batching ---------------------------------------------------------------
 THREAT_ZONE = 1                # zone to train on (1-based, not 0-based)
 BATCH_SIZE = 16                # subjects per batch
 EXAMPLES_PER_SUBJECT = 182     # examples generated per subject

 # --- train/test bookkeeping (filled in by get_train_test_file_list) ---------
 FILE_LIST = []
 TRAIN_TEST_SPLIT_RATIO = 0.2
 TRAIN_SET_FILE_LIST = []
 TEST_SET_FILE_LIST = []

 # --- model hyper-parameters and output locations ----------------------------
 IMAGE_DIM = 250                # image height and width in pixels
 LEARNING_RATE = 1e-3
 N_TRAIN_STEPS = 1              # number of passes over the training files
 TRAIN_PATH = 'tsa_logs/train/'     # tensorboard logs
 MODEL_PATH = 'tsa_logs/model/'     # model checkpoints
 MODEL_NAME = 'tsa-{}-lr-{}-{}-{}-tz-{}'.format(
     'alexnet-v0.1',
     LEARNING_RATE,
     IMAGE_DIM,
     IMAGE_DIM,
     THREAT_ZONE,
 )





 #---------------------------------------------------------------------------------------
 # preprocess_tsa_data(): preprocesses the tsa datasets
 #
 # parameters:      none
 #
 # returns:         none
 #---------------------------------------------------------------------------------------

 def _describe_image(img):
     """Print the shape and mean of an intermediate image as a progress trace."""
     print('-> shape {}|mean={}'.format(img.shape, img.mean()))


 def _save_threat_zone_batches(threat_zone_examples, batch_num):
     """Write the accumulated examples to disk, one .npy file per threat zone.

     parameters:      threat_zone_examples - non-empty list of
                                             [[tz_num], image, label] entries
                      batch_num - batch counter embedded in the file names

     returns:         none
     """
     # The file name carries the lengths of the first two rows of the first
     # example image.
     first_img = threat_zone_examples[0][1]

     for tz_num in range(len(tsa.zone_slice_list)):

         # Note that the trainer looks for tz{} where {} is a 1-based tz_num
         # in the file name to select the batches for a given threat zone.
         out_file = (PREPROCESSED_DATA_FOLDER +
                     'preprocessed_TSA_scans-tz{}-{}-{}-b{}.npy'.format(
                         tz_num + 1,
                         len(first_img[0]),
                         len(first_img[1]),
                         batch_num))
         print(' -> writing: ' + out_file)

         # keep this zone's examples and drop the zone-number column
         tz_examples = [[example[1], example[2]]
                        for example in threat_zone_examples
                        if example[0] == [tz_num]]

         # The examples are wrapped in one outer list; input_pipeline() walks
         # that two-level layout when it reads the file back.
         np.save(out_file, [tz_examples])


 def preprocess_tsa_data():
     """Preprocess the tsa scans and save them as per-threat-zone .npy batches.

     For every subject and every threat zone, each image in which the zone
     has a slice defined is flipped, converted to grayscale, contrast
     stretched, masked, cropped, normalized and zero centered.  Examples are
     flushed to PREPROCESSED_DATA_FOLDER whenever a full minibatch of
     BATCH_SIZE * EXAMPLES_PER_SUBJECT examples has accumulated, and once more
     at the end for any remainder.

     parameters:      none

     returns:         none
     """
     # OPTION 1: get a list of all subjects for which there are labels
     #df = pd.read_csv(STAGE1_LABELS)
     #df['Subject'], df['Zone'] = df['Id'].str.split('_',1).str
     #SUBJECT_LIST = df['Subject'].unique()

     # OPTION 2: get a list of all subjects for whom there is data
     #SUBJECT_LIST = [os.path.splitext(subject)[0] for subject in os.listdir(INPUT_FOLDER)]

     # OPTION 3: get a list of subjects for small bore test purposes
     SUBJECT_LIST = ['00360f79fd6e02781457eda48f85da90','0043db5e8c819bffc15261b1f1ac5e42',
                     '0050492f92e22eed3474ae3a6fc907fa','006ec59fa59dd80a64c85347eef810c7',
                     '0097503ee9fa0606559c56458b281a08','011516ab0eca7cad7f5257672ddde70e']

     # initialize tracking and saving items
     batch_num = 1
     threat_zone_examples = []
     start_time = timer()
     full_batch_len = BATCH_SIZE * EXAMPLES_PER_SUBJECT

     for subject in SUBJECT_LIST:

         # read in the images
         print('--------------------------------------------------------------')
         print('t+> {:5.3f} |Reading images for subject #: {}'.format(timer()-start_time,
                                                                      subject))
         print('--------------------------------------------------------------')
         images = tsa.read_data(INPUT_FOLDER + '/' + subject + '.aps')

         # transpose so that the slice is the first dimension shape(16, 620, 512)
         images = images.transpose()

         # The subject's labels do not depend on the threat zone, so look them
         # up once per subject instead of once per zone.
         # NOTE(review): assumes tsa.get_subject_labels is a pure lookup - confirm.
         subject_labels = tsa.get_subject_labels(STAGE1_LABELS, subject)

         # for each threat zone, loop through each image, mask off the zone and then crop it
         for tz_num, (threat_zone, crop_dims) in enumerate(zip(tsa.zone_slice_list,
                                                               tsa.zone_crop_list)):

             # get label
             label = np.array(tsa.get_subject_zone_label(tz_num, subject_labels))

             for img_num, img in enumerate(images):

                 print('Threat Zone:Image -> {}:{}'.format(tz_num, img_num))
                 print('Threat Zone Label -> {}'.format(label))

                 if threat_zone[img_num] is None:
                     print('-> No view of tz:{} in img:{}. Skipping to next...'.format(
                                 tz_num, img_num))
                     print('------------------------------------------------')
                     continue

                 # correct the orientation of the image
                 print('-> reorienting base image')
                 base_img = np.flipud(img)
                 _describe_image(base_img)

                 # convert to grayscale
                 print('-> converting to grayscale')
                 rescaled_img = tsa.convert_to_grayscale(base_img)
                 _describe_image(rescaled_img)

                 # spread the spectrum to improve contrast
                 print('-> spreading spectrum')
                 high_contrast_img = tsa.spread_spectrum(rescaled_img)
                 _describe_image(high_contrast_img)

                 # get the masked image
                 print('-> masking image')
                 masked_img = tsa.roi(high_contrast_img, threat_zone[img_num])
                 _describe_image(masked_img)

                 # crop the image
                 print('-> cropping image')
                 cropped_img = tsa.crop(masked_img, crop_dims[img_num])
                 _describe_image(cropped_img)

                 # normalize the image
                 print('-> normalizing image')
                 normalized_img = tsa.normalize(cropped_img)
                 _describe_image(normalized_img)

                 # zero center the image
                 print('-> zero centering')
                 zero_centered_img = tsa.zero_center(normalized_img)
                 _describe_image(zero_centered_img)

                 # append the features and labels to this threat zone's example array
                 print ('-> appending example to threat zone {}'.format(tz_num))
                 threat_zone_examples.append([[tz_num], zero_centered_img, label])
                 first_example = threat_zone_examples[0]
                 print ('-> shape {:d}:{:d}:{:d}:{:d}:{:d}:{:d}'.format(
                                                      len(threat_zone_examples),
                                                      len(first_example),
                                                      len(first_example[0]),
                                                      len(first_example[1][0]),
                                                      len(first_example[1][1]),
                                                      len(first_example[2])))
                 print('------------------------------------------------')

         # Each subject is expected to contribute EXAMPLES_PER_SUBJECT examples,
         # so write the data out once a full minibatch is complete.  The
         # emptiness check matters because 0 % n == 0 would otherwise trigger
         # a write that indexes into an empty list.
         if threat_zone_examples and len(threat_zone_examples) % full_batch_len == 0:
             _save_threat_zone_batches(threat_zone_examples, batch_num)

             # reset for next batch
             threat_zone_examples = []
             batch_num += 1

     # we may run out of subjects before we finish a batch, so we write out
     # the last batch stub
     if threat_zone_examples:
         _save_threat_zone_batches(threat_zone_examples, batch_num)
 # unit test ---------------------------------------
 #preprocess_tsa_data()


 #---------------------------------------------------------------------------------------
 # get_train_test_file_list(): gets the batch file list, splits between train and test
 #
 # parameters:      none
 #
 # returns:         none
 #
 #-------------------------------------------------------------------------------------

 def get_train_test_file_list():
     """Collect the batch files for THREAT_ZONE and split them into train/test.

     Populates the globals FILE_LIST, TRAIN_SET_FILE_LIST and
     TEST_SET_FILE_LIST.  The files are sorted by name so the split is the
     same on every run (os.listdir returns entries in arbitrary order).  When
     at least one file matches, at least one file is reserved for testing.
     If PREPROCESSED_DATA_FOLDER is empty the globals are left untouched.

     parameters:      none

     returns:         none
     """
     global FILE_LIST
     global TRAIN_SET_FILE_LIST
     global TEST_SET_FILE_LIST

     folder_entries = os.listdir(PREPROCESSED_DATA_FOLDER)
     if not folder_entries:
         print ('No preprocessed data available.  Skipping preprocessed data setup..')
         return

     # batch files carry the 1-based threat zone number as '-tz<N>-' in their name
     tz_tag = '-tz' + str(THREAT_ZONE) + '-'
     FILE_LIST = sorted(f for f in folder_entries if tz_tag in f)

     # reserve at least one file for testing, but none when nothing matched
     if FILE_LIST:
         n_test_files = max(int(len(FILE_LIST) * TRAIN_TEST_SPLIT_RATIO), 1)
     else:
         n_test_files = 0
     train_test_split = len(FILE_LIST) - n_test_files

     TRAIN_SET_FILE_LIST = FILE_LIST[:train_test_split]
     TEST_SET_FILE_LIST = FILE_LIST[train_test_split:]
     print('Train/Test Split -> {} file(s) of {} used for testing'.format(
           n_test_files, len(FILE_LIST)))
        
 # unit test ----------------------------
 #get_train_test_file_list()

 #---------------------------------------------------------------------------------------
 # input_pipeline(filename, path): prepares a batch of features and labels for training
 #
 # parameters:      filename - the file to be batched into the model
 #                  path - the folder where filename resides
 #
 # returns:         feature_batch - a batch of features to train or test on
 #                  label_batch - a batch of labels related to the feature_batch
 #
 #---------------------------------------------------------------------------------------

 def input_pipeline(filename, path):
     """Prepare a shuffled batch of features and labels from one batch file.

     The file is expected to hold a two-level layout: an outer sequence of
     example lists, each example being a [features, label] pair (this is the
     layout preprocess_tsa_data writes).

     parameters:      filename - the file to be batched into the model
                      path - the folder where filename resides

     returns:         feature_batch - float32 array of features
                      label_batch - float32 array of labels, in the same
                                    order as feature_batch
     """
     # The batch files store Python objects (image/label pairs), which
     # np.load refuses to read unless pickling is explicitly allowed.
     # NOTE: unpickling can execute arbitrary code - only load batch files
     # produced by this project's own preprocessing step.
     preprocessed_tz_scans = np.load(os.path.join(path, filename),
                                     allow_pickle=True)

     # Flatten the two-level layout first.  Shuffling the loaded array
     # directly would only reorder the outer level, which leaves the examples
     # in their original order when there is a single outer entry.
     examples = [example
                 for example_list in preprocessed_tz_scans
                 for example in example_list]

     feature_batch = []
     label_batch = []

     # visit the examples in random order, keeping features and labels paired
     for idx in np.random.permutation(len(examples)):
         feature_batch.append(examples[idx][0])
         label_batch.append(examples[idx][1])

     feature_batch = np.asarray(feature_batch, dtype=np.float32)
     label_batch = np.asarray(label_batch, dtype=np.float32)

     return feature_batch, label_batch
  
 # unit test ------------------------------------------------------------------------
 #print ('Train Set -----------------------------')
 #for f_in in TRAIN_SET_FILE_LIST:
 #    feature_batch, label_batch = input_pipeline(f_in, PREPROCESSED_DATA_FOLDER)
 #    print (' -> features shape {}:{}:{}'.format(len(feature_batch), 
 #                                                len(feature_batch[0]), 
 #                                                len(feature_batch[0][0])))
 #    print (' -> labels shape   {}:{}'.format(len(label_batch), len(label_batch[0])))
    
 #print ('Test Set -----------------------------')
 #for f_in in TEST_SET_FILE_LIST:
 #    feature_batch, label_batch = input_pipeline(f_in, PREPROCESSED_DATA_FOLDER)
 #    print (' -> features shape {}:{}:{}'.format(len(feature_batch), 
 #                                                len(feature_batch[0]), 
 #                                                len(feature_batch[0][0])))
 #    print (' -> labels shape   {}:{}'.format(len(label_batch), len(label_batch[0])))

 #---------------------------------------------------------------------------------------
 # shuffle_train_set(): shuffle the list of batch files so that each train step
 #                      receives them in a different order since the TRAIN_SET_FILE_LIST
 #                      is a global
 #
 # parameters:      train_set - the file listing to be shuffled
 #
 # returns:         none
 #
 #-------------------------------------------------------------------------------------

 def shuffle_train_set(train_set):
     """Shuffle the list of batch files in place.

     random.shuffle reorders the list it is given and returns None, so the
     caller's list (e.g. the global TRAIN_SET_FILE_LIST) is already shuffled
     when this returns; there is nothing to assign back.

     parameters:      train_set - the file listing to be shuffled

     returns:         none
     """
     random.shuffle(train_set)
    
 # Unit test ---------------
 #print ('Before Shuffling ->', TRAIN_SET_FILE_LIST)
 #shuffle_train_set(TRAIN_SET_FILE_LIST)
 #print ('After Shuffling ->', TRAIN_SET_FILE_LIST)


 #---------------------------------------------------------------------------------------
 # alexnet(width, height, lr): defines the alexnet
 #
 # parameters:      width - width of the input image
 #                  height - height of the input image
 #                  lr - learning rate
 #
 # returns:         model - the tflearn.DNN wrapping the network defined here
 #
 #-------------------------------------------------------------------------------------

 def alexnet(width, height, lr):
     """Define the alexnet and wrap it in a tflearn.DNN.

     parameters:      width - width of the input image
                      height - height of the input image
                      lr - learning rate

     returns:         the tflearn.DNN model; checkpoints go to
                      MODEL_PATH + MODEL_NAME, tensorboard logs to TRAIN_PATH
     """
     # single-channel input named 'features'
     net = input_data(shape=[None, width, height, 1], name='features')

     # first convolution stage
     net = conv_2d(net, 96, 11, strides=4, activation='relu')
     net = max_pool_2d(net, 3, strides=2)
     net = local_response_normalization(net)

     # second convolution stage
     net = conv_2d(net, 256, 5, activation='relu')
     net = max_pool_2d(net, 3, strides=2)
     net = local_response_normalization(net)

     # three stacked 3x3 convolutions followed by pooling and normalization
     for n_filters in (384, 384, 256):
         net = conv_2d(net, n_filters, 3, activation='relu')
     net = max_pool_2d(net, 3, strides=2)
     net = local_response_normalization(net)

     # two fully connected layers, each followed by dropout
     for _ in range(2):
         net = fully_connected(net, 4096, activation='tanh')
         net = dropout(net, 0.5)

     # two-way softmax output trained against the 'labels' target
     net = fully_connected(net, 2, activation='softmax')
     net = regression(net, optimizer='momentum', loss='categorical_crossentropy',
                      learning_rate=lr, name='labels')

     checkpoint_prefix = MODEL_PATH + MODEL_NAME
     return tflearn.DNN(net, checkpoint_path=checkpoint_prefix,
                        tensorboard_dir=TRAIN_PATH, tensorboard_verbose=3,
                        max_checkpoints=1)

 #---------------------------------------------------------------------------------------
 # train_conv_net(): runs the train op
 #
 # parameters:      none
 #
 # returns:         none
 #
 #-------------------------------------------------------------------------------------

 def train_conv_net():
     """Run the train op over every training batch file.

     Builds the train/test file lists, instantiates the model, loads all test
     files into one validation set and then fits the model on each training
     file for N_TRAIN_STEPS passes.  If there are no test files the model is
     trained without a validation set.

     parameters:      none

     returns:         none
     """
     # get train and test batches
     get_train_test_file_list()

     # instantiate model
     model = alexnet(IMAGE_DIM, IMAGE_DIM, LEARNING_RATE)

     # read in the validation test set, one piece per test file
     val_feature_parts = []
     val_label_parts = []
     for test_f_in in TEST_SET_FILE_LIST:
         tmp_feature_batch, tmp_label_batch = input_pipeline(test_f_in,
                                                             PREPROCESSED_DATA_FOLDER)
         val_feature_parts.append(tmp_feature_batch)
         val_label_parts.append(tmp_label_batch)

     if val_feature_parts:
         # join the pieces once instead of re-copying the array per file
         val_features = np.concatenate(val_feature_parts, axis=0)
         val_features = val_features.reshape(-1, IMAGE_DIM, IMAGE_DIM, 1)
         val_labels = np.concatenate(val_label_parts, axis=0)
         validation_set = ({'features': val_features}, {'labels': val_labels})
     else:
         # with no test files there is nothing to reshape or validate against
         print('No test files available.  Training without a validation set..')
         validation_set = None

     # start training process
     for _ in range(N_TRAIN_STEPS):

         # shuffle the train set files before each step
         shuffle_train_set(TRAIN_SET_FILE_LIST)

         # run through every batch in the training set
         for f_in in TRAIN_SET_FILE_LIST:

             # read in a batch of features and labels for training
             feature_batch, label_batch = input_pipeline(f_in, PREPROCESSED_DATA_FOLDER)
             feature_batch = feature_batch.reshape(-1, IMAGE_DIM, IMAGE_DIM, 1)

             # run the fit operation
             model.fit({'features': feature_batch}, {'labels': label_batch}, n_epoch=1,
                       validation_set=validation_set,
                       shuffle=True, snapshot_step=None, show_metric=True,
                       run_id=MODEL_NAME)
            
 # unit test -----------------------------------
 #train_conv_net()
	# import libraries
	from __future__ import print_function
	from __future__ import division

	import numpy as np
	import pandas as pd
	import os
	import re

	import tensorflow as tf
	import tflearn
	from tflearn.layers.conv import conv_2d, max_pool_2d
	from tflearn.layers.core import input_data, dropout, fully_connected
	from tflearn.layers.estimator import regression
	from tflearn.layers.normalization import local_response_normalization

	import random
	from timeit import default_timer as timer

	import tsahelper as tsa


	#---------------------------------------------------------------------------------------
	# Constants
	#
	# INPUT_FOLDER: The folder that contains the source data
	#
	# PREPROCESSED_DATA_FOLDER: The folder that contains preprocessed .npy files
	#
	# STAGE1_LABELS: The CSV file containing the labels by subject
	#
	# THREAT_ZONE: Threat Zone to train on (actual number not 0 based)
	#
	# BATCH_SIZE: Number of Subjects per batch
	#
	# EXAMPLES_PER_SUBJECT Number of examples generated per subject
	#
	# FILE_LIST: A list of the preprocessed .npy files to batch
	#
	# TRAIN_TEST_SPLIT_RATIO: Ratio to split the FILE_LIST between train and test
	#
	# TRAIN_SET_FILE_LIST: The list of .npy files to be used for training
	#
	# TEST_SET_FILE_LIST: The list of .npy files to be used for testing
	#
	# IMAGE_DIM: The height and width of the images in pixels
	#
	# LEARNING_RATE Learning rate for the neural network
	#
	# N_TRAIN_STEPS The number of train steps (epochs) to run
	#
	# TRAIN_PATH Place to store the tensorboard logs
	#
	# MODEL_PATH Path where model files are stored
	#
	# MODEL_NAME Name of the model files
	#
	#----------------------------------------------------------------------------------------
	# --- data locations ---------------------------------------------------------
	INPUT_FOLDER = 'tsa_datasets/stage1/aps'                  # raw scan files
	PREPROCESSED_DATA_FOLDER = 'tsa_datasets/preprocessed/'   # .npy batch files
	STAGE1_LABELS = 'tsa_datasets/stage1_labels.csv'          # labels by subject

	# --- batching ---------------------------------------------------------------
	THREAT_ZONE = 1                # zone to train on (1-based, not 0-based)
	BATCH_SIZE = 16                # subjects per batch
	EXAMPLES_PER_SUBJECT = 182     # examples generated per subject

	# --- train/test bookkeeping (filled in by get_train_test_file_list) ---------
	FILE_LIST = []
	TRAIN_TEST_SPLIT_RATIO = 0.2
	TRAIN_SET_FILE_LIST = []
	TEST_SET_FILE_LIST = []

	# --- model hyper-parameters and output locations ----------------------------
	IMAGE_DIM = 250                # image height and width in pixels
	LEARNING_RATE = 1e-3
	N_TRAIN_STEPS = 1              # number of passes over the training files
	TRAIN_PATH = 'tsa_logs/train/'     # tensorboard logs
	MODEL_PATH = 'tsa_logs/model/'     # model checkpoints
	MODEL_NAME = 'tsa-{}-lr-{}-{}-{}-tz-{}'.format(
	    'alexnet-v0.1',
	    LEARNING_RATE,
	    IMAGE_DIM,
	    IMAGE_DIM,
	    THREAT_ZONE,
	)





	#---------------------------------------------------------------------------------------
	# preprocess_tsa_data(): preprocesses the tsa datasets
	#
	# parameters: none
	#
	# returns: none
	#---------------------------------------------------------------------------------------

	def _describe_image(img):
	    """Print the shape and mean of an intermediate image as a progress trace."""
	    print('-> shape {}|mean={}'.format(img.shape, img.mean()))


	def _save_threat_zone_batches(threat_zone_examples, batch_num):
	    """Write the accumulated examples to disk, one .npy file per threat zone.

	    parameters: threat_zone_examples - non-empty list of
	                [[tz_num], image, label] entries
	                batch_num - batch counter embedded in the file names

	    returns: none
	    """
	    # The file name carries the lengths of the first two rows of the first
	    # example image.
	    first_img = threat_zone_examples[0][1]

	    for tz_num in range(len(tsa.zone_slice_list)):

	        # Note that the trainer looks for tz{} where {} is a 1-based tz_num
	        # in the file name to select the batches for a given threat zone.
	        out_file = (PREPROCESSED_DATA_FOLDER +
	                    'preprocessed_TSA_scans-tz{}-{}-{}-b{}.npy'.format(
	                        tz_num + 1,
	                        len(first_img[0]),
	                        len(first_img[1]),
	                        batch_num))
	        print(' -> writing: ' + out_file)

	        # keep this zone's examples and drop the zone-number column
	        tz_examples = [[example[1], example[2]]
	                       for example in threat_zone_examples
	                       if example[0] == [tz_num]]

	        # The examples are wrapped in one outer list; input_pipeline() walks
	        # that two-level layout when it reads the file back.
	        np.save(out_file, [tz_examples])


	def preprocess_tsa_data():
	    """Preprocess the tsa scans and save them as per-threat-zone .npy batches.

	    For every subject and every threat zone, each image in which the zone
	    has a slice defined is flipped, converted to grayscale, contrast
	    stretched, masked, cropped, normalized and zero centered.  Examples are
	    flushed to PREPROCESSED_DATA_FOLDER whenever a full minibatch of
	    BATCH_SIZE * EXAMPLES_PER_SUBJECT examples has accumulated, and once more
	    at the end for any remainder.

	    parameters: none

	    returns: none
	    """
	    # OPTION 1: get a list of all subjects for which there are labels
	    #df = pd.read_csv(STAGE1_LABELS)
	    #df['Subject'], df['Zone'] = df['Id'].str.split('_',1).str
	    #SUBJECT_LIST = df['Subject'].unique()

	    # OPTION 2: get a list of all subjects for whom there is data
	    #SUBJECT_LIST = [os.path.splitext(subject)[0] for subject in os.listdir(INPUT_FOLDER)]

	    # OPTION 3: get a list of subjects for small bore test purposes
	    SUBJECT_LIST = ['00360f79fd6e02781457eda48f85da90','0043db5e8c819bffc15261b1f1ac5e42',
	                    '0050492f92e22eed3474ae3a6fc907fa','006ec59fa59dd80a64c85347eef810c7',
	                    '0097503ee9fa0606559c56458b281a08','011516ab0eca7cad7f5257672ddde70e']

	    # initialize tracking and saving items
	    batch_num = 1
	    threat_zone_examples = []
	    start_time = timer()
	    full_batch_len = BATCH_SIZE * EXAMPLES_PER_SUBJECT

	    for subject in SUBJECT_LIST:

	        # read in the images
	        print('--------------------------------------------------------------')
	        print('t+> {:5.3f} |Reading images for subject #: {}'.format(timer()-start_time,
	                                                                     subject))
	        print('--------------------------------------------------------------')
	        images = tsa.read_data(INPUT_FOLDER + '/' + subject + '.aps')

	        # transpose so that the slice is the first dimension shape(16, 620, 512)
	        images = images.transpose()

	        # The subject's labels do not depend on the threat zone, so look them
	        # up once per subject instead of once per zone.
	        # NOTE(review): assumes tsa.get_subject_labels is a pure lookup - confirm.
	        subject_labels = tsa.get_subject_labels(STAGE1_LABELS, subject)

	        # for each threat zone, loop through each image, mask off the zone and then crop it
	        for tz_num, (threat_zone, crop_dims) in enumerate(zip(tsa.zone_slice_list,
	                                                              tsa.zone_crop_list)):

	            # get label
	            label = np.array(tsa.get_subject_zone_label(tz_num, subject_labels))

	            for img_num, img in enumerate(images):

	                print('Threat Zone:Image -> {}:{}'.format(tz_num, img_num))
	                print('Threat Zone Label -> {}'.format(label))

	                if threat_zone[img_num] is None:
	                    print('-> No view of tz:{} in img:{}. Skipping to next...'.format(
	                                tz_num, img_num))
	                    print('------------------------------------------------')
	                    continue

	                # correct the orientation of the image
	                print('-> reorienting base image')
	                base_img = np.flipud(img)
	                _describe_image(base_img)

	                # convert to grayscale
	                print('-> converting to grayscale')
	                rescaled_img = tsa.convert_to_grayscale(base_img)
	                _describe_image(rescaled_img)

	                # spread the spectrum to improve contrast
	                print('-> spreading spectrum')
	                high_contrast_img = tsa.spread_spectrum(rescaled_img)
	                _describe_image(high_contrast_img)

	                # get the masked image
	                print('-> masking image')
	                masked_img = tsa.roi(high_contrast_img, threat_zone[img_num])
	                _describe_image(masked_img)

	                # crop the image
	                print('-> cropping image')
	                cropped_img = tsa.crop(masked_img, crop_dims[img_num])
	                _describe_image(cropped_img)

	                # normalize the image
	                print('-> normalizing image')
	                normalized_img = tsa.normalize(cropped_img)
	                _describe_image(normalized_img)

	                # zero center the image
	                print('-> zero centering')
	                zero_centered_img = tsa.zero_center(normalized_img)
	                _describe_image(zero_centered_img)

	                # append the features and labels to this threat zone's example array
	                print ('-> appending example to threat zone {}'.format(tz_num))
	                threat_zone_examples.append([[tz_num], zero_centered_img, label])
	                first_example = threat_zone_examples[0]
	                print ('-> shape {:d}:{:d}:{:d}:{:d}:{:d}:{:d}'.format(
	                                                     len(threat_zone_examples),
	                                                     len(first_example),
	                                                     len(first_example[0]),
	                                                     len(first_example[1][0]),
	                                                     len(first_example[1][1]),
	                                                     len(first_example[2])))
	                print('------------------------------------------------')

	        # Each subject is expected to contribute EXAMPLES_PER_SUBJECT examples,
	        # so write the data out once a full minibatch is complete.  The
	        # emptiness check matters because 0 % n == 0 would otherwise trigger
	        # a write that indexes into an empty list.
	        if threat_zone_examples and len(threat_zone_examples) % full_batch_len == 0:
	            _save_threat_zone_batches(threat_zone_examples, batch_num)

	            # reset for next batch
	            threat_zone_examples = []
	            batch_num += 1

	    # we may run out of subjects before we finish a batch, so we write out
	    # the last batch stub
	    if threat_zone_examples:
	        _save_threat_zone_batches(threat_zone_examples, batch_num)
	# unit test ---------------------------------------
	#preprocess_tsa_data()


	#---------------------------------------------------------------------------------------
	# get_train_test_file_list(): gets the batch file list, splits between train and test
	#
	# parameters: none
	#
	# returns: none
	#
	#-------------------------------------------------------------------------------------

	def get_train_test_file_list():
	    """Collect the batch files for THREAT_ZONE and split them into train/test.

	    Populates the globals FILE_LIST, TRAIN_SET_FILE_LIST and
	    TEST_SET_FILE_LIST.  The files are sorted by name so the split is the
	    same on every run (os.listdir returns entries in arbitrary order).  When
	    at least one file matches, at least one file is reserved for testing.
	    If PREPROCESSED_DATA_FOLDER is empty the globals are left untouched.

	    parameters: none

	    returns: none
	    """
	    global FILE_LIST
	    global TRAIN_SET_FILE_LIST
	    global TEST_SET_FILE_LIST

	    folder_entries = os.listdir(PREPROCESSED_DATA_FOLDER)
	    if not folder_entries:
	        print ('No preprocessed data available. Skipping preprocessed data setup..')
	        return

	    # batch files carry the 1-based threat zone number as '-tz<N>-' in their name
	    tz_tag = '-tz' + str(THREAT_ZONE) + '-'
	    FILE_LIST = sorted(f for f in folder_entries if tz_tag in f)

	    # reserve at least one file for testing, but none when nothing matched
	    if FILE_LIST:
	        n_test_files = max(int(len(FILE_LIST) * TRAIN_TEST_SPLIT_RATIO), 1)
	    else:
	        n_test_files = 0
	    train_test_split = len(FILE_LIST) - n_test_files

	    TRAIN_SET_FILE_LIST = FILE_LIST[:train_test_split]
	    TEST_SET_FILE_LIST = FILE_LIST[train_test_split:]
	    print('Train/Test Split -> {} file(s) of {} used for testing'.format(
	          n_test_files, len(FILE_LIST)))

	# unit test ----------------------------
	#get_train_test_file_list()

	#---------------------------------------------------------------------------------------
	# input_pipeline(filename, path): prepares a batch of features and labels for training
	#
	# parameters: filename - the file to be batched into the model
	# path - the folder where filename resides
	#
	# returns: feature_batch - a batch of features to train or test on
	# label_batch - a batch of labels related to the feature_batch
	#
	#---------------------------------------------------------------------------------------

	def input_pipeline(filename, path):
	    """Prepare a shuffled batch of features and labels from one batch file.

	    The file is expected to hold a two-level layout: an outer sequence of
	    example lists, each example being a [features, label] pair (this is the
	    layout preprocess_tsa_data writes).

	    parameters: filename - the file to be batched into the model
	                path - the folder where filename resides

	    returns: feature_batch - float32 array of features
	             label_batch - float32 array of labels, in the same order as
	             feature_batch
	    """
	    # The batch files store Python objects (image/label pairs), which
	    # np.load refuses to read unless pickling is explicitly allowed.
	    # NOTE: unpickling can execute arbitrary code - only load batch files
	    # produced by this project's own preprocessing step.
	    preprocessed_tz_scans = np.load(os.path.join(path, filename),
	                                    allow_pickle=True)

	    # Flatten the two-level layout first.  Shuffling the loaded array
	    # directly would only reorder the outer level, which leaves the examples
	    # in their original order when there is a single outer entry.
	    examples = [example
	                for example_list in preprocessed_tz_scans
	                for example in example_list]

	    feature_batch = []
	    label_batch = []

	    # visit the examples in random order, keeping features and labels paired
	    for idx in np.random.permutation(len(examples)):
	        feature_batch.append(examples[idx][0])
	        label_batch.append(examples[idx][1])

	    feature_batch = np.asarray(feature_batch, dtype=np.float32)
	    label_batch = np.asarray(label_batch, dtype=np.float32)

	    return feature_batch, label_batch

	# unit test ------------------------------------------------------------------------
	#print ('Train Set -----------------------------')
	#for f_in in TRAIN_SET_FILE_LIST:
	# feature_batch, label_batch = input_pipeline(f_in, PREPROCESSED_DATA_FOLDER)
	# print (' -> features shape {}:{}:{}'.format(len(feature_batch),
	# len(feature_batch[0]),
	# len(feature_batch[0][0])))
	# print (' -> labels shape {}:{}'.format(len(label_batch), len(label_batch[0])))

	#print ('Test Set -----------------------------')
	#for f_in in TEST_SET_FILE_LIST:
	# feature_batch, label_batch = input_pipeline(f_in, PREPROCESSED_DATA_FOLDER)
	# print (' -> features shape {}:{}:{}'.format(len(feature_batch),
	# len(feature_batch[0]),
	# len(feature_batch[0][0])))
	# print (' -> labels shape {}:{}'.format(len(label_batch), len(label_batch[0])))

	#---------------------------------------------------------------------------------------
	# shuffle_train_set(): shuffle the list of batch files so that each train step
	# receives them in a different order since the TRAIN_SET_FILE_LIST
	# is a global
	#
	# parameters: train_set - the file listing to be shuffled
	#
	# returns: none
	#
	#-------------------------------------------------------------------------------------

	def shuffle_train_set(train_set):
	    """Shuffle the list of batch files in place.

	    parameters:  train_set - the file listing to be shuffled (mutated in place)

	    returns:     none
	    """
	    # random.shuffle reorders its argument in place and returns None, so there is
	    # no result to store. The training loop passes TRAIN_SET_FILE_LIST itself,
	    # so the shared list is already reordered when this function returns.
	    random.shuffle(train_set)

	# Unit test ---------------
	#print ('Before Shuffling ->', TRAIN_SET_FILE_LIST)
	#shuffle_train_set(TRAIN_SET_FILE_LIST)
	#print ('After Shuffling ->', TRAIN_SET_FILE_LIST)


	#---------------------------------------------------------------------------------------
	# alexnet(width, height, lr): defines the alexnet
	#
	# parameters: width - width of the input image
	# height - height of the input image
	# lr - learning rate
	#
	# returns: model - the tflearn.DNN model built on the network
	#
	#-------------------------------------------------------------------------------------

	def alexnet(width, height, lr):
	    """Assemble the AlexNet-style network and wrap it in a tflearn DNN model.

	    parameters:  width - width of the input image
	                 height - height of the input image
	                 lr - learning rate handed to the regression layer

	    returns:     the tflearn.DNN model wrapping the network
	    """
	    # single-channel input, fed under the name 'features'
	    net = input_data(shape=[None, width, height, 1], name='features')

	    # stage 1: 11x11 convolution, then pool + normalize
	    net = conv_2d(net, 96, 11, strides=4, activation='relu')
	    net = max_pool_2d(net, 3, strides=2)
	    net = local_response_normalization(net)

	    # stage 2: 5x5 convolution, then pool + normalize
	    net = conv_2d(net, 256, 5, activation='relu')
	    net = max_pool_2d(net, 3, strides=2)
	    net = local_response_normalization(net)

	    # stage 3: three stacked 3x3 convolutions, then pool + normalize
	    for n_filters in (384, 384, 256):
	        net = conv_2d(net, n_filters, 3, activation='relu')
	    net = max_pool_2d(net, 3, strides=2)
	    net = local_response_normalization(net)

	    # classifier head: two 4096-unit tanh layers, each followed by dropout
	    for _ in range(2):
	        net = fully_connected(net, 4096, activation='tanh')
	        net = dropout(net, 0.5)

	    # two-way softmax output trained with momentum on categorical cross-entropy;
	    # targets are fed under the name 'labels'
	    net = fully_connected(net, 2, activation='softmax')
	    net = regression(
	        net,
	        optimizer='momentum',
	        loss='categorical_crossentropy',
	        learning_rate=lr,
	        name='labels',
	    )

	    return tflearn.DNN(
	        net,
	        checkpoint_path=MODEL_PATH + MODEL_NAME,
	        tensorboard_dir=TRAIN_PATH,
	        tensorboard_verbose=3,
	        max_checkpoints=1,
	    )

	#---------------------------------------------------------------------------------------
	# train_conv_net(): runs the train op
	#
	# parameters: none
	#
	# returns: none
	#
	#-------------------------------------------------------------------------------------

	def _load_validation_set(file_list, path):
	    """Load every file in file_list and stack the batches into one validation set.

	    parameters:  file_list - the .npy files that make up the validation set
	                 path - the folder where those files reside

	    returns:     a ({'features': ...}, {'labels': ...}) tuple ready to be passed
	                 to model.fit, or None when file_list is empty
	    """
	    if not file_list:
	        # Nothing to validate against; None is passed on to model.fit instead
	        # of failing later on a .reshape call against an empty list.
	        return None

	    batches = [input_pipeline(f_in, path) for f_in in file_list]

	    # Concatenate once instead of re-copying the growing array for every file.
	    # Later files go first, which is the order the incremental version produced.
	    val_features = np.concatenate([features for features, _ in reversed(batches)], axis=0)
	    val_labels = np.concatenate([labels for _, labels in reversed(batches)], axis=0)

	    # add the trailing single-channel axis expected by the 'features' input
	    val_features = val_features.reshape(-1, IMAGE_DIM, IMAGE_DIM, 1)

	    return ({'features': val_features}, {'labels': val_labels})


	def train_conv_net():
	    """Run the train op: fit the model on every training batch file for
	    N_TRAIN_STEPS passes, validating against the full test set.

	    parameters:  none

	    returns:     none
	    """
	    # get train and test batches
	    get_train_test_file_list()

	    # instantiate model
	    model = alexnet(IMAGE_DIM, IMAGE_DIM, LEARNING_RATE)

	    # read in the validation test set (None when there are no test files)
	    validation_set = _load_validation_set(TEST_SET_FILE_LIST, PREPROCESSED_DATA_FOLDER)

	    # start training process
	    for _ in range(N_TRAIN_STEPS):

	        # shuffle the train set files before each step
	        shuffle_train_set(TRAIN_SET_FILE_LIST)

	        # run through every batch in the training set
	        for f_in in TRAIN_SET_FILE_LIST:

	            # read in a batch of features and labels for training
	            feature_batch, label_batch = input_pipeline(f_in, PREPROCESSED_DATA_FOLDER)
	            feature_batch = feature_batch.reshape(-1, IMAGE_DIM, IMAGE_DIM, 1)

	            # run the fit operation: one epoch per batch file, every call logged
	            # under the same run id
	            model.fit({'features': feature_batch}, {'labels': label_batch}, n_epoch=1,
	                      validation_set=validation_set,
	                      shuffle=True, snapshot_step=None, show_metric=True,
	                      run_id=MODEL_NAME)

	# unit test -----------------------------------
	#train_conv_net()