| """A generic module to read data.""" | |
| import numpy | |
| import collections | |
| from tensorflow.python.framework import dtypes | |
| class DataSet(object): | |
| """Dataset class object.""" | |
| def __init__(self, | |
| images, | |
| labels, | |
| fake_data=False, | |
| one_hot=False, | |
| dtype=dtypes.float64, | |
| reshape=True): | |
| """Initialize the class.""" | |
| if reshape: | |
| assert images.shape[3] == 1 | |
| images = images.reshape(images.shape[0], | |
| images.shape[1] * images.shape[2]) | |
| self._images = images | |
| self._num_examples = images.shape[0] | |
| self._labels = labels | |
| self._epochs_completed = 0 | |
| self._index_in_epoch = 0 | |
| @property | |
| def images(self): | |
| return self._images | |
| @property | |
| def labels(self): | |
| return self._labels | |
| @property | |
| def num_examples(self): | |
| return self._num_examples | |
| @property | |
| def epochs_completed(self): | |
| return self._epochs_completed | |
| def next_batch(self, batch_size, fake_data=False): | |
| """Return the next `batch_size` examples from this data set.""" | |
| start = self._index_in_epoch | |
| self._index_in_epoch += batch_size | |
| if self._index_in_epoch > self._num_examples: | |
| # Finished epoch | |
| self._epochs_completed += 1 | |
| # Shuffle the data | |
| perm = numpy.arange(self._num_examples) | |
| numpy.random.shuffle(perm) | |
| self._images = self._images[perm] | |
| self._labels = self._labels[perm] | |
| # Start next epoch | |
| start = 0 | |
| self._index_in_epoch = batch_size | |
| assert batch_size <= self._num_examples | |
| end = self._index_in_epoch | |
| return self._images[start:end], self._labels[start:end] | |
| def read_data_sets(train_dir, fake_data=False, one_hot=False, | |
| dtype=dtypes.float64, reshape=True, | |
| validation_size=5000): | |
| """Set the images and labels.""" | |
| num_training = 3000 | |
| num_validation = 1000 | |
| num_test = 1000 | |
| all_images = numpy.load('./npy/grey.npy') | |
| all_images = all_images.reshape(all_images.shape[0], | |
| all_images.shape[1], all_images.shape[2], 1) | |
| train_labels_original = numpy.load('./npy/label.npy') | |
| all_labels = numpy.asarray(range(0, len(train_labels_original))) | |
| all_labels = dense_to_one_hot(all_labels, len(all_labels)) | |
| mask = range(num_training) | |
| train_images = all_images[mask] | |
| train_labels = all_labels[mask] | |
| mask = range(num_training, num_training + num_validation) | |
| validation_images = all_images[mask] | |
| validation_labels = all_labels[mask] | |
| mask = range(num_training + num_validation, num_training + num_validation + num_test) | |
| test_images = all_images[mask] | |
| test_labels = all_labels[mask] | |
| train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape) | |
| validation = DataSet(validation_images, validation_labels, dtype=dtype, | |
| reshape=reshape) | |
| test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape) | |
| ds = collections.namedtuple('Datasets', ['train', 'validation', 'test']) | |
| return ds(train=train, validation=validation, test=test) | |
| def dense_to_one_hot(labels_dense, num_classes): | |
| """Convert class labels from scalars to one-hot vectors.""" | |
| num_labels = labels_dense.shape[0] | |
| index_offset = numpy.arange(num_labels) * num_classes | |
| labels_one_hot = numpy.zeros((num_labels, num_classes)) | |
| labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 | |
| return labels_one_hot | 
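A minimal usage sketch, assuming the `./npy/grey.npy` and `./npy/label.npy` files loaded above exist:

```python
import dataset

# read_data_sets() currently ignores its train_dir argument and reads the
# hard-coded ./npy paths, so any string works here.
data = dataset.read_data_sets('./npy')
print(data.train.num_examples)  # 3000 with the defaults above

# One shuffled batch of flattened images and one-hot labels.
batch_xs, batch_ys = data.train.next_batch(100)
print(batch_xs.shape, batch_ys.shape)
```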
nn.py:

```python
from __future__ import division
from __future__ import print_function

import argparse
import sys

import dataset
import tensorflow as tf

FLAGS = None


def main(_):
    """Run the NN."""
    mnist = dataset.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 10000])
    w = tf.Variable(tf.zeros([10000, 5000]))
    b = tf.Variable(tf.zeros([5000]))
    y = tf.matmul(x, w) + b

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 5000])

    # The raw formulation of cross-entropy,
    #
    #   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
    #                                 reduction_indices=[1]))
    #
    # can be numerically unstable.
    #
    # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
    # outputs of 'y', and then average across the batch.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Train
    for _ in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        print(batch_xs.shape)
        print(batch_ys.shape)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                        y_: mnist.test.labels}))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str,
                        default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
```
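To try the script, run `python nn.py` from the directory that contains the `npy/` folder. Note that `read_data_sets()` currently ignores its `train_dir` argument and always reads the hard-coded `./npy/grey.npy` and `./npy/label.npy` paths.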
Where in the code do I need to change it so it loads the data from my own directory? I have a folder that contains two subfolders of classes of images I want to use to train a neural net. Will your code automatically create the test set as well? Sorry, still a bit confused.
@pierrepook This is where you can change the folder path: https://gist.github.com/ambodi/408301bc5bc07bc5afa8748513ab9477#file-dataset-py-L74
You can change the number of training examples here: https://gist.github.com/ambodi/408301bc5bc07bc5afa8748513ab9477#file-dataset-py-L82
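Concretely, these are the lines inside `read_data_sets()` to edit (the paths below are placeholders for your own files):

```python
# Point these at your own .npy files:
all_images = numpy.load('./npy/grey.npy')
train_labels_original = numpy.load('./npy/label.npy')

# Adjust the split sizes to match your dataset:
num_training = 3000
num_validation = 1000
num_test = 1000
```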
What if I have .jpg files instead of the .npy format?
How do I insert the images into all_images and their labels into all_labels?
Thanks for the code, btw. Really helps a lot! :)
@tandcredosouza
- Convert your .jpg files to .npy arrays. You can do this with OpenCV by loading each image with cv2.imread(...) and converting it with np.array(...) in a for loop.
- Append the arrays into one big .npy: initialize bigNpy = [], then run a for loop in which you load each image and append it to bigNpy using bigNpy.append(...). Then save it with np.save(...). A sketch of both steps is below.
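A minimal sketch of both steps, assuming a flat folder of equally-sized grayscale .jpg files (the `./images` path and output names are placeholders):

```python
import glob

import cv2
import numpy as np

images = []
# Step 1: load each .jpg as a grayscale numpy array
# (assumes every file loads and all images share the same size).
for path in sorted(glob.glob('./images/*.jpg')):
    images.append(cv2.imread(path, cv2.IMREAD_GRAYSCALE))

# Step 2: stack into one (num_images, height, width) array and save it.
big_npy = np.array(images)
np.save('./npy/grey.npy', big_npy)

# A matching label array can be saved the same way, e.g.
# np.save('./npy/label.npy', np.array(labels))
```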
What if I wanted to load the images on the fly during training, instead of loading all the data beforehand?
I also added the `nn.py` to show how to use the `dataset.py`.