-
-
Save BenjaminFraser/64f3929b61395f99799f2ab4b5def04f to your computer and use it in GitHub Desktop.
For reading datasets and converting to numpy files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Arda Mavi
import os
from os import listdir

import numpy as np
from keras.utils import to_categorical
from scipy.misc import imread, imresize
from sklearn.model_selection import train_test_split
# Settings:
# Side length, in pixels, passed to imresize for every image.
img_size = 64
# Passed as `flatten=` to imread and used to pick 1 vs 3 in the resize tuple.
grayscale_images = True
# Number of classes passed to to_categorical when one-hot encoding labels.
num_class = 10
# Value forwarded to train_test_split as `test_size`.
test_size = 0.2
def get_img(data_path):
    """Read the image at *data_path* and resize it to the configured size.

    The image is read with ``imread`` (``flatten=grayscale_images``) and then
    passed through ``imresize`` with a target of
    ``(img_size, img_size, channels)``, where ``channels`` is 1 when
    ``grayscale_images`` is true and 3 otherwise.

    Returns whatever ``imresize`` returns for that request.
    """
    # NOTE(review): scipy.misc.imread / imresize were deprecated in SciPy 1.0
    # and removed in later releases, so this requires an older SciPy (with
    # Pillow installed) -- confirm the pinned version.
    img = imread(data_path, flatten=grayscale_images)
    # NOTE(review): imresize documents a tuple `size` as (height, width); the
    # third (channel) element presumably has no effect -- confirm.
    channels = 1 if grayscale_images else 3
    img = imresize(img, (img_size, img_size, channels))
    return img
def get_dataset(dataset_path='Dataset'):
    """Load the image dataset and return a train/test split.

    The full arrays are first looked up in the ``npy_dataset/`` cache
    (``X.npy`` and ``Y.npy``). If the cache cannot be read, the dataset is
    rebuilt from *dataset_path*, which is expected to contain one
    sub-directory per label, each holding that label's image files. The
    rebuilt arrays are then written back to the cache.

    Labels are mapped to integer class indices by their position in the
    sorted list of label directories. Directory names that parse as integers
    are ordered numerically (so ``'2'`` sorts before ``'10'``); any other
    names follow, ordered lexicographically.

    Args:
        dataset_path: Root directory with one sub-directory per label.

    Returns:
        ``(X, X_test, Y, Y_test)`` as produced by ``train_test_split`` with
        ``test_size=test_size`` and ``random_state=42``.
    """
    try:
        X = np.load('npy_dataset/X.npy')
        Y = np.load('npy_dataset/Y.npy')
    except (IOError, OSError, ValueError):
        # Cache missing or unreadable: rebuild it from the image folders.
        # Only these errors trigger a rebuild, so KeyboardInterrupt and
        # genuine programming errors are no longer silently swallowed.

        def label_order(name):
            # Integer-like names first, in numeric order; others afterwards.
            try:
                return (0, int(name), name)
            except ValueError:
                return (1, 0, name)

        # Getting labels: keep only directories so stray files in the
        # dataset root (e.g. .DS_Store) do not break the inner listdir.
        labels = sorted(
            (entry for entry in listdir(dataset_path)
             if os.path.isdir(os.path.join(dataset_path, entry))),
            key=label_order,
        )

        X = []
        Y = []
        for i, label in enumerate(labels):
            datas_path = os.path.join(dataset_path, label)
            for data in listdir(datas_path):
                X.append(get_img(os.path.join(datas_path, data)))
                Y.append(i)

        # Create dataset: scale by 1/255 and invert (1 - value), then
        # one-hot encode the class indices.
        X = 1 - np.array(X).astype('float32') / 255.
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, num_class)

        if not os.path.exists('npy_dataset/'):
            os.makedirs('npy_dataset/')
        np.save('npy_dataset/X.npy', X)
        np.save('npy_dataset/Y.npy', Y)

    # The cache stores the full dataset; the split is recomputed on each call
    # with a fixed random_state.
    X, X_test, Y, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test
if __name__ == '__main__':
    # Running the file as a script calls get_dataset() with its default
    # path; the returned split is discarded.
    get_dataset()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Changed line 28 so that it sorts the label directories by name (which matches numerical order for the single-digit labels 0–9), to ensure a deterministic and easy label-to-index mapping after preprocessing.