Last active
June 7, 2023 02:20
-
-
Save ardamavi/a7d06ff8a315308771c70006cf494d69 to your computer and use it in GitHub Desktop.
For reading datasets and converting to numpy files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Arda Mavi
import os
from os import listdir

import numpy as np
from scipy.misc import imread, imresize
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
# Settings:
# Edge length in pixels; used as both height and width when resizing images.
img_size = 64
# Passed as `flatten` when reading an image and selects 1 vs 3 in the resize
# size tuple.
grayscale_images = True
# Number of columns requested from `to_categorical` for the label matrix.
num_class = 10
# Fraction of the samples handed to `train_test_split` as the test set.
test_size = 0.2
def get_img(data_path):
    """Read the image at *data_path* and resize it to the configured size.

    The image is read with ``flatten=grayscale_images`` and then resized with
    a size tuple of ``(img_size, img_size, channels)``, where ``channels`` is
    1 when ``grayscale_images`` is true and 3 otherwise.

    Args:
        data_path: Path of the image file to read.

    Returns:
        The resized image as returned by ``imresize``.
    """
    # NOTE(review): `scipy.misc.imread` / `imresize` are deprecated and, as far
    # as I know, absent from recent SciPy releases -- confirm the SciPy version
    # this script is pinned to before relying on it.
    # NOTE(review): `imresize` documents a (height, width) tuple; the third
    # element below is presumably ignored -- verify against the SciPy docs.
    channels = 1 if grayscale_images else 3
    img = imread(data_path, flatten=grayscale_images)
    img = imresize(img, (img_size, img_size, channels))
    return img
def get_dataset(dataset_path='Dataset'):
    """Return the dataset split into train and test arrays.

    The arrays are loaded from ``npy_dataset/X.npy`` and ``npy_dataset/Y.npy``
    when those files can be read. Otherwise they are rebuilt from
    *dataset_path*, which is expected to contain one sub-directory per class
    holding that class's image files, and then saved to ``npy_dataset`` for
    the next call.

    Class indices are assigned by position in the lexicographically sorted
    list of sub-directory names, so the mapping is the same on every run and
    on every file system.

    Args:
        dataset_path: Root directory with one sub-directory per class.

    Returns:
        The tuple ``(X, X_test, Y, Y_test)`` produced by ``train_test_split``
        with ``test_size=test_size`` and ``random_state=42``.
    """
    cache_dir = 'npy_dataset'
    x_cache = os.path.join(cache_dir, 'X.npy')
    y_cache = os.path.join(cache_dir, 'Y.npy')

    try:
        X = np.load(x_cache)
        Y = np.load(y_cache)
    except (IOError, OSError, ValueError):
        # Cache missing or unreadable: rebuild it from the image folders.
        # Only loading failures are caught, so KeyboardInterrupt and genuine
        # programming errors are no longer silently swallowed.

        # Getting labels. Sorted because `listdir` returns entries in
        # arbitrary order; non-directories (stray files) are skipped.
        labels = sorted(
            name for name in listdir(dataset_path)
            if os.path.isdir(os.path.join(dataset_path, name))
        )

        X = []
        Y = []
        for i, label in enumerate(labels):
            datas_path = os.path.join(dataset_path, label)
            # Sorted so the sample order, and therefore the seeded split
            # below, is reproducible.
            for data in sorted(listdir(datas_path)):
                X.append(get_img(os.path.join(datas_path, data)))
                Y.append(i)

        # Create dataset:
        # Scale pixel values by 1/255 and invert them (1 - value).
        X = 1 - np.array(X).astype('float32') / 255.
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, num_class)

        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        np.save(x_cache, X)
        np.save(y_cache, Y)

    X, X_test, Y, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test
# When run as a script, load the dataset once; if the cached arrays cannot be
# read, this builds them and writes them to `npy_dataset/`.
if __name__ == '__main__':
    get_dataset()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You can use the Kaggle API to download and use the dataset.