For reading datasets and converting to numpy files.
# Arda Mavi
import os
import numpy as np
from os import listdir
from scipy.misc import imread, imresize
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Settings:
img_size = 64
grayscale_images = True
num_class = 10
test_size = 0.2

def get_img(data_path):
    # Read the image from disk and resize it to a fixed shape:
    img = imread(data_path, flatten=grayscale_images)
    img = imresize(img, (img_size, img_size, 1 if grayscale_images else 3))
    return img

def get_dataset(dataset_path='Dataset'):
    # Load the cached .npy arrays if they exist; otherwise build them
    # from the Dataset/<label>/<image> folder structure:
    try:
        X = np.load('npy_dataset/X.npy')
        Y = np.load('npy_dataset/Y.npy')
    except:
        labels = listdir(dataset_path)  # Getting labels
        X = []
        Y = []
        for i, label in enumerate(labels):
            datas_path = dataset_path + '/' + label
            for data in listdir(datas_path):
                img = get_img(datas_path + '/' + data)
                X.append(img)
                Y.append(i)
        # Create dataset:
        X = 1 - np.array(X).astype('float32') / 255.  # scale to [0, 1] and invert
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, num_class)  # one-hot encode the labels
        if not os.path.exists('npy_dataset/'):
            os.makedirs('npy_dataset/')
        np.save('npy_dataset/X.npy', X)
        np.save('npy_dataset/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test

if __name__ == '__main__':
    get_dataset()
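As a quick sanity check, here is a minimal usage sketch; the module name get_dataset.py is an assumption (save the gist under that name next to a Dataset/ folder laid out as Dataset/&lt;label&gt;/&lt;image&gt;):

from get_dataset import get_dataset  # hypothetical module name for this gist

# Returns train/test splits; X holds images scaled to [0, 1], Y one-hot labels.
X, X_test, Y, Y_test = get_dataset('Dataset')
print(X.shape, Y.shape)            # e.g. (n_train, 64, 64, 1), (n_train, 10)
print(X_test.shape, Y_test.shape)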
I think some of the functions have been removed from the libraries used here, so this is an updated version. Hope it works.
import os
import numpy as np
from os import listdir
from matplotlib.pyplot import imread
from skimage.transform import resize
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Settings:
img_size = 64
grayscale_images = True
num_class = 10
test_size = 0.2

def get_img(data_path):
    # Read the image from disk and resize it to a fixed shape:
    img = imread(data_path)
    img = resize(img, (img_size, img_size, 1 if grayscale_images else 3))
    return img

def get_dataset(dataset_path='Dataset'):
    # Load the cached .npy arrays if they exist; otherwise build them
    # from the Dataset/<label>/<image> folder structure:
    try:
        X = np.load('npy_dataset/X.npy')
        Y = np.load('npy_dataset/Y.npy')
    except:
        labels = listdir(dataset_path)  # Getting labels
        X = []
        Y = []
        for i, label in enumerate(labels):
            datas_path = dataset_path + '/' + label
            for data in listdir(datas_path):
                img = get_img(datas_path + '/' + data)
                X.append(img)
                Y.append(i)
        # Create dataset:
        X = 1 - np.array(X).astype('float32') / 255.
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, num_class)  # one-hot encode the labels
        if not os.path.exists('npy_dataset/'):
            os.makedirs('npy_dataset/')
        np.save('npy_dataset/X.npy', X)
        np.save('npy_dataset/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test

if __name__ == '__main__':
    X, X_test, Y, Y_test = get_dataset()
    print(X)
    print(X_test)
    print(Y)
    print(Y_test)
To make this code work correctly, you should change the following lines (a sketch with both changes applied follows):
- Change the line "labels = listdir(dataset_path)" to "labels = ['0','1','2','3','4','5','6','7','8','9']"
- Change the line "X = 1 - np.array(X).astype('float32') / 255." to "X = np.array(X).astype('float32')"
How can I download the "Sign Language Digits Dataset"?
I tried this code on Google Colab, but it gave errors.
Please help.
You can use the Kaggle API to download and use the dataset.
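For example, with the official kaggle package installed (pip install kaggle) and a kaggle.json API token in ~/.kaggle/, something like this minimal sketch should work on Colab; the dataset slug below is an assumption, so verify the exact name on the dataset's Kaggle page:

from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()  # reads the kaggle.json token from ~/.kaggle/

# Download and unzip into the Dataset/ folder the script expects.
api.dataset_download_files(
    'ardamavi/sign-language-digits-dataset',  # assumed slug; verify on Kaggle
    path='Dataset',
    unzip=True,
)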