Skip to content

Instantly share code, notes, and snippets.

@ardamavi
Last active March 1, 2025 05:45
Show Gist options
  • Save ardamavi/a7d06ff8a315308771c70006cf494d69 to your computer and use it in GitHub Desktop.
Save ardamavi/a7d06ff8a315308771c70006cf494d69 to your computer and use it in GitHub Desktop.
For reading datasets and converting to numpy files.
# Arda Mavi
import os
import numpy as np
from os import listdir
from scipy.misc import imread, imresize
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
# Settings:
# Edge length, in pixels, of the square size requested from imresize in get_img.
img_size = 64
# Passed to imread as `flatten`; also selects 1 vs. 3 in the resize size tuple.
grayscale_images = True
# Number of classes handed to to_categorical when one-hot encoding the labels.
num_class = 10
# Forwarded to train_test_split as its `test_size` argument.
test_size = 0.2
def get_img(data_path):
    """Read the image stored at *data_path* and resize it.

    The file is read with ``imread`` (``flatten`` is set from the module-level
    ``grayscale_images`` flag) and then passed through ``imresize`` with a
    target size built from ``img_size``.

    NOTE(review): ``scipy.misc.imread`` / ``imresize`` are no longer shipped
    by recent SciPy releases, so this presumably needs an old SciPy (with
    Pillow installed) to run -- confirm the pinned version.
    """
    raw_image = imread(data_path, flatten=grayscale_images)
    # NOTE(review): imresize documents tuple sizes as (height, width); the
    # trailing channel count is presumably ignored -- confirm.
    channel_count = 1 if grayscale_images else 3
    target_size = (img_size, img_size, channel_count)
    return imresize(raw_image, target_size)
def _build_dataset(dataset_path):
    """Read every class directory under *dataset_path* into ``(X, Y)`` arrays.

    Raises:
        ValueError: If no image could be collected from *dataset_path*.
    """
    # Sort the class directories so the label -> class-index mapping does not
    # depend on the arbitrary order in which listdir() returns entries, and
    # skip stray non-directory entries (e.g. ``.DS_Store``) that are not
    # classes.
    labels = sorted(
        name for name in listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, name))
    )

    images = []
    targets = []
    for class_index, label in enumerate(labels):
        label_dir = os.path.join(dataset_path, label)
        # Sorted for the same reason: a stable sample order keeps the
        # seeded train/test split reproducible.
        for file_name in sorted(listdir(label_dir)):
            images.append(get_img(os.path.join(label_dir, file_name)))
            targets.append(class_index)

    if not images:
        # Fail before anything is cached; an empty cache would otherwise be
        # picked up (and fail) on every later run.
        raise ValueError('No images found under %r' % (dataset_path,))

    # Create dataset: scale pixels by 1/255 and invert them.
    X = 1 - np.array(images).astype('float32') / 255.
    Y = to_categorical(np.array(targets).astype('float32'), num_class)
    return X, Y


def get_dataset(dataset_path='Dataset'):
    """Load the image dataset and split it into training and test parts.

    The arrays are loaded from the ``npy_dataset/X.npy`` and
    ``npy_dataset/Y.npy`` cache files when those can be read. Otherwise every
    sub-directory of *dataset_path* is treated as one class: its files are
    read with ``get_img``, pixel values are scaled by 1/255 and inverted,
    labels are one-hot encoded with ``to_categorical(..., num_class)``, and
    the resulting arrays are written to the cache.

    NOTE: the cache location is fixed, so once the cache exists it is used
    regardless of *dataset_path*.

    Args:
        dataset_path: Directory containing one sub-directory per class.

    Returns:
        The tuple ``(X, X_test, Y, Y_test)`` produced by ``train_test_split``
        with ``test_size=test_size`` and ``random_state=42``.

    Raises:
        ValueError: If the cache is unavailable and *dataset_path* holds no
            images.
    """
    cache_dir = 'npy_dataset'
    x_cache = os.path.join(cache_dir, 'X.npy')
    y_cache = os.path.join(cache_dir, 'Y.npy')

    try:
        X = np.load(x_cache)
        Y = np.load(y_cache)
    except (IOError, OSError, ValueError, EOFError):
        # Cache missing or unreadable: rebuild it from the image folders.
        # Only load failures are caught so that e.g. KeyboardInterrupt still
        # propagates.
        X, Y = _build_dataset(dataset_path)
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        np.save(x_cache, X)
        np.save(y_cache, Y)

    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test
# Script entry point: calls get_dataset() with its defaults; the returned
# arrays are discarded.
if __name__ == '__main__':
    get_dataset()
@BhoomiBM
Copy link

How can I download this dataset, the "sign language digit dataset"?
I tried this code on Google Colab, but it gave errors.
Please help.

@ardamavi
Copy link
Author

How can I download this dataset, the "sign language digit dataset"?
I tried this code on Google Colab, but it gave errors.
Please help.

You can use the Kaggle API to download and use the dataset.

@PoornimaRadhaKrishnanD
Copy link

After downloading, how can I preview the dataset?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment