Skip to content

Instantly share code, notes, and snippets.

@CookieLau
Created April 5, 2023 03:07
Show Gist options
  • Save CookieLau/b077489c0df1fd3b1724855b682f61fb to your computer and use it in GitHub Desktop.
Save CookieLau/b077489c0df1fd3b1724855b682f61fb to your computer and use it in GitHub Desktop.
If you would like to lay out the CIFAR-10 / CIFAR-100 datasets the way ImageNet-1K is organized — separate train and test directories, with one subdirectory per category — try this gist!
import cv2
import numpy as np
import pickle
import os
import shutil
from torchvision import datasets
# %%
def unpickle(file):
    """Load and return the object stored in the pickle file at *file*.

    The file is read with ``encoding='bytes'``, so 8-bit strings pickled by
    Python 2 come back as ``bytes``. That is why callers in this file index
    the result with bytes keys such as ``b'data'`` and ``b'labels'``.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only call this on trusted dataset files.
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='bytes')
def cifar10():
    """Export CIFAR-10 as an ImageNet-style tree of 224x224 JPEG files.

    Downloads the dataset into ``./data`` through torchvision, then writes
    ``./data/cifar-10-batches-py/{train,test}/<class_name>/batch_<b>_<i>.jpg``
    where ``<b>`` is the position of the batch file in the list being
    processed and ``<i>`` is the row index of the image inside that batch.

    Raises:
        OSError: if OpenCV reports that an image could not be written.
    """
    # Only train=True is requested, but 'test_batch' is read from the same
    # directory below, so this relies on the download providing both splits.
    _ = datasets.CIFAR10(root='./data', train=True, download=True)
    root = os.getcwd()
    os.chdir("./data/cifar-10-batches-py")

    def process(filenames, folder_name):
        """Write every image of the batch files *filenames* under *folder_name*."""
        os.makedirs(folder_name, exist_ok=True)

        # One sub-directory per class, indexed by the integer label.
        meta = unpickle('batches.meta')
        class_dirs = [
            os.path.join(folder_name, name.decode('utf-8'))
            for name in meta[b'label_names']
        ]
        for class_dir in class_dirs:
            os.makedirs(class_dir, exist_ok=True)

        for index, batch in enumerate(filenames):
            data = unpickle(batch)
            images = data[b'data']
            size = images.shape[0]
            # Each row is a flat 3x32x32 image; move channels last for OpenCV.
            images = images.reshape(size, 3, 32, 32).transpose(0, 2, 3, 1).astype("uint8")
            labels = data[b'labels']

            # Resize and save one image at a time instead of staging the
            # whole batch in a (size, 224, 224, 3) array (~1.5 GB per batch).
            for i, (image, label) in enumerate(zip(images, labels)):
                resized = cv2.resize(image, (224, 224), interpolation=cv2.INTER_CUBIC)
                # CIFAR channels are R, G, B (per the dataset description),
                # while cv2.imwrite expects B, G, R.
                bgr = cv2.cvtColor(resized, cv2.COLOR_RGB2BGR)
                path = os.path.join(
                    class_dirs[label],
                    'batch_' + str(index) + "_" + str(i) + '.jpg',
                )
                # imwrite signals failure by returning False, not by raising.
                if not cv2.imwrite(path, bgr):
                    raise OSError("failed to write image: " + path)

    try:
        process(['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5'], "train")
        process(['test_batch'], "test")
    finally:
        # Restore the caller's working directory even if processing fails.
        os.chdir(root)
# %%
def cifar100():
    """Export CIFAR-100 as an ImageNet-style tree of 224x224 JPEG files.

    Downloads the dataset into ``./data`` through torchvision, then writes
    ``./data/cifar-100-python/{train,test}/<fine_class_name>/<i>.jpg`` where
    ``<i>`` is the row index of the image inside its split.

    The raw ``train``/``test``/``meta`` pickle files are renamed to ``*.pkl``
    first, because the output directories are also called ``train`` and
    ``test`` and would otherwise collide with them.

    Files are JPEG-encoded and named ``.jpg``. The earlier version moved the
    JPEG data to a ``.png`` name, so extension and content disagreed.

    Raises:
        OSError: if OpenCV reports that an image could not be written.
    """
    # Only train=False is requested, but both 'train' and 'test' are read
    # below, so this relies on the download providing both splits.
    _ = datasets.CIFAR100(root='./data', train=False, download=True)
    root = os.getcwd()
    os.chdir("./data/cifar-100-python")

    def process(pkl_file, folder_name):
        """Write every image of the unpickled split *pkl_file* under *folder_name*."""
        images = pkl_file[b'data']
        size = images.shape[0]
        # Each row is a flat 3x32x32 image; move channels last for OpenCV.
        images = images.reshape(size, 3, 32, 32).transpose(0, 2, 3, 1).astype("uint8")
        labels = pkl_file[b'fine_labels']

        os.makedirs(folder_name, exist_ok=True)
        # One sub-directory per fine-grained class, indexed by integer label.
        class_names = unpickle("meta.pkl")[b'fine_label_names']
        class_dirs = [
            os.path.join(folder_name, name.decode("utf-8"))
            for name in class_names
        ]
        for class_dir in class_dirs:
            os.makedirs(class_dir, exist_ok=True)

        # Resize and save one image at a time, straight into its class
        # folder. This avoids staging the whole split in memory (~7.5 GB for
        # the 50k-image train split) and the write-then-move double I/O.
        for i, (image, label) in enumerate(zip(images, labels)):
            resized = cv2.resize(image, (224, 224), interpolation=cv2.INTER_CUBIC)
            # CIFAR channels are R, G, B (per the dataset description),
            # while cv2.imwrite expects B, G, R.
            bgr = cv2.cvtColor(resized, cv2.COLOR_RGB2BGR)
            path = os.path.join(class_dirs[label], str(i) + ".jpg")
            # imwrite signals failure by returning False, not by raising.
            if not cv2.imwrite(path, bgr):
                raise OSError("failed to write image: " + path)

    try:
        # Rename only what is still a plain file, so an already-renamed
        # layout does not make os.rename fail.
        for raw_name in ("test", "train", "meta"):
            if os.path.isfile(raw_name):
                os.rename(raw_name, raw_name + ".pkl")
        process(unpickle("train.pkl"), "train")
        process(unpickle("test.pkl"), "test")
    finally:
        # Restore the caller's working directory even if processing fails.
        os.chdir(root)
# %%
if __name__ == "__main__":
    # Export both datasets; each call downloads into ./data and writes its
    # train/test class-folder tree inside the extracted dataset directory.
    cifar10()
    cifar100()
@CookieLau
Copy link
Author

The final directory tree looks like this:
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment