Created
February 16, 2018 15:13
-
-
Save NISH1001/ac18381016b70d02bdc272b5afcdf285 to your computer and use it in GitHub Desktop.
create dataset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import string
import sys
from random import randint, shuffle
from shutil import copyfile

import cv2
def create_dir(path):
    """Create the directory ``path`` (and any missing parents) if it is absent.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # exist_ok=True keeps the call idempotent without the check-then-create
    # race that a separate os.path.exists() test has.
    os.makedirs(path, exist_ok=True)
def create_dirs(path):
    """Create one sub-directory of ``path`` for every character class.

    The classes are the Devanagari vowels, the Devanagari consonants and
    the digits 0-9, processed in that order.

    Args:
        path: Parent directory under which the class directories are made.
    """
    vowel_names = "अ आ इ ई उ ऊ ए ऐ ओ औ अं अ:".split()
    consonant_names = "क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह क्ष त्र ज्ञ".split()
    digit_names = list(map(str, range(10)))

    for class_name in vowel_names + consonant_names + digit_names:
        create_dir(path + '/' + class_name)
def create_pathmap():
    """Map every label to the paths of its images under ``ncr-original/``.

    The source tree is walked as ``ncr-original/<dtype>/<label>/<image>``,
    where ``dtype`` is a group such as vowels, consonants or nums and
    ``label`` is a single character class. Images of the same label found
    under different dtypes end up in the same list.

    As a side effect the per-class output directories under
    ``data-keras/train`` and ``data-keras/val`` are created.

    Returns:
        dict: ``{label: [image_path, ...]}`` with paths as strings. Labels
        whose directory contains no entries are absent from the mapping.
    """
    root = 'ncr-original/'
    dst = 'data-keras/'
    create_dirs(dst + 'train')
    create_dirs(dst + 'val')

    pathmap = {}
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # returned lists (and any split derived from them) reproducible.
    for dtype in sorted(os.listdir(root)):
        dtype_path = root + dtype
        if not os.path.isdir(dtype_path):
            # Stray file next to the dtype directories; listing it would raise.
            continue
        for label in sorted(os.listdir(dtype_path)):
            img_path = dtype_path + '/' + label + '/'
            if not os.path.isdir(img_path):
                continue
            for image in sorted(os.listdir(img_path)):
                pathmap.setdefault(label, []).append(img_path + image)
    return pathmap
def _write_images(load_paths, dst_dir):
    """Re-encode each image in ``load_paths`` as a randomly named JPEG in ``dst_dir``."""
    # The label may not be one of the pre-created class directories;
    # cv2.imwrite does not create missing directories itself.
    os.makedirs(dst_dir, exist_ok=True)
    for img_path in load_paths:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an unreadable / non-image file by returning
            # None; handing that to cv2.imwrite would raise.
            print('skipping unreadable image: ' + img_path, file=sys.stderr)
            continue
        # Draw names until an unused one is found so that an existing file
        # is never overwritten by a random-name collision.
        while True:
            write_path = dst_dir + '/' + str(randint(0, 999999999999)) + '.jpg'
            if not os.path.exists(write_path):
                break
        print(write_path)
        if not cv2.imwrite(write_path, img):
            print('failed to write image: ' + write_path, file=sys.stderr)


def create_dataset2(pathmap, train=0.8, val=0.2):
    """Copy the images in ``pathmap`` into ``data-keras/train`` and ``data-keras/val``.

    For each label the first ``int(n * train)`` paths go to the train set and
    the following ``int(n * val)`` paths go to the val set, where ``n`` is
    the number of paths for that label. Paths are taken in list order (no
    shuffling); any paths left over after both slices are not copied.
    Every image is read with OpenCV and written as
    ``<split>/<label>/<random number>.jpg``.

    Args:
        pathmap: Mapping of label -> list of source image paths.
        train: Fraction of each label's images used for the train set.
        val: Fraction of each label's images used for the val set.
    """
    dst = 'data-keras/'
    dst_train = dst + 'train'
    dst_val = dst + 'val'
    create_dirs(dst_train)
    create_dirs(dst_val)

    for label, load_paths_all in pathmap.items():
        images_num = len(load_paths_all)
        train_idx = int(images_num * train)
        val_idx = int(images_num * val)
        load_paths_train = load_paths_all[:train_idx]
        load_paths_val = load_paths_all[train_idx:train_idx + val_idx]
        print(label, images_num, len(load_paths_train), len(load_paths_val))

        _write_images(load_paths_train, dst_train + '/' + label)
        _write_images(load_paths_val, dst_val + '/' + label)
def main():
    """Collect the label -> image-path map and write an 80/20 train/val split."""
    create_dataset2(create_pathmap(), train=0.8, val=0.2)


# Run the dataset build only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment