Skip to content

Instantly share code, notes, and snippets.

@NISH1001
Created February 16, 2018 15:13
Show Gist options
  • Save NISH1001/ac18381016b70d02bdc272b5afcdf285 to your computer and use it in GitHub Desktop.
Save NISH1001/ac18381016b70d02bdc272b5afcdf285 to your computer and use it in GitHub Desktop.
create dataset
import os
import cv2
from shutil import copyfile
from random import randint,shuffle
import string
def create_dir(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first: another process may create the directory
    # between the check and the makedirs call.
    os.makedirs(path, exist_ok=True)
def create_dirs(path):
    """Create one sub-directory of ``path`` per character label.

    The labels are the Devanagari vowels and consonants listed below plus
    the digits ``0`` through ``9``.

    Args:
        path: Parent directory under which the label directories are made.
    """
    vowel_labels = "अ आ इ ई उ ऊ ए ऐ ओ औ अं अ:".split()
    consonant_labels = "क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह क्ष त्र ज्ञ".split()
    digit_labels = list(map(str, range(0, 10)))

    for name in vowel_labels + consonant_labels + digit_labels:
        create_dir('/'.join((path, name)))
def create_pathmap():
    """Index every image under ``ncr-original/`` by its label.

    The source tree is read as ``ncr-original/<dtype>/<label>/<image>``.
    As a side effect, the per-label output directories under
    ``data-keras/train`` and ``data-keras/val`` are created.

    Returns:
        dict: Maps each label directory name to the list of image paths
        found for it. Labels whose directory is empty get no entry.
    """
    root = 'ncr-original/'
    dst = 'data-keras/'
    create_dirs(dst + 'train')
    create_dirs(dst + 'val')

    pathmap = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting lists (and any split derived from them) reproducible.
    # dtype => vowels, consonants, nums
    for dtype in sorted(os.listdir(root)):
        dtype_path = root + dtype
        # Skip stray files (e.g. a README) that would make listdir fail.
        if not os.path.isdir(dtype_path):
            continue
        # labels => क, ख, etc...
        for label in sorted(os.listdir(dtype_path)):
            img_dir = dtype_path + '/' + label + '/'
            if not os.path.isdir(img_dir):
                continue
            for image in sorted(os.listdir(img_dir)):
                pathmap.setdefault(label, []).append(img_dir + image)
    return pathmap
def _write_images_as_jpg(img_paths, dst_dir):
    """Re-encode each image in ``img_paths`` as a randomly named JPEG in ``dst_dir``."""
    # Labels outside the fixed list made by create_dirs would otherwise have
    # no destination directory.
    os.makedirs(dst_dir, exist_ok=True)
    for img_path in img_paths:
        img = cv2.imread(img_path)
        # cv2.imread signals an unreadable file by returning None; passing
        # that on to cv2.imwrite would abort the whole run.
        if img is None:
            print('skipping unreadable image: ' + img_path)
            continue
        write_path = dst_dir + '/' + str(randint(0, 999999999999)) + '.jpg'
        # Random names can collide; redraw rather than overwrite a file.
        while os.path.exists(write_path):
            write_path = dst_dir + '/' + str(randint(0, 999999999999)) + '.jpg'
        print(write_path)
        if not cv2.imwrite(write_path, img):
            print('failed to write image: ' + write_path)


def create_dataset2(pathmap, train=0.8, val=0.2):
    """Split each label's images into train/val sets under ``data-keras/``.

    For every label, the first ``train`` fraction of its paths goes to
    ``data-keras/train/<label>/`` and the following ``val`` fraction goes to
    ``data-keras/val/<label>/``. Images are re-encoded as JPEG files with
    random numeric names. Paths are taken in the order given; no shuffling
    is done here.

    Args:
        pathmap: dict mapping a label to a list of source image paths.
        train: Fraction of each label's images used for training.
        val: Fraction of each label's images used for validation.
    """
    dst = 'data-keras/'
    dst_train = dst + 'train'
    dst_val = dst + 'val'
    create_dirs(dst_train)
    create_dirs(dst_val)

    for label in pathmap:
        load_paths_all = pathmap[label]
        images_num = len(load_paths_all)
        train_idx = int(images_num * train)
        # Derive the end of the val slice from the combined fraction.
        # Truncating the two counts independently silently drops images
        # (e.g. 9 images at 0.8/0.2 would give 7 + 1 instead of 7 + 2).
        val_end = min(images_num, int(round(images_num * (train + val))))
        load_paths_train = load_paths_all[:train_idx]
        load_paths_val = load_paths_all[train_idx:val_end]
        print(label, images_num, len(load_paths_train), len(load_paths_val))

        # train set
        _write_images_as_jpg(load_paths_train, dst_train + '/' + label)
        # val set
        _write_images_as_jpg(load_paths_val, dst_val + '/' + label)
def main():
    """Index the source images, then write the 80/20 train/val split."""
    create_dataset2(create_pathmap(), train=0.8, val=0.2)
# Run the dataset build only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment