- Install tensorflow-gpu by running
conda install -c anaconda tensorflow-gpu
- Install the NVIDIA dependencies for GPU support from
https://gist.github.com/msis/108a74d08f55eed48d8521fa968851ea
- You need the following libraries:
sudo apt-get install protobuf-compiler python-pil python-lxml python-tk
sudo pip install jupyter
sudo pip install matplotlib
- Make sure your annotations are in PASCAL VOC format.
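If you want a quick sanity check that your .xml files contain the fields the converter scripts below rely on (filename, image size, and one object block per box), here is a minimal sketch; the IMG1.xml path is only an example following the layout shown further down:
# Sketch: print the PASCAL VOC fields that xml_to_csv.py and generate_tfrecord.py read
import xml.etree.ElementTree as ET

def print_voc_annotation(xml_path):
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    print('filename:', root.find('filename').text,
          'size:', size.find('width').text, 'x', size.find('height').text)
    for obj in root.findall('object'):
        box = obj.find('bndbox')
        print('  class:', obj.find('name').text,
              'bbox:', [box.find(t).text for t in ('xmin', 'ymin', 'xmax', 'ymax')])

print_voc_annotation('images/train/IMG1.xml')  # adjust the path to one of your annotations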
- Get the TensorFlow Object Detection API
git clone https://github.com/tensorflow/models.git
- The TensorFlow Object Detection API uses Protobufs to configure model and training parameters; compile the Protobuf libraries:
cd models/research
protoc object_detection/protos/*.proto --python_out=.
- Add Libraries to PYTHONPATH
# From models/research/
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
- Test Installation
python object_detection/builders/model_builder_test.py
- The project directory will eventually look like this:
'''
tf_object_detection
├── images
│   ├── train
│   │   ├── IMG1.jpg
│   │   ├── IMG1.xml
│   │   ├── IMG2.jpg
│   │   └── IMG2.xml
│   └── test
│       ├── IMG_A.jpg
│       ├── IMG_A.xml
│       ├── IMG_B.jpg
│       └── IMG_B.xml
├── data
│   ├── train_labels.csv
│   ├── train.record
│   ├── test_labels.csv
│   ├── test.record
│   └── tf_object_detection.pbtxt
├── training
│   └── ssd_mobilenet_v1_coco.config
├── ssd_mobilenet_v1_coco_2017_11_17
│   ├── checkpoint
│   ├── frozen_inference_graph.pb
│   └── ...
├── generate_tfrecord.py
└── xml_to_csv.py
'''
- Create necessary folders
mkdir tf_object_detection
cd tf_object_detection/
mkdir images data
- Split the data into train and test sets
# from tf_object_detection/images/
python train_test_splitter.py --annotations=Annotations/ --images=JPEGImages/ --testsize=0.1 --outputdir=.
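Note that train_test_splitter.py is not one of the scripts fetched below; if you do not already have one, here is a minimal sketch that matches the flags in the command above (copying into train/ and test/ subfolders of --outputdir is an assumption based on the layout shown earlier):
# train_test_splitter.py (sketch): copy each .xml and its matching .jpg
# into train/ and test/ subfolders of --outputdir
import argparse, glob, os, random, shutil

parser = argparse.ArgumentParser()
parser.add_argument('--annotations', required=True)   # folder with PASCAL VOC .xml files
parser.add_argument('--images', required=True)        # folder with the .jpg files
parser.add_argument('--testsize', type=float, default=0.1)
parser.add_argument('--outputdir', default='.')
args = parser.parse_args()

xml_files = sorted(glob.glob(os.path.join(args.annotations, '*.xml')))
random.seed(42)
random.shuffle(xml_files)
n_test = int(len(xml_files) * args.testsize)
splits = {'test': xml_files[:n_test], 'train': xml_files[n_test:]}

for split_name, files in splits.items():
    out_dir = os.path.join(args.outputdir, split_name)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    for xml_path in files:
        stem = os.path.splitext(os.path.basename(xml_path))[0]
        shutil.copy(xml_path, out_dir)
        shutil.copy(os.path.join(args.images, stem + '.jpg'), out_dir)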
- Convert the annotations from XML to CSV
cd tf_object_detection
# get the following script for conversion
wget https://raw.githubusercontent.com/datitran/raccoon_dataset/master/xml_to_csv.py
Make the following changes to xml_to_csv.py:
...
def main():
    for directory in ['train', 'test']:
        image_path = 'images/{}'.format(directory)
        xml_df = xml_to_csv(image_path)
        xml_df.to_csv('data/{}_labels.csv'.format(directory), index=None)
        print('Successfully converted xml to csv.')
...
and run:
python xml_to_csv.py
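Optionally, sanity-check the CSVs before generating TFRecords; the column names below are what datitran's xml_to_csv.py writes:
# check_csv.py (sketch): confirm both CSVs have the expected columns and no empty labels
import pandas as pd

for name in ('train', 'test'):
    df = pd.read_csv('data/{}_labels.csv'.format(name))
    expected = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    assert list(df.columns) == expected, df.columns
    assert df['class'].notnull().all(), 'empty class labels in {}'.format(name)
    print(name, ':', len(df), 'boxes in', df['filename'].nunique(), 'images')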
- Convert the dataset to TFRecords
cd tf_object_detection
# get the following script for conversion
wget https://raw.githubusercontent.com/datitran/raccoon_dataset/master/generate_tfrecord.py
Make the following changes to generate_tfrecord.py
# NOTE: if you have multiple categories, add one branch per class
...
def class_text_to_int(row_label):
    if row_label == 'boat':
        return 1
    elif row_label == 'buoy':
        return 2
    elif row_label == 'other':
        return 3
    else:
        return None
...
def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        try:
            # look for the image in images/train first, fall back to images/test
            try:
                path = os.getcwd() + '/images/train'
                tf_example = create_tf_example(group, path)
            except:
                path = os.getcwd() + '/images/test'
                tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
        except:
            print('Check the path to {}'.format(group.filename))
    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))
and run:
python generate_tfrecord.py --csv_input=data/train_labels.csv --output_path=data/train.record
python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=data/test.record
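Optionally, confirm that both .record files actually contain examples; this uses the same TF 1.x tf.python_io API that generate_tfrecord.py relies on:
# check_tfrecords.py (sketch): count the serialized examples in each .record file
import tensorflow as tf

for name in ('train', 'test'):
    path = 'data/{}.record'.format(name)
    count = sum(1 for _ in tf.python_io.tf_record_iterator(path))
    print(path, '->', count, 'examples')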
- Download the model (.tar.gz) from the TensorFlow detection model zoo table and extract it into tf_object_detection/
- Download the matching model config file (.config); sample configs are in models/research/object_detection/samples/configs/ in the cloned repo.
- Change num_classes to the number of object classes in your dataset.
- Edit the following chunk:
...
fine_tune_checkpoint: "ssd_mobilenet_v1_coco_2017_11_17/model.ckpt" # NOTE: enter your model name
...
train_input_reader: {
  tf_record_input_reader {
    input_path: "data/train.record"   # NOTE: path to train.record
  }
  label_map_path: "data/tf_object_detection.pbtxt"   # NOTE: path to label map
}
...
eval_input_reader: {
  tf_record_input_reader {
    input_path: "data/test.record"   # NOTE: path to test.record
  }
  label_map_path: "data/tf_object_detection.pbtxt"   # NOTE: path to label map
  shuffle: false
  num_readers: 1
}
- Create labelmap
# from tf_object_detection/data/
nano tf_object_detection.pbtxt
# add the following to tf_object_detection.pbtxt
item {
  id: 1
  name: 'boat'
}
item {
  id: 2
  name: 'land'
}
item {
  id: 3
  name: 'miscellaneous'
}
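The names and ids here must line up with what class_text_to_int() in generate_tfrecord.py returns (the boat/buoy/other and boat/land/miscellaneous names above are placeholders; use your own classes consistently in both places). With PYTHONPATH set as in the installation step, you can double-check the parsed label map using the Object Detection API's own utility:
# check_label_map.py (sketch): parse the .pbtxt and print the name -> id mapping
# run from tf_object_detection/ with PYTHONPATH pointing at models/research and models/research/slim
from object_detection.utils import label_map_util

label_map = label_map_util.get_label_map_dict('data/tf_object_detection.pbtxt')
print(label_map)  # e.g. {'boat': 1, 'land': 2, 'miscellaneous': 3}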
- Training preparation
# from tf_object_detection
cp -r data/ models/research/object_detection/.
cp -r images/ models/research/object_detection/.
cp -r ssd_mobilenet_v1_coco_2017_11_17/ models/research/object_detection/.
cp ssd_mobilenet_v1_coco.config models/research/object_detection/.
- Start training!
# from models/research/object_detection/
mkdir training/
python train.py --logtostderr --train_dir=training/ --pipeline_config_path=ssd_mobilenet_v1_coco.config --num_clones=2 --ps_tasks=1
- Stop training once there is no significant decrease in the loss over the past hour or so, or add early stopping.
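One way to judge this without watching TensorBoard is to read the newest events file directly; a sketch, assuming the 'TotalLoss' tag that the legacy train.py summarizes (adjust if your run logs a different tag):
# check_loss.py (sketch): print the last few TotalLoss values from the newest events file
# run from models/research/object_detection/
import glob, os
import tensorflow as tf

event_file = max(glob.glob('training/events.out*'), key=os.path.getmtime)
losses = [(event.step, value.simple_value)
          for event in tf.train.summary_iterator(event_file)
          for value in event.summary.value
          if 'TotalLoss' in value.tag]
print(losses[-10:])  # stop training when these stop decreasing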
- Export the inference graph
# For trained_checkpoint_prefix, use the latest checkpoint that has all three files (.meta, .index, .data)
# Change the output_directory name as you wish
python export_inference_graph.py --input_type image_tensor --pipeline_config_path training/ssd_mobilenet_v1_coco.config --trained_checkpoint_prefix training/model.ckpt-9858 --output_directory boat_detection_graph
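A quick way to confirm the export worked is to load the frozen graph and look up the standard detection tensors it should expose; boat_detection_graph below is just the --output_directory used above:
# check_export.py (sketch): load the frozen graph and print the standard detection tensors
# run from models/research/object_detection/
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    graph_def = tf.GraphDef()
    with tf.gfile.GFile('boat_detection_graph/frozen_inference_graph.pb', 'rb') as f:
        graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, name='')

for name in ('image_tensor', 'detection_boxes', 'detection_scores',
             'detection_classes', 'num_detections'):
    print(name, graph.get_tensor_by_name(name + ':0'))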
- Test model
# get script for evaluation
wget https://gist.github.com/harshilpatel312/521ebe35f0346a5b87aa49a7db7efecf
python object_detection_tutorial.py -i PATH_TO_INPUT_IMAGE_VIDEO -n NUMBER_OF_CLASSES -m MODEL_NAME -l NAME_OF_LABELMAP -o OUTPUT_VIDEO_NAME
# example
# python object_detection_tutorial.py -i GOPR0293.MP4 -n 7 -m detection_graph_rfcn_resnet101 -l tf_object_detection.pbtxt -o out.MP4
- For later use
Move the events.out* files (used by TensorBoard), the model.ckpt.* files (checkpoints), and the checkpoint file to detection_graph_$XYZ to save them for later use.
mv training/model.ckpt* detection_graph_$XYZ/.
mv training/events.out* detection_graph_$XYZ/.
mv training/checkpoint detection_graph_$XYZ/.
- Extras
- check GPU usage:
watch -n 0.5 nvidia-smi
- check TensorBoard:
cd models/research/object_detection/
tensorboard --logdir=training/
# go to the link shown
- You can continue training by modifying step 13: change train_dir to detection_graph_$XYZ
python train.py --logtostderr --train_dir=detection_graph_rfcn_resnet101/ --pipeline_config_path=training/rfcn_resnet101_coco.config
To check TensorBoard:
cd models/research/object_detection/
tensorboard --logdir=detection_graph_$XYZ/
# go to the link shown
- If you have new data, generate the .csv and .record files: repeat steps 2, 4, 5 (edit class_text_to_int), 6, 10 (change num_classes), 11 and 12 from above.
- If you want to try a new model, repeat steps 7, 8, 9, 11 (change fine_tune_checkpoint), 12 (copy just the model and the .config file in training/ to models/research/object_detection/), clean up models/research/object_detection/training/ so that it only contains .config files, and 13 (start training with your new config file).
- Make sure you use a different output_directory name in step 16 for every new dataset/model.
- Troubleshooting
- MemoryError or ResourceExhaustedError: change the batch size to something smaller.
- ImportError: No module named 'deployment' or No module named 'nets'; fix:
cd models/research/
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
- ValueError: not enough values to unpack: change the batch_size in the .config file to match num_clones.
- ValueError: Tried to convert 't' to a tensor and failed.: in models/research/object_detection/utils/learning_schedules.py, change
rate_index = tf.reduce_max(tf.where(tf.greater_equal(global_step, boundaries), range(num_boundaries), [0] * num_boundaries))
to
rate_index = tf.reduce_max(tf.where(tf.greater_equal(global_step, boundaries), list(range(num_boundaries)), [0] * num_boundaries))
- Verify the data (xml, csv, tfrecords) thoroughly for a malformed dataset.