jszym · June 4, 2020 17:38
diff --git a/split.py b/split.py
 # Split a folder-per-class image dataset into train/test lists of paths,
 # using the name of each image's parent folder as its class label.
 import os
 # a library for discovering paths
 from glob import glob

 from sklearn.model_selection import train_test_split

 # "*" is a stand-in for any string, so this pattern matches every .png file
 # that sits exactly one folder below the current working directory
 # (normally the folder the script is run from): ./<class>/<name>.png
 # If the class subfolders were inside a folder "data", the pattern would be
 # "./data/*/*.png".
 # glob returns its matches in arbitrary order, so sort them; otherwise the
 # fixed random_state below would not give the same split on every machine.
 paths = sorted(glob("./*/*.png"))

 # >>> paths[:3]
 # ['.\\A\\a29ydW5pc2hpLnR0Zg==.png', '.\\A\\a2F6b28udHRm.png', '.\\A\\a2FpcmVlLnR0Zg==.png']
 # The double backslashes are there because this was run on a PC; they would
 # be forward slashes on mac/linux.

 # We need to generate the labels separately, and we get them from the paths.
 # The class is the name of the folder that directly contains the image.
 # os.path knows the current platform's separator, so this works on Windows
 # and on mac/linux, however deeply the class folders are nested.
 labels = [os.path.basename(os.path.dirname(path)) for path in paths]

 # Now we can use sklearn's split method: 20% of the samples are held out for
 # testing and random_state fixes the shuffle.
 x_train, x_test, y_train, y_test = train_test_split(paths, labels, test_size=0.2, random_state=42)
	# Split a folder-per-class image dataset into train/test lists of paths,
	# using the name of each image's parent folder as its class label.
	import os
	# a library for discovering paths
	from glob import glob

	from sklearn.model_selection import train_test_split

	# "*" is a stand-in for any string, so this pattern matches every .png file
	# that sits exactly one folder below the current working directory
	# (normally the folder the script is run from): ./<class>/<name>.png
	# If the class subfolders were inside a folder "data", the pattern would be
	# "./data/*/*.png".
	# glob returns its matches in arbitrary order, so sort them; otherwise the
	# fixed random_state below would not give the same split on every machine.
	paths = sorted(glob("./*/*.png"))

	# >>> paths[:3]
	# ['.\\A\\a29ydW5pc2hpLnR0Zg==.png', '.\\A\\a2F6b28udHRm.png', '.\\A\\a2FpcmVlLnR0Zg==.png']
	# The double backslashes are there because this was run on a PC; they would
	# be forward slashes on mac/linux.

	# We need to generate the labels separately, and we get them from the paths.
	# The class is the name of the folder that directly contains the image.
	# os.path knows the current platform's separator, so this works on Windows
	# and on mac/linux, however deeply the class folders are nested.
	labels = [os.path.basename(os.path.dirname(path)) for path in paths]

	# Now we can use sklearn's split method: 20% of the samples are held out for
	# testing and random_state fixes the shuffle.
	x_train, x_test, y_train, y_test = train_test_split(paths, labels, test_size=0.2, random_state=42)