timehaven’s gists

timehaven / akmtdfgen.py

Last active August 24, 2023 17:14

akmtdfgen: Keras multithreaded dataframe generator

	"""akmtdfgen: A Keras multithreaded dataframe generator.

	Works with Python 2.7 and Keras 2.x.

	For Python 3.x, need to fiddle with the threadsafe generator code.


	Test the generator_from_df() functions by running this file:

	python akmtdfgen.py

timehaven / file_path_from_db_id.py

Created July 19, 2017 15:29

	def file_path_from_db_id(db_id, pattern="blah_%d.png", top="/path/to/imgs"):
	"""Return file path /top/yyy/xx/blah_zzzxxyyy.png for db_id zzzxxyyy.

	The idea is to hash into 1k top level dirs, 000 - 999, then 100
	second level dirs, 00-99, so that the following database ids
	result in the associated file paths:

	1234567 /path/to/imgs/567/34/blah_1234567.png
	432 /path/to/imgs/432/00/blah_432.png
	29847 /path/to/imgs/847/29/blah_29847.png

timehaven / run0.sh

Created July 19, 2017 15:31

	cd /path/to/demo/data/.. # To be consistent with above.

	# If you are on an AWS Deep Learning AMI, Keras must be upgraded to 2.x:
	# sudo pip install keras --upgrade --no-deps

	curl -o classifier_from_little_data_script_1.py \
	https://gist.githubusercontent.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d/raw/47d3e33764c902ed33a64f35f5f68d911de05d8d/classifier_from_little_data_script_1.py

	python2 classifier_from_little_data_script_1.py

timehaven / run1.sh

Created July 19, 2017 15:32

	df_train.sample(5)

	imgpath target orig label
	object_id
	1797 /tmp/path/to/imgs/797/01/cat_1797.jpg 0 train/cats/cat.1797.jpg cat
	1678 /tmp/path/to/imgs/678/01/cat_1678.jpg 0 train/cats/cat.1678.jpg cat
	1348 /tmp/path/to/imgs/348/01/dog_1348.jpg 1 train/dogs/dog.1348.jpg dog
	1430 /tmp/path/to/imgs/430/01/cat_1430.jpg 0 train/cats/cat.1430.jpg cat
	1664 /tmp/path/to/imgs/664/01/cat_1664.jpg 0 train/cats/cat.1664.jpg cat
	In [124]:

timehaven / run2.py

Last active July 19, 2017 15:38

	train_datagen = ImageDataGenerator(...)
	test_datagen = ImageDataGenerator(rescale=1. / 255)
	train_generator = train_datagen.flow_from_directory(train_data_dir, ...)
	validation_generator = test_datagen.flow_from_directory(validation_data_dir, ...)

timehaven / run3.py

Created July 19, 2017 15:34

	#
	# New generator with file path list in DataFrame.
	#
	from akmtdfgen import get_demo_data
	from akmtdfgen import generator_from_df

	df_train, df_valid = get_demo_data()

	target_size = (img_width, img_height)
	train_generator = generator_from_df(df_train, batch_size, target_size)

timehaven / run4.sh

Created July 19, 2017 15:34

	git clone https://gist.github.com/257eef5b0e2d9e2625a9eb812ca2226b.git akmtdfgen
	mv data akmtdfgen
	cd akmtdfgen
	python classifier_from_little_data_script_dfgen.py

	Using TensorFlow backend.
	Some samples:

	df_train:
	object_id imgpath target orig label

timehaven / run5.sh

Created July 19, 2017 15:39

	python classifier_from_little_data_script_dfgen.py
	Using TensorFlow backend.
	9936 train dog
	9936 train cat
	2564 validation dog
	2564 validation cat
	Some samples:

	df_train:
	object_id imgpath target orig label

timehaven / mlasciiart.txt

Created July 19, 2017 15:42

timehaven / output0.sh

Created July 19, 2017 15:44

	df['path'] = df.object_id.apply(file_path_from_db_id)
	df

	object_id bi multi path
	index
	0 461756 dog white /path/to/imgs/756/61/blah_461756.png
	1 1161756 cat black /path/to/imgs/756/61/blah_1161756.png
	2 3303651 dog white /path/to/imgs/651/03/blah_3303651.png
	3 3367756 dog grey /path/to/imgs/756/67/blah_3367756.png
	4 3767756 dog grey /path/to/imgs/756/67/blah_3767756.png