Created
June 2, 2021 21:20
-
-
Save nateraw/1ff06cf09ac6ffe0a2bf8b84f82a4632 to your computer and use it in GitHub Desktop.
Read in cats and dogs, convert images to bytes, save as pickle to upload to HuggingFace's Datasets Hub.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle
from pathlib import Path

import tensorflow as tf
# Collect the Cat/Dog images under ./PetImages as raw bytes plus a lowercase
# label, delete every file that does not look like a JFIF image, and pickle
# the collected examples to train.pt.
#
# WARNING: this script is destructive -- files that fail the JFIF check are
# removed from disk.
root = Path('./PetImages')

# The marker is loop-invariant, so build it once instead of once per file.
jfif_marker = tf.compat.as_bytes('JFIF')

num_skipped = 0
examples = []
for folder_name in ("Cat", "Dog"):
    for fpath in (root / folder_name).glob('*'):
        # glob('*') yields sub-directories as well as files. Opening a
        # directory in 'rb' mode raises, which would abort the whole run, so
        # anything that is not a regular file is left untouched.
        if not fpath.is_file():
            continue

        with fpath.open('rb') as f:
            # NOTE(review): peek(10) does not advance the file position, so
            # the f.read() below still returns the complete file. peek() may
            # return more than the 10 bytes requested, so this test can match
            # the marker beyond the first 10 bytes -- confirm that is
            # acceptable before tightening it.
            if jfif_marker in f.peek(10):
                examples.append({'img_bytes': f.read(), 'labels': folder_name.lower()})
                continue

        # Only reached for files without the marker; the handle is already
        # closed here, so the file can be removed.
        num_skipped += 1
        fpath.unlink()
print("Deleted %d images" % num_skipped)

# Despite the .pt suffix, the output is a plain pickle of a list of dicts.
with Path("train.pt").open('wb') as f:
    pickle.dump(examples, f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment