Last active
April 5, 2020 14:31
-
-
Save Ehsan1997/e49cc97d1e2f6ece090a437a604a059c to your computer and use it in GitHub Desktop.
A small script that creates a new dataset of smaller patches from a semantic segmentation dataset, and a function that reads the data back from the new dataset.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
This function can be used to read the images from the dataset created by patchify_dataset.py.
Example usage:
    X_test, y_test = get_images("ModMonuSeg/Test/", (im_width, im_height), gt_extension='png')
"""
def get_images(parent_dir, im_shape, img_folder="TissueImages/", gt_folder="GroundTruth/", gt_extension=None):
    """Load every tissue image and its ground-truth mask into float32 arrays.

    Args:
        parent_dir: Directory that contains ``img_folder`` and ``gt_folder``.
        im_shape: ``(im_width, im_height)`` every image and mask is resized to.
        img_folder: Sub-directory of ``parent_dir`` holding the input images.
        gt_folder: Sub-directory of ``parent_dir`` holding the masks.
        gt_extension: Optional file extension (without the dot) of the mask
            files; when given it replaces the extension taken from the image
            file name.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_images, im_height, im_width, 3)`` and ``y`` has shape
        ``(n_images, im_height, im_width, 1)``. Both are float32 and hold the
        loaded pixel values divided by 255.
    """
    tissue_dir = os.path.join(parent_dir, img_folder)
    gt_dir = os.path.join(parent_dir, gt_folder)
    im_width, im_height = im_shape

    # Only the files that sit directly inside tissue_dir are considered.
    ids = next(os.walk(tissue_dir))[2]
    print("No. of images = ", len(ids))

    X = np.zeros((len(ids), im_height, im_width, 3), dtype=np.float32)
    y = np.zeros((len(ids), im_height, im_width, 1), dtype=np.float32)

    # tqdm is used to display the progress bar
    for n, id_ in tqdm_notebook(enumerate(ids), total=len(ids)):
        # Load the image. The resize target is (rows, cols, channels), i.e.
        # height first, so that it matches the layout of X allocated above.
        x_img = img_to_array(load_img(os.path.join(tissue_dir, id_)))
        x_img = resize(x_img, (im_height, im_width, 3), mode='constant', preserve_range=True)

        # Derive the mask file name by inserting '_bin_mask' in front of the
        # last '-' separated component of the image file name.
        stem, _, tail = id_.rpartition('-')
        mask_id_ = stem + '_bin_mask-' + tail
        if gt_extension:
            # splitext only strips the final extension, so dots elsewhere in
            # the file name are preserved.
            mask_id_ = os.path.splitext(mask_id_)[0] + '.' + gt_extension

        # Load the mask as a single-channel image.
        mask = img_to_array(load_img(os.path.join(gt_dir, mask_id_), color_mode="grayscale"))
        mask = resize(mask, (im_height, im_width, 1), mode='constant', preserve_range=True)

        # Store both arrays scaled by 1/255.
        X[n] = x_img / 255.0
        y[n] = mask / 255.0
    return X, y
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Used to patchify a semantic segmentation dataset.
Set the directory of the current dataset, and be sure to change the parameters.
Give the new dataset directory, and also specify the sub-directories.
Patch size and stride can be changed.
"""
import os | |
from skimage.io import imread, imsave | |
from skimage.util.shape import view_as_windows | |
# Parameters to change.
curr_dataset_dir = "MonuSeg/"
new_dataset_dir = "ModMonuSeg/"
sub_dirs = ["Training/TissueImages/", "Training/GroundTruth/",
            "Test/TissueImages/", "Test/GroundTruth/"]
# Window shape and step handed to view_as_windows, keyed by image type.
# The step is half the patch size along both spatial axes, so neighbouring
# patches overlap by 50%.
patch_size = {'rgb': (256, 256, 3), 'grayscale': (256, 256)}
stride = {'rgb': (256//2, 256//2, 3), 'grayscale': (256//2, 256//2)}

# Create the new dataset directory
os.makedirs(new_dataset_dir, exist_ok=True)

for sub_dir in sub_dirs:
    new_dir_ = new_dataset_dir + sub_dir
    # Create sub_dir in the new dataset directory
    os.makedirs(new_dir_, exist_ok=True)
    dir_ = curr_dataset_dir + sub_dir
    # Only the files that sit directly inside dir_ are processed.
    ids = next(os.walk(dir_))[2]
    for id_ in ids:
        img = imread(dir_ + id_)
        # Any 3-dimensional array is treated as RGB, everything else as grayscale.
        img_type = 'rgb' if img.ndim == 3 else 'grayscale'

        # Split the name once per image. splitext keeps the leading dot in
        # `extension` and copes with names that contain several dots or none.
        file_name, extension = os.path.splitext(id_)

        new_imgs = view_as_windows(img, patch_size[img_type], stride[img_type])
        # Flatten the grid of windows into a plain sequence of patches. The
        # patch shape is taken from patch_size so it follows the parameters above.
        new_imgs = new_imgs.reshape(-1, *patch_size[img_type])

        for i, patch in enumerate(new_imgs):
            # Patches are numbered in the order the flattened window grid yields them.
            new_file_name = f'{file_name}-PCH{i}{extension}'
            imsave(new_dir_ + new_file_name, patch)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment