Last active
December 18, 2017 22:43
-
-
Save ahmedhosny/508633da85df164cc0763e3be38b72cd to your computer and use it in GitHub Desktop.
Converts nrrd to hdf5 (without metadata)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# assumes CT image nrrds with minimum value of -1024
# assumes CT mask nrrds with 0's and 1's
# assumes the shape of each patient data (image and mask) are different - therefore,
# this will pad all images and masks to the size of the largest
# does not perform any interpolation to isotropic voxels or any normalization
# only saves the image and mask, therefore the metadata and pixel spacing is lost
import glob
import os

import h5py
import nrrd  # pip install pynrrd # probably better performance with sitk
import numpy as np
# input
# both folders should have the same number of files in the same order:
# the i-th image file is paired with the i-th mask file.
# folder with image nrrd files
image_nrrd_folder = '/path/..'
# folder with mask nrrd files
mask_nrrd_folder = '/path/..'
# output
# path of the hdf5 file that gets written
output = 'output.hdf5'
# dataset name
dataset = "someName"
# substitute value if larger | |
def addLargest(value,variable):
    """Return the larger of ``value`` and ``variable``.

    ``variable`` is the running maximum and ``value`` the new candidate.
    On a tie ``variable`` is returned.
    """
    # max() returns its first argument unless a later one is strictly
    # greater, so passing ``variable`` first keeps the running maximum on ties.
    return max(variable, value)
# globs
# glob.glob() returns paths in an arbitrary, file-system-dependent order, so
# both listings are sorted to keep the i-th image paired with the i-th mask.
# NOTE(review): this still assumes matching image/mask file names sort the
# same way in both folders - confirm against the actual naming scheme.
images = sorted(glob.glob(os.path.join(image_nrrd_folder, '*')))
masks = sorted(glob.glob(os.path.join(mask_nrrd_folder, '*')))
# sanity
# explicit raises instead of assert, which is stripped under `python -O`
if not images:
    raise ValueError('no image files found in %s' % image_nrrd_folder)
if len(images) != len(masks):
    raise ValueError('found %d image files but %d mask files'
                     % (len(images), len(masks)))
# | |
# suggest interpolation and normalization to happen here | |
# | |
# get largest dims
largest_dim_0 = 0
largest_dim_1 = 0
largest_dim_2 = 0
# loop through both folders to get the largest shape
for image, mask in zip(images, masks):
    # read image and mask nrrd; element 0 of the result is the voxel array
    image_data = nrrd.read(image)[0]
    mask_data = nrrd.read(mask)[0]
    # sanity: a mask must cover exactly the same grid as its image
    if image_data.shape != mask_data.shape:
        raise ValueError('shape mismatch: %s is %s but %s is %s'
                         % (image, image_data.shape, mask, mask_data.shape))
    # add largest
    largest_dim_0 = addLargest(image_data.shape[0], largest_dim_0)
    largest_dim_1 = addLargest(image_data.shape[1], largest_dim_1)
    largest_dim_2 = addLargest(image_data.shape[2], largest_dim_2)
# single-argument print() emits the same text on Python 2 and Python 3
print('largest shape:  %s %s %s'
      % (largest_dim_0, largest_dim_1, largest_dim_2))
# X collects the padded image volumes, Y the padded mask volumes
X = []
Y = []
# loop through both folders to populate the h5
for image, mask in zip(images, masks):
    # read image and mask nrrd; element 0 of the result is the voxel array
    image_data = nrrd.read(image)[0]
    mask_data = nrrd.read(mask)[0]
    shape = image_data.shape
    # make new -1024 and zero arrays of the largest shape; these values are
    # the padding (-1024 for the image, 0 for the mask)
    padded_shape = (largest_dim_0, largest_dim_1, largest_dim_2)
    image_arr = np.full(padded_shape, -1024, dtype=np.float32)
    mask_arr = np.zeros(padded_shape, dtype=np.int16)
    # pad them and append: the data goes into the low-index corner and the
    # remainder keeps the fill value
    image_arr[0:shape[0], 0:shape[1], 0:shape[2]] = image_data
    mask_arr[0:shape[0], 0:shape[1], 0:shape[2]] = mask_data
    X.append(image_arr)
    Y.append(mask_arr)
    # single-argument print() emits the same text on Python 2 and Python 3
    print('%s  shape is  %s' % (image, shape))
#
# write h5
#
# the with-block closes the file even if writing a dataset fails
with h5py.File(output, "w") as h5:
    # highest level hierarchy: one dataset for the images, one for the masks
    h5.create_dataset('X', dtype=np.float32, data=X)
    h5.create_dataset('Y', dtype=np.int16, data=Y)
#
# read h5
#
# convert into list of arrays + add a single channel
obj = {'X': [], 'Y': []}
# the with-block closes the file once both datasets are copied into memory
with h5py.File(output, "r") as h5:
    # np.array() loads the whole dataset; expand_dims appends a trailing
    # channel axis, and list() splits along the first axis
    obj['X'] = list(np.expand_dims(np.array(h5['X']), axis=-1).astype(np.float32))
    obj['Y'] = list(np.expand_dims(np.array(h5['Y']), axis=-1).astype(np.int16))
# single-argument print() emits the same text on Python 2 and Python 3
print('%s %s %s %s' % (len(obj['X']), obj['X'][0].shape,
                       len(obj['Y']), obj['Y'][0].shape))
# 2 (512, 512, 658, 1) 2 (512, 512, 658, 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment