Skip to content

Instantly share code, notes, and snippets.

@ahmedhosny
Last active December 18, 2017 22:43
Show Gist options
  • Save ahmedhosny/508633da85df164cc0763e3be38b72cd to your computer and use it in GitHub Desktop.
Save ahmedhosny/508633da85df164cc0763e3be38b72cd to your computer and use it in GitHub Desktop.
Converts nrrd to hdf5 (without metadata)
# assumes CT image nrrds with a minimum value of -1024
# assumes CT mask nrrds containing only 0's and 1's
# assumes the shapes of the patient data (image and mask) differ between patients - therefore,
# this will pad all images and masks to the size of the largest
# does not perform any interpolation to isotropic voxels or any normalization
# only saves the image and mask, therefore the metadata and pixel spacing are lost
import nrrd # pip install pynrrd # probably better performance with sitk
import numpy as np
import glob
import h5py
# ---- inputs ----
# Both folders must hold the same number of files, and the i-th image
# must correspond to the i-th mask.
# Folder containing the image nrrd files.
image_nrrd_folder = '/path/..'
# Folder containing the mask nrrd files.
mask_nrrd_folder = '/path/..'
# ---- output ----
# Path of the hdf5 file that gets written (and read back at the end).
output = 'output.hdf5'
# Dataset name (not referenced by the rest of the script as shown).
dataset = 'someName'
# substitute value if larger
def addLargest(value, variable):
    """Return the larger of ``value`` and ``variable``.

    Used to keep a running maximum: ``variable`` is the largest value seen
    so far and ``value`` is the new candidate.

    Args:
        value: Candidate value.
        variable: Current running maximum.

    Returns:
        ``value`` if it is strictly greater than ``variable``, otherwise
        ``variable``.
    """
    return value if value > variable else variable
# globs
# glob.glob returns paths in arbitrary (filesystem-dependent) order, so both
# listings are sorted to make the image/mask pairing deterministic. This
# relies on matching images and masks sorting into the same position.
images = sorted(glob.glob(image_nrrd_folder + '/*'))
masks = sorted(glob.glob(mask_nrrd_folder + '/*'))
# sanity
# Explicit raises instead of assert, so the checks survive `python -O`.
if len(images) != len(masks):
    raise ValueError(
        'found %d image files but %d mask files; the folders must contain '
        'the same number of files' % (len(images), len(masks)))
if not images:
    raise ValueError('no files found in %r' % (image_nrrd_folder,))
#
# suggest interpolation and normalization to happen here
#
# get largest dims
largest_dim_0 = 0
largest_dim_1 = 0
largest_dim_2 = 0
# loop through both folders to get the largest shape
for image, mask in zip(images, masks):
    # nrrd.read returns a (data, header) pair; only the voxel array is used
    image_data = nrrd.read(image)[0]
    mask_data = nrrd.read(mask)[0]
    # sanity: each mask must cover exactly the same grid as its image.
    # Raised explicitly (not asserted) so the check survives `python -O`
    # and the error names the offending pair.
    if image_data.shape != mask_data.shape:
        raise ValueError(
            'shape mismatch: %s has shape %s but %s has shape %s'
            % (image, image_data.shape, mask, mask_data.shape))
    # keep a running maximum along each of the three axes
    largest_dim_0 = addLargest(image_data.shape[0], largest_dim_0)
    largest_dim_1 = addLargest(image_data.shape[1], largest_dim_1)
    largest_dim_2 = addLargest(image_data.shape[2], largest_dim_2)
# single pre-formatted argument: prints the same text on Python 2 and 3
print('largest shape:  %s %s %s'
      % (largest_dim_0, largest_dim_1, largest_dim_2))
# padded image volumes (X) and padded mask volumes (Y), one entry per patient
X = []
Y = []
# every volume is padded to this common shape
padded_shape = (largest_dim_0, largest_dim_1, largest_dim_2)
# loop through both folders to populate the h5
for image, mask in zip(images, masks):
    # nrrd.read returns a (data, header) pair; only the voxel array is used
    image_data = nrrd.read(image)[0]
    mask_data = nrrd.read(mask)[0]
    shape = image_data.shape
    # background-filled canvases: -1024 for the image, 0 for the mask
    image_arr = np.full(padded_shape, -1024, dtype=np.float32)
    mask_arr = np.zeros(padded_shape, dtype=np.int16)
    # copy the original data into the low-index corner; the remainder of
    # each canvas keeps its background value (i.e. the padding)
    image_arr[0:shape[0], 0:shape[1], 0:shape[2]] = image_data
    mask_arr[0:shape[0], 0:shape[1], 0:shape[2]] = mask_data
    X.append(image_arr)
    Y.append(mask_arr)
    # single pre-formatted argument: prints the same text on Python 2 and 3
    print('%s  shape is  %s' % (image, shape))
#
# write h5
#
# The context manager closes the file even if writing a dataset raises,
# so a failed run does not leave an open handle behind.
with h5py.File(output, "w") as h5:
    # top-level datasets: stacked padded images (X) and masks (Y)
    h5.create_dataset('X', dtype=np.float32, data=X)
    h5.create_dataset('Y', dtype=np.int16, data=Y)
#
# read h5
#
# Load both datasets fully into memory inside the context manager so the
# file handle is released as soon as the data has been copied out.
with h5py.File(output, "r") as h5:
    x_stack = np.array(h5['X'])
    y_stack = np.array(h5['Y'])
# convert into list of arrays + add a single channel
# (axis=-1 appends the channel axis after the three spatial axes)
obj = {
    'X': list(np.expand_dims(x_stack, axis=-1).astype(np.float32)),
    'Y': list(np.expand_dims(y_stack, axis=-1).astype(np.int16)),
}
# single pre-formatted argument: prints the same text on Python 2 and 3
print('%s %s %s %s' % (len(obj['X']), obj['X'][0].shape,
                       len(obj['Y']), obj['Y'][0].shape))
# example output:
# 2 (512, 512, 658, 1) 2 (512, 512, 658, 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment