Created
April 3, 2019 07:24
-
-
Save arcaduf/860ae7d2fef2a8b4f9e806a65efd514b to your computer and use it in GitHub Desktop.
Prepare Kaggle Ultrasound Nerve Segmentation dataset for DL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Link image data to ground truth: pair every image file with its mask file
and save the resulting index as a CSV file.
'''
from __future__ import print_function | |
import glob , os | |
import pandas as pd | |
import numpy as np | |
# Locations supplied by the user, plus the delimiter used for the output CSV
path_in = '<path to>/ultrasound_data/train/'
path_out = '<output path>'
SEP = ','

# Gather every .tif file in path_in; files whose name contains "mask" are
# the masks, and whatever remains is treated as an image
list_all = glob.glob(os.path.join(path_in, '*.tif'))
list_all.sort()
list_masks = glob.glob(os.path.join(path_in, '*mask*.tif'))
list_masks.sort()
list_imgs = sorted(set(list_all).difference(list_masks))

print('\nFound ', len(list_imgs), ' images')
print('Found ', len(list_masks), ' masks')

# Empty containers for the IDs and acquisition numbers parsed from the
# image and mask file names
list_ids_imgs, list_nums_imgs = [], []
list_ids_masks, list_nums_masks = [], []
def get_id(file_name):
    '''
    Return the ID encoded in a file name.

    The ID is the part of the base name that precedes the first
    underscore, e.g. '/some/dir/12_34.tif' -> 12.

    file_name : path (or bare name) of an image or mask file.

    Raises ValueError if that part is not an integer literal.
    '''
    # The builtin int replaces np.int, an alias that was removed from NumPy
    return int(os.path.basename(file_name).split('_')[0])
def get_num(file_name):
    '''
    Return the acquisition number encoded in a file name.

    The number is the second underscore-separated field of the base name,
    with any extension stripped, e.g. '1_2.tif' -> 2 and
    '1_2_mask.tif' -> 2.

    file_name : path (or bare name) of an image or mask file.

    Raises IndexError if the base name contains no underscore and
    ValueError if the field is not an integer literal.
    '''
    # Only the base name is inspected, so a directory that happens to
    # contain "mask" or an underscore cannot change the result
    field = os.path.basename(file_name).split('_')[1]
    # For an image the field still carries the extension ('2.tif'); for a
    # mask it is already bare ('2'). Cutting at the first dot handles both.
    return int(field.split('.')[0])
# Parse the ID and acquisition number out of every file name. Images and
# masks are walked independently, so a differing number of images and masks
# neither raises IndexError nor leaves trailing masks unparsed; files
# without a partner are dropped afterwards by the merge on ( id , number ).
list_ids_imgs = [get_id(img_file) for img_file in list_imgs]
list_nums_imgs = [get_num(img_file) for img_file in list_imgs]
list_ids_masks = [get_id(mask_file) for mask_file in list_masks]
list_nums_masks = [get_num(mask_file) for mask_file in list_masks]
# One table per file kind, keyed by ( id , number )
df_imgs = pd.DataFrame({
    'id': list_ids_imgs,
    'image': list_imgs,
    'number': list_nums_imgs,
})
df_masks = pd.DataFrame({
    'id': list_ids_masks,
    'mask': list_masks,
    'number': list_nums_masks,
})

# Join the two tables so each row holds an image path and its mask path
df = df_imgs.merge(df_masks, on=['id', 'number'])
print('\nMerge data frame shape: ', df.shape)
# Write the merged table to path_out, without the row index column
file_out = os.path.join(
    path_out,
    'master_index_imgs_and_masks.csv',
)
df.to_csv(file_out, index=False, sep=SEP)
print('\n\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment