PranjalDureja0002 · May 7, 2021 11:23
diff --git a/mask.py b/mask.py
 # Output folders for the table masks and the column masks.
 # Raw strings keep the backslashes literal: without the r-prefix, '\c', '\d'
 # and '\M' are unrecognized escape sequences and raise a warning on recent Pythons.
 # NOTE(review): backslash-separated paths only resolve as folders on Windows; if
 # this runs where the drive is mounted at /content/drive, forward slashes are
 # probably intended -- confirm the target platform.
 destination_t = r'\content\drive\MyDrive\cs2_table'
 destination_c = r'\content\drive\MyDrive\cs2_col'


 for i in df_org['filename'].unique():

     # Select this file's annotation rows once instead of re-filtering per column
     file_rows = df_org[df_org['filename'] == i]

     # Image size recorded for this file; int() expects a single unique value
     file_width = int(file_rows['width'].unique())
     file_height = int(file_rows['height'].unique())

     # Blank (all-zero) int32 masks with the same height/width as the image
     # NOTE(review): JPEG stores 8-bit samples; uint8 may be the better dtype here --
     # confirm how the installed imageio converts int32 before changing it.
     col_mask = np.zeros((file_height, file_width), dtype=np.int32)
     table_mask = np.zeros((file_height, file_width), dtype=np.int32)

     # Coordinates of every bounding box annotated for this file
     xmin_ = file_rows['xmin'].tolist()
     ymin_ = file_rows['ymin'].tolist()
     xmax_ = file_rows['xmax'].tolist()
     ymax_ = file_rows['ymax'].tolist()

     # Column mask: every annotated box is filled with 255, the rest stays 0.
     # Coordinates are cast to int (as for the table box below) so they are
     # always valid slice indices.
     for x0, y0, x1, y1 in zip(xmin_, ymin_, xmax_, ymax_):
         col_mask[int(y0):int(y1), int(x0):int(x1)] = 255

     # Table mask: the smallest box enclosing all annotated boxes of the file
     # (min of the minima, max of the maxima) is filled with 255.
     xmin_resultant = int(min(xmin_))
     ymin_resultant = int(min(ymin_))
     xmax_resultant = int(max(xmax_))
     ymax_resultant = int(max(ymax_))
     table_mask[ymin_resultant:ymax_resultant, xmin_resultant:xmax_resultant] = 255

     # Save both masks with imageio; i[:-4] drops the last 4 characters of the
     # file name before '.jpeg' is appended.
     imageio.imsave(destination_t + '\\' + i[:-4] + '.jpeg', table_mask)
     # col_mask is the column mask built above (the name 'column_mask' does not exist)
     imageio.imsave(destination_c + '\\' + i[:-4] + '.jpeg', col_mask)
	# Output folders for the table masks and the column masks.
	# Raw strings keep the backslashes literal: without the r-prefix, '\c', '\d'
	# and '\M' are unrecognized escape sequences and raise a warning on recent Pythons.
	# NOTE(review): backslash-separated paths only resolve as folders on Windows; if
	# this runs where the drive is mounted at /content/drive, forward slashes are
	# probably intended -- confirm the target platform.
	destination_t = r'\content\drive\MyDrive\cs2_table'
	destination_c = r'\content\drive\MyDrive\cs2_col'


	for i in df_org['filename'].unique():

		# Select this file's annotation rows once instead of re-filtering per column
		file_rows = df_org[df_org['filename'] == i]

		# Image size recorded for this file; int() expects a single unique value
		file_width = int(file_rows['width'].unique())
		file_height = int(file_rows['height'].unique())

		# Blank (all-zero) int32 masks with the same height/width as the image
		# NOTE(review): JPEG stores 8-bit samples; uint8 may be the better dtype here --
		# confirm how the installed imageio converts int32 before changing it.
		col_mask = np.zeros((file_height, file_width), dtype=np.int32)
		table_mask = np.zeros((file_height, file_width), dtype=np.int32)

		# Coordinates of every bounding box annotated for this file
		xmin_ = file_rows['xmin'].tolist()
		ymin_ = file_rows['ymin'].tolist()
		xmax_ = file_rows['xmax'].tolist()
		ymax_ = file_rows['ymax'].tolist()

		# Column mask: every annotated box is filled with 255, the rest stays 0.
		# Coordinates are cast to int (as for the table box below) so they are
		# always valid slice indices.
		for x0, y0, x1, y1 in zip(xmin_, ymin_, xmax_, ymax_):
			col_mask[int(y0):int(y1), int(x0):int(x1)] = 255

		# Table mask: the smallest box enclosing all annotated boxes of the file
		# (min of the minima, max of the maxima) is filled with 255.
		xmin_resultant = int(min(xmin_))
		ymin_resultant = int(min(ymin_))
		xmax_resultant = int(max(xmax_))
		ymax_resultant = int(max(ymax_))
		table_mask[ymin_resultant:ymax_resultant, xmin_resultant:xmax_resultant] = 255

		# Save both masks with imageio; i[:-4] drops the last 4 characters of the
		# file name before '.jpeg' is appended.
		imageio.imsave(destination_t + '\\' + i[:-4] + '.jpeg', table_mask)
		# col_mask is the column mask built above (the name 'column_mask' does not exist)
		imageio.imsave(destination_c + '\\' + i[:-4] + '.jpeg', col_mask)