Skip to content

Instantly share code, notes, and snippets.

@PranjalDureja0002
Created May 7, 2021 11:23
Show Gist options
  • Save PranjalDureja0002/7a3ba031dce37485a599d2104126bf00 to your computer and use it in GitHub Desktop.
Save PranjalDureja0002/7a3ba031dce37485a599d2104126bf00 to your computer and use it in GitHub Desktop.
mask
# Build a table mask and a column mask for every annotated image and save
# both as JPEG files.
#
# Relies on names defined earlier in the notebook/script:
#   df_org  - DataFrame with `filename`, `width`, `height`, `xmin`, `ymin`,
#             `xmax`, `ymax` columns (one row per bounding box)
#   np      - numpy
#   imageio - imageio
import os  # needed only by this snippet, for portable path handling

# Forward slashes: the backslash spelling ('\content\drive\...') relies on
# invalid escape sequences and does not name a directory on POSIX systems.
destination_t = '/content/drive/MyDrive/cs2_table'
destination_c = '/content/drive/MyDrive/cs2_col'

# One pass over the frame instead of re-filtering it for every column;
# sort=False keeps files in order of first appearance, like `unique()` did.
for i, boxes in df_org.groupby('filename', sort=False):
    # Image size for this file. Every row of a file must agree on it, so
    # fail loudly rather than silently picking one of several values.
    widths = boxes['width'].unique()
    heights = boxes['height'].unique()
    if len(widths) != 1 or len(heights) != 1:
        raise ValueError(
            'inconsistent width/height recorded for {!r}'.format(i)
        )
    file_width = int(widths[0])
    file_height = int(heights[0])

    # Masks are 0 everywhere and 255 inside the boxes. uint8 holds both
    # values exactly and is the pixel type an 8-bit JPEG expects.
    col_mask = np.zeros((file_height, file_width), dtype=np.uint8)
    table_mask = np.zeros((file_height, file_width), dtype=np.uint8)

    # Column mask: fill every individual bounding box of this file.
    for xmin, ymin, xmax, ymax in zip(
        boxes['xmin'], boxes['ymin'], boxes['xmax'], boxes['ymax']
    ):
        col_mask[int(ymin):int(ymax), int(xmin):int(xmax)] = 255

    # Table mask: fill the single rectangle that encloses all of the boxes.
    xmin_resultant = int(boxes['xmin'].min())
    ymin_resultant = int(boxes['ymin'].min())
    xmax_resultant = int(boxes['xmax'].max())
    ymax_resultant = int(boxes['ymax'].max())
    table_mask[ymin_resultant:ymax_resultant,
               xmin_resultant:xmax_resultant] = 255

    # splitext drops the extension whatever its length (the previous
    # `i[:-4]` assumed exactly three characters after the dot).
    stem = os.path.splitext(i)[0]

    # Using imageio to save the resulting table mask and column mask.
    imageio.imsave(os.path.join(destination_t, stem + '.jpeg'), table_mask)
    imageio.imsave(os.path.join(destination_c, stem + '.jpeg'), col_mask)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment