Created
November 29, 2017 22:28
-
-
Save mpeven/f23eeda2ac91f494da6257de5e0791b0 to your computer and use it in GitHub Desktop.
oh no, this algo, for voxel flow, is too slow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Total time: 425.367 s | |
File: ntu_rgb.py | |
Function: get_voxel_flow_full at line 699 | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
699 @profile | |
700 def get_voxel_flow_full(self, vid_id): | |
701 ''' Create a voxel grid with displacement vectors ''' | |
702 | |
703 ############################################################## | |
704 # Create a map from rgb pixels to the depth camera xyz coordinate at | |
705 # that pixel. | |
706 # | |
707 # rgb_xyz : ndarray, shape [#frames, 1080, 1920, 3] | |
708 ############################################################## | |
709 | |
710 # Get metadata - for rotation and translation matrices | |
711 1 3 3.0 0.0 for metadatum in self.metadata: | |
712 1 2 2.0 0.0 if metadatum['video_index'] == vid_id: | |
713 1 2 2.0 0.0 m = metadatum | |
714 1 1 1.0 0.0 break | |
715 | |
716 # Get depth images | |
717 1 98035 98035.0 0.0 depth_ims = self.get_depth_images(vid_id) | |
718 1 15116 15116.0 0.0 depth_ims = depth_ims.astype(np.float32)/1000.0 | |
719 | |
720 # Make background negative so can discriminate between background | |
721 # and empty values | |
722 1 21748 21748.0 0.0 depth_ims[depth_ims == 0] = -1000 | |
723 | |
724 # Constants - image size | |
725 1 4 4.0 0.0 frames, H_depth, W_depth = depth_ims.shape | |
726 1 1 1.0 0.0 W_rgb, H_rgb = 1920, 1080 | |
727 | |
728 # Depth --> Depth-camera coordinates | |
729 1 1279 1279.0 0.0 Y, X = np.mgrid[0:H_depth, 0:W_depth] | |
730 1 38508 38508.0 0.0 x_3D = (X - cx_d) * depth_ims / fx_d | |
731 1 38214 38214.0 0.0 y_3D = (Y - cy_d) * depth_ims / fy_d | |
732 | |
733 # Apply rotation and translation | |
734 1 127127 127127.0 0.0 xyz_d = np.stack([x_3D, y_3D, depth_ims], axis=3) | |
735 1 395056 395056.0 0.1 xyz_rgb = m['T']*m['scale'] + m['R'] @ xyz_d[:,:,:,:,np.newaxis] | |
736 | |
737 # RGB-camera coordinates --> RGB pixel coordinates | |
738 1 81545 81545.0 0.0 x_rgb = (xyz_rgb[:,:,:,0] * rgb_mat[0,0] / xyz_rgb[:,:,:,2]) + rgb_mat[0,2] | |
739 1 81560 81560.0 0.0 y_rgb = (xyz_rgb[:,:,:,1] * rgb_mat[1,1] / xyz_rgb[:,:,:,2]) + rgb_mat[1,2] | |
740 1 10551 10551.0 0.0 x_rgb[x_rgb >= W_rgb] = 0 | |
741 1 13393 13393.0 0.0 y_rgb[y_rgb >= H_rgb] = 0 | |
742 | |
743 # Fill in sparse array | |
744 1 19 19.0 0.0 rgb_xyz_sparse = np.zeros([frames, H_rgb, W_rgb, 3]) | |
745 104 158 1.5 0.0 for frame in range(frames): | |
746 43775 63296 1.4 0.0 for y in range(H_depth): | |
747 22403736 32579589 1.5 7.7 for x in range(W_depth): | |
748 22360064 57692722 2.6 13.6 rgb_xyz_sparse[frame, int(y_rgb[frame,y,x]), int(x_rgb[frame,y,x])] = xyz_d[frame, y, x] | |
749 | |
750 # Fill in the rest of the sparse matrix | |
751 1 262385 262385.0 0.1 invalid = (rgb_xyz_sparse == 0) | |
752 1 98149523 98149523.0 23.1 ind = scipy.ndimage.distance_transform_edt(invalid, return_distances=False, return_indices=True) | |
753 1 5763444 5763444.0 1.4 rgb_xyz = rgb_xyz_sparse[tuple(ind)] | |
754 | |
755 # Remove background values by zeroing them out | |
756 1 3941410 3941410.0 0.9 rgb_xyz[rgb_xyz[:,:,:,2] < 0] = 0 | |
757 | |
758 ############################################################## | |
759 ############################################################## | |
760 | |
761 | |
762 | |
763 | |
764 ############################################################## | |
765 # Create 2D optical flow vectors for a video in an ndarray of size: | |
766 # [video_frames - 1 * 2 * vid_height * vid_width] | |
767 ############################################################## | |
768 | |
769 1 748449 748449.0 0.2 vid = self.get_rgb_vid_images(vid_id, True) | |
770 1 5 5.0 0.0 prev_frame = vid[0] | |
771 1 2 2.0 0.0 flow = None | |
772 1 18 18.0 0.0 op_flow_2D = np.zeros([len(vid) - 1, 2, vid.shape[1], vid.shape[2]]) | |
773 102 11969 117.3 0.0 for kk in tqdm(range(1,len(vid)-1), "Building 2D optical flow tensor"): | |
774 101 590 5.8 0.0 flow = cv2.calcOpticalFlowFarneback(vid[kk-1], vid[kk], flow, 0.4, | |
775 101 44015172 435793.8 10.3 1, 15, 3, 8, 1.2, cv2.OPTFLOW_FARNEBACK_GAUSSIAN) | |
776 101 285744 2829.1 0.1 op_flow_2D[kk-1,0,:,:] = flow[:,:,0].copy() | |
777 101 287546 2847.0 0.1 op_flow_2D[kk-1,1,:,:] = flow[:,:,1].copy() | |
778 | |
779 ############################################################## | |
780 ############################################################## | |
781 | |
782 | |
783 | |
784 | |
785 ############################################################## | |
786 # Create 3D Optical flow | |
787 # | |
788 # op_flow_3D : list (length = #frames in video-1) of ndarrays | |
789 # (shape: optical_flow_arrows * 6) (6 --> x,y,z,dx,dy,dz) | |
790 ############################################################## | |
791 | |
792 # Build list of framewise 3D optical flow vectors | |
793 1 2 2.0 0.0 op_flow_3D = [] | |
794 | |
795 # Note: starting at frame 2 (flow maps start at previous frame) | |
796 102 11455 112.3 0.0 for frame in tqdm(range(1, op_flow_2D.shape[0]), "Building 3D optical flow tensor"): | |
797 # Only look at non-zero rgb points | |
798 101 989773 9799.7 0.2 rgb_nonzero = np.nonzero(rgb_xyz[frame,:,:,2]) | |
799 101 401693 3977.2 0.1 flow_vectors = [] | |
800 | |
801 4655492 7990925 1.7 1.9 for u, v in zip(rgb_nonzero[1], rgb_nonzero[0]): | |
802 # Get optical flow vector | |
803 4655391 13235971 2.8 3.1 du, dv = op_flow_2D[frame, :, v, u] | |
804 | |
805 # Get start and end position in 3D using the flow map vector | |
806 4655391 28725156 6.2 6.8 p0 = rgb_xyz[frame - 1, int(v - dv), int(u - du)] | |
807 4655391 8821552 1.9 2.1 if p0[2] == 0: continue # Only want vectors that started at a non-zero point | |
808 4436983 7847816 1.8 1.8 p1 = rgb_xyz[frame, v, u] | |
809 | |
 810                                                   # Get displacement vector (if norm is larger than threshold) | |
811 4436983 55542843 12.5 13.1 dp = np.array([0,0,0]) if np.linalg.norm([du,dv]) < 1.0 else p1 - p0 | |
812 | |
813 4436983 12725870 2.9 3.0 flow_vectors.append(np.concatenate([p0, dp])) | |
814 | |
815 # Stack list of flow vectors into one array | |
816 101 4575558 45302.6 1.1 op_flow_3D.append(np.stack(flow_vectors)) | |
817 | |
818 # Zero mean x y & z (the starting point) | |
819 1 22026 22026.0 0.0 all_vecs = np.concatenate(op_flow_3D) | |
820 1 46273 46273.0 0.0 m = np.mean(all_vecs, axis=0) | |
821 102 160 1.6 0.0 for frame in range(len(op_flow_3D)): | |
822 101 15932 157.7 0.0 op_flow_3D[frame][:,0] -= m[0] | |
823 101 14144 140.0 0.0 op_flow_3D[frame][:,1] -= m[1] | |
824 101 4915 48.7 0.0 op_flow_3D[frame][:,2] -= m[2] | |
825 | |
826 ############################################################## | |
827 ############################################################## | |
828 | |
829 | |
830 | |
831 | |
832 | |
833 ############################################################## | |
834 # Map optical flow to a voxel grid | |
835 # | |
836 # voxel_flow : ndarray, shape [#frames, 100, 100, 100, 4] | |
837 ############################################################## | |
838 | |
839 1 2 2.0 0.0 VOXEL_SIZE = 100 | |
840 | |
841 # Pull useful stats out of optical flow | |
842 1 3 3.0 0.0 num_frames = len(op_flow_3D) | |
843 1 2354924 2354924.0 0.6 all_xyz = np.array([flow[0:3] for frame in op_flow_3D for flow in frame]) | |
844 1 53003 53003.0 0.0 max_x, max_y, max_z = np.max(all_xyz, axis=0) + 0.00001 | |
845 1 33738 33738.0 0.0 min_x, min_y, min_z = np.min(all_xyz, axis=0) | |
846 | |
847 1 1 1.0 0.0 voxel_flow_dicts = [] | |
848 # Fill in the voxel grid | |
849 102 11245 110.2 0.0 for frame in tqdm(range(num_frames), "Filling in Voxel Grid"): | |
850 | |
851 # Interpolate and discretize location of the voxels in the grid | |
852 101 31419 311.1 0.0 vox_x = np.floor((op_flow_3D[frame][:,0] - min_x)/(max_x - min_x) * VOXEL_SIZE).astype(int) | |
853 101 27231 269.6 0.0 vox_y = np.floor((op_flow_3D[frame][:,1] - min_y)/(max_y - min_y) * VOXEL_SIZE).astype(int) | |
854 101 21219 210.1 0.0 vox_z = np.floor((op_flow_3D[frame][:,2] - min_z)/(max_z - min_z) * VOXEL_SIZE).astype(int) | |
855 | |
856 # Get unique tuples of voxels, then average the flow vectors at each voxel | |
857 101 3845030 38069.6 0.9 filled_voxels = set([(a,b,c) for a,b,c in np.stack([vox_x,vox_y,vox_z]).T]) | |
858 | |
859 # Get the average displacement vector in each voxel | |
860 101 116088 1149.4 0.0 num_disp_vecs = {tup: 0.0 for tup in filled_voxels} | |
861 101 246463 2440.2 0.1 sum_disp_vecs = {tup: np.zeros([3]) for tup in filled_voxels} | |
862 101 200634 1986.5 0.0 avg_disp_vecs = {tup: np.zeros([3]) for tup in filled_voxels} | |
863 4437084 6637683 1.5 1.6 for i in range(len(op_flow_3D[frame])): | |
864 4436983 8799706 2.0 2.1 num_disp_vecs[(vox_x[i], vox_y[i], vox_z[i])] += 1.0 | |
865 4436983 12491019 2.8 2.9 sum_disp_vecs[(vox_x[i], vox_y[i], vox_z[i])] += op_flow_3D[frame][i,3:] | |
866 | |
867 496048 763359 1.5 0.2 for tup in filled_voxels: | |
868 495947 1185821 2.4 0.3 avg_disp_vecs[tup] = sum_disp_vecs[tup]/num_disp_vecs[tup] | |
869 | |
870 101 343 3.4 0.0 voxel_flow_dicts.append(avg_disp_vecs) | |
871 | |
872 # Turn voxel flow into numpy tensor | |
873 1 24 24.0 0.0 voxel_flow_tensor = np.zeros([num_frames, VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE, 4]) | |
874 102 196 1.9 0.0 for frame in range(num_frames): | |
875 496048 779448 1.6 0.2 for vox, disp_vec in voxel_flow_dicts[frame].items(): | |
876 495947 1077802 2.2 0.3 voxel_flow_tensor[frame, vox[0], vox[1], vox[2], 0] = 1.0 | |
877 495947 994696 2.0 0.2 voxel_flow_tensor[frame, vox[0], vox[1], vox[2], 1:] = disp_vec | |
878 | |
879 1 2 2.0 0.0 return voxel_flow_tensor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_voxel_flow_full(self, vid_id):
    '''
    Create a voxel grid with displacement vectors for one video.

    Parameters
    ----------
    vid_id : int
        Video index used to look up metadata, depth frames and rgb frames.

    Returns
    -------
    voxel_flow_tensor : ndarray, shape [#frames, 100, 100, 100, 4]
        Channel 0 is 1.0 where a voxel is occupied; channels 1:4 hold the
        average 3D displacement vector of the flow points in that voxel.

    Notes
    -----
    Relies on module-level camera intrinsics (cx_d, cy_d, fx_d, fy_d,
    rgb_mat) — assumed to be defined elsewhere in this file; verify.
    The Python-level pixel loops from the profiled version are replaced
    with vectorized NumPy scatter/gather operations (they accounted for
    roughly 60% of the 425 s runtime).
    '''
    ##############################################################
    # Map rgb pixels to the depth-camera xyz coordinate at that pixel.
    # rgb_xyz : ndarray, shape [#frames, 1080, 1920, 3]
    ##############################################################

    # Get metadata - for rotation and translation matrices
    for metadatum in self.metadata:
        if metadatum['video_index'] == vid_id:
            m = metadatum
            break

    # Get depth images (raw depth is millimeters; convert to meters)
    depth_ims = self.get_depth_images(vid_id)
    depth_ims = depth_ims.astype(np.float32)/1000.0

    # Make background negative so it can be told apart from empty values
    depth_ims[depth_ims == 0] = -1000

    # Constants - image size
    frames, H_depth, W_depth = depth_ims.shape
    W_rgb, H_rgb = 1920, 1080

    # Depth --> depth-camera coordinates (pinhole back-projection)
    Y, X = np.mgrid[0:H_depth, 0:W_depth]
    x_3D = (X - cx_d) * depth_ims / fx_d
    y_3D = (Y - cy_d) * depth_ims / fy_d

    # Apply rotation and translation into the rgb-camera frame
    xyz_d = np.stack([x_3D, y_3D, depth_ims], axis=3)
    xyz_rgb = m['T']*m['scale'] + m['R'] @ xyz_d[:,:,:,:,np.newaxis]

    # RGB-camera coordinates --> rgb pixel coordinates
    x_rgb = (xyz_rgb[:,:,:,0] * rgb_mat[0,0] / xyz_rgb[:,:,:,2]) + rgb_mat[0,2]
    y_rgb = (xyz_rgb[:,:,:,1] * rgb_mat[1,1] / xyz_rgb[:,:,:,2]) + rgb_mat[1,2]
    x_rgb[x_rgb >= W_rgb] = 0
    y_rgb[y_rgb >= H_rgb] = 0

    # Fill in the sparse array with one vectorized scatter instead of a
    # 22M-iteration Python loop (~21% of total runtime in the profile).
    # astype(int) truncates toward zero exactly like the original int().
    # NOTE(review): where several depth pixels map to the same rgb pixel,
    # NumPy keeps the last write in C order, matching the old loop order.
    xi = x_rgb.reshape(frames, H_depth, W_depth).astype(int)
    yi = y_rgb.reshape(frames, H_depth, W_depth).astype(int)
    rgb_xyz_sparse = np.zeros([frames, H_rgb, W_rgb, 3])
    frame_idx = np.arange(frames)[:, np.newaxis, np.newaxis]
    rgb_xyz_sparse[frame_idx, yi, xi] = xyz_d

    # Fill in the rest of the sparse matrix with nearest-neighbour indices
    invalid = (rgb_xyz_sparse == 0)
    ind = scipy.ndimage.distance_transform_edt(invalid, return_distances=False, return_indices=True)
    rgb_xyz = rgb_xyz_sparse[tuple(ind)]

    # Remove background values by zeroing them out
    rgb_xyz[rgb_xyz[:,:,:,2] < 0] = 0

    ##############################################################
    # 2D optical flow for the video:
    # op_flow_2D : ndarray [video_frames - 1, 2, vid_height, vid_width]
    ##############################################################

    vid = self.get_rgb_vid_images(vid_id, True)
    flow = None
    op_flow_2D = np.zeros([len(vid) - 1, 2, vid.shape[1], vid.shape[2]])
    # NOTE(review): range(1, len(vid)-1) leaves the last slot of op_flow_2D
    # all-zero — looks like an off-by-one; preserved to keep output identical.
    for kk in tqdm(range(1,len(vid)-1), "Building 2D optical flow tensor"):
        flow = cv2.calcOpticalFlowFarneback(vid[kk-1], vid[kk], flow, 0.4,
            1, 15, 3, 8, 1.2, cv2.OPTFLOW_FARNEBACK_GAUSSIAN)
        # assignment into op_flow_2D copies; the old .copy() was redundant
        op_flow_2D[kk-1,0,:,:] = flow[:,:,0]
        op_flow_2D[kk-1,1,:,:] = flow[:,:,1]

    ##############################################################
    # 3D optical flow
    #
    # op_flow_3D : list (length = #frames in video-1) of ndarrays
    #              (shape: optical_flow_arrows * 6) (6 --> x,y,z,dx,dy,dz)
    ##############################################################

    op_flow_3D = []
    # Note: starting at frame 1 (flow maps start at previous frame);
    # the per-pixel Python loop (~30% of runtime) is fully vectorized.
    for frame in tqdm(range(1, op_flow_2D.shape[0]), "Building 3D optical flow tensor"):
        # Only look at non-zero rgb points
        vv, uu = np.nonzero(rgb_xyz[frame,:,:,2])
        du = op_flow_2D[frame, 0, vv, uu]
        dv = op_flow_2D[frame, 1, vv, uu]

        # 3D position in the previous frame at the flow source pixel
        # (truncation toward zero matches the original int() casts)
        p0 = rgb_xyz[frame - 1, (vv - dv).astype(int), (uu - du).astype(int)]

        # Only keep vectors that started at a non-zero point
        valid = p0[:, 2] != 0
        p0 = p0[valid]
        p1 = rgb_xyz[frame, vv[valid], uu[valid]]

        # Displacement vector, zeroed where the 2D flow norm is below threshold
        dp = p1 - p0
        dp[np.hypot(du[valid], dv[valid]) < 1.0] = 0

        op_flow_3D.append(np.concatenate([p0, dp], axis=1))

    # Zero mean x y & z (the starting point); renamed from `m`, which
    # shadowed the metadata dict above
    all_vecs = np.concatenate(op_flow_3D)
    xyz_mean = np.mean(all_vecs, axis=0)
    for frame_vecs in op_flow_3D:
        frame_vecs[:, 0:3] -= xyz_mean[0:3]

    ##############################################################
    # Map optical flow to a voxel grid
    #
    # voxel_flow_tensor : ndarray, shape [#frames, 100, 100, 100, 4]
    ##############################################################

    VOXEL_SIZE = 100
    num_frames = len(op_flow_3D)

    # Bounds of the (already zero-meaned) point cloud; slicing the
    # concatenated array replaces a 2.3 s list comprehension
    all_xyz = np.concatenate(op_flow_3D)[:, 0:3]
    max_x, max_y, max_z = np.max(all_xyz, axis=0) + 0.00001
    min_x, min_y, min_z = np.min(all_xyz, axis=0)

    voxel_flow_tensor = np.zeros([num_frames, VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE, 4])
    for frame in tqdm(range(num_frames), "Filling in Voxel Grid"):
        pts = op_flow_3D[frame]

        # Interpolate and discretize location of the voxels in the grid
        vox_x = np.floor((pts[:,0] - min_x)/(max_x - min_x) * VOXEL_SIZE).astype(int)
        vox_y = np.floor((pts[:,1] - min_y)/(max_y - min_y) * VOXEL_SIZE).astype(int)
        vox_z = np.floor((pts[:,2] - min_z)/(max_z - min_z) * VOXEL_SIZE).astype(int)

        # Accumulate per-voxel counts and displacement sums in C with
        # np.add.at (handles repeated indices), replacing the per-point
        # Python dict loop (~10% of runtime)
        counts = np.zeros([VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE])
        sums = np.zeros([VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE, 3])
        np.add.at(counts, (vox_x, vox_y, vox_z), 1.0)
        np.add.at(sums, (vox_x, vox_y, vox_z), pts[:, 3:])

        # Occupancy flag + average displacement for every filled voxel
        filled = counts > 0
        voxel_flow_tensor[frame, :, :, :, 0][filled] = 1.0
        voxel_flow_tensor[frame, :, :, :, 1:][filled] = (
            sums[filled] / counts[filled, np.newaxis])

    return voxel_flow_tensor
if __name__ == '__main__':
    # Profile a single video (index 0); loop over dataset.num_vids to do all.
    ntu_dataset = NTU()
    ntu_dataset.get_voxel_flow_full(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment