@mpeven
Created November 29, 2017 22:28
oh no, this algo, for voxel flow, is too slow
Total time: 425.367 s
File: ntu_rgb.py
Function: get_voxel_flow_full at line 699
Line # Hits Time Per Hit % Time Line Contents
==============================================================
699 @profile
700 def get_voxel_flow_full(self, vid_id):
701 ''' Create a voxel grid with displacement vectors '''
702
703 ##############################################################
704 # Create a map from rgb pixels to the depth camera xyz coordinate at
705 # that pixel.
706 #
707 # rgb_xyz : ndarray, shape [#frames, 1080, 1920, 3]
708 ##############################################################
709
710 # Get metadata - for rotation and translation matrices
711 1 3 3.0 0.0 for metadatum in self.metadata:
712 1 2 2.0 0.0 if metadatum['video_index'] == vid_id:
713 1 2 2.0 0.0 m = metadatum
714 1 1 1.0 0.0 break
715
716 # Get depth images
717 1 98035 98035.0 0.0 depth_ims = self.get_depth_images(vid_id)
718 1 15116 15116.0 0.0 depth_ims = depth_ims.astype(np.float32)/1000.0
719
720 # Make background negative so can discriminate between background
721 # and empty values
722 1 21748 21748.0 0.0 depth_ims[depth_ims == 0] = -1000
723
724 # Constants - image size
725 1 4 4.0 0.0 frames, H_depth, W_depth = depth_ims.shape
726 1 1 1.0 0.0 W_rgb, H_rgb = 1920, 1080
727
728 # Depth --> Depth-camera coordinates
729 1 1279 1279.0 0.0 Y, X = np.mgrid[0:H_depth, 0:W_depth]
730 1 38508 38508.0 0.0 x_3D = (X - cx_d) * depth_ims / fx_d
731 1 38214 38214.0 0.0 y_3D = (Y - cy_d) * depth_ims / fy_d
732
733 # Apply rotation and translation
734 1 127127 127127.0 0.0 xyz_d = np.stack([x_3D, y_3D, depth_ims], axis=3)
735 1 395056 395056.0 0.1 xyz_rgb = m['T']*m['scale'] + m['R'] @ xyz_d[:,:,:,:,np.newaxis]
736
737 # RGB-camera coordinates --> RGB pixel coordinates
738 1 81545 81545.0 0.0 x_rgb = (xyz_rgb[:,:,:,0] * rgb_mat[0,0] / xyz_rgb[:,:,:,2]) + rgb_mat[0,2]
739 1 81560 81560.0 0.0 y_rgb = (xyz_rgb[:,:,:,1] * rgb_mat[1,1] / xyz_rgb[:,:,:,2]) + rgb_mat[1,2]
740 1 10551 10551.0 0.0 x_rgb[x_rgb >= W_rgb] = 0
741 1 13393 13393.0 0.0 y_rgb[y_rgb >= H_rgb] = 0
742
743 # Fill in sparse array
744 1 19 19.0 0.0 rgb_xyz_sparse = np.zeros([frames, H_rgb, W_rgb, 3])
745 104 158 1.5 0.0 for frame in range(frames):
746 43775 63296 1.4 0.0 for y in range(H_depth):
747 22403736 32579589 1.5 7.7 for x in range(W_depth):
748 22360064 57692722 2.6 13.6 rgb_xyz_sparse[frame, int(y_rgb[frame,y,x]), int(x_rgb[frame,y,x])] = xyz_d[frame, y, x]
749
750 # Fill in the rest of the sparse matrix
751 1 262385 262385.0 0.1 invalid = (rgb_xyz_sparse == 0)
752 1 98149523 98149523.0 23.1 ind = scipy.ndimage.distance_transform_edt(invalid, return_distances=False, return_indices=True)
753 1 5763444 5763444.0 1.4 rgb_xyz = rgb_xyz_sparse[tuple(ind)]
754
755 # Remove background values by zeroing them out
756 1 3941410 3941410.0 0.9 rgb_xyz[rgb_xyz[:,:,:,2] < 0] = 0
757
758 ##############################################################
759 ##############################################################
760
761
762
763
764 ##############################################################
765 # Create 2D optical flow vectors for a video in an ndarray of size:
766 # [video_frames - 1 * 2 * vid_height * vid_width]
767 ##############################################################
768
769 1 748449 748449.0 0.2 vid = self.get_rgb_vid_images(vid_id, True)
770 1 5 5.0 0.0 prev_frame = vid[0]
771 1 2 2.0 0.0 flow = None
772 1 18 18.0 0.0 op_flow_2D = np.zeros([len(vid) - 1, 2, vid.shape[1], vid.shape[2]])
773 102 11969 117.3 0.0 for kk in tqdm(range(1,len(vid)-1), "Building 2D optical flow tensor"):
774 101 590 5.8 0.0 flow = cv2.calcOpticalFlowFarneback(vid[kk-1], vid[kk], flow, 0.4,
775 101 44015172 435793.8 10.3 1, 15, 3, 8, 1.2, cv2.OPTFLOW_FARNEBACK_GAUSSIAN)
776 101 285744 2829.1 0.1 op_flow_2D[kk-1,0,:,:] = flow[:,:,0].copy()
777 101 287546 2847.0 0.1 op_flow_2D[kk-1,1,:,:] = flow[:,:,1].copy()
778
779 ##############################################################
780 ##############################################################
781
782
783
784
785 ##############################################################
786 # Create 3D Optical flow
787 #
788 # op_flow_3D : list (length = #frames in video-1) of ndarrays
789 # (shape: optical_flow_arrows * 6) (6 --> x,y,z,dx,dy,dz)
790 ##############################################################
791
792 # Build list of framewise 3D optical flow vectors
793 1 2 2.0 0.0 op_flow_3D = []
794
795 # Note: starting at frame 2 (flow maps start at previous frame)
796 102 11455 112.3 0.0 for frame in tqdm(range(1, op_flow_2D.shape[0]), "Building 3D optical flow tensor"):
797 # Only look at non-zero rgb points
798 101 989773 9799.7 0.2 rgb_nonzero = np.nonzero(rgb_xyz[frame,:,:,2])
799 101 401693 3977.2 0.1 flow_vectors = []
800
801 4655492 7990925 1.7 1.9 for u, v in zip(rgb_nonzero[1], rgb_nonzero[0]):
802 # Get optical flow vector
803 4655391 13235971 2.8 3.1 du, dv = op_flow_2D[frame, :, v, u]
804
805 # Get start and end position in 3D using the flow map vector
806 4655391 28725156 6.2 6.8 p0 = rgb_xyz[frame - 1, int(v - dv), int(u - du)]
807 4655391 8821552 1.9 2.1 if p0[2] == 0: continue # Only want vectors that started at a non-zero point
808 4436983 7847816 1.8 1.8 p1 = rgb_xyz[frame, v, u]
809
810 # Get displacement vector (if norm is larger than theshold)
811 4436983 55542843 12.5 13.1 dp = np.array([0,0,0]) if np.linalg.norm([du,dv]) < 1.0 else p1 - p0
812
813 4436983 12725870 2.9 3.0 flow_vectors.append(np.concatenate([p0, dp]))
814
815 # Stack list of flow vectors into one array
816 101 4575558 45302.6 1.1 op_flow_3D.append(np.stack(flow_vectors))
817
818 # Zero mean x y & z (the starting point)
819 1 22026 22026.0 0.0 all_vecs = np.concatenate(op_flow_3D)
820 1 46273 46273.0 0.0 m = np.mean(all_vecs, axis=0)
821 102 160 1.6 0.0 for frame in range(len(op_flow_3D)):
822 101 15932 157.7 0.0 op_flow_3D[frame][:,0] -= m[0]
823 101 14144 140.0 0.0 op_flow_3D[frame][:,1] -= m[1]
824 101 4915 48.7 0.0 op_flow_3D[frame][:,2] -= m[2]
825
826 ##############################################################
827 ##############################################################
828
829
830
831
832
833 ##############################################################
834 # Map optical flow to a voxel grid
835 #
836 # voxel_flow : ndarray, shape [#frames, 100, 100, 100, 4]
837 ##############################################################
838
839 1 2 2.0 0.0 VOXEL_SIZE = 100
840
841 # Pull useful stats out of optical flow
842 1 3 3.0 0.0 num_frames = len(op_flow_3D)
843 1 2354924 2354924.0 0.6 all_xyz = np.array([flow[0:3] for frame in op_flow_3D for flow in frame])
844 1 53003 53003.0 0.0 max_x, max_y, max_z = np.max(all_xyz, axis=0) + 0.00001
845 1 33738 33738.0 0.0 min_x, min_y, min_z = np.min(all_xyz, axis=0)
846
847 1 1 1.0 0.0 voxel_flow_dicts = []
848 # Fill in the voxel grid
849 102 11245 110.2 0.0 for frame in tqdm(range(num_frames), "Filling in Voxel Grid"):
850
851 # Interpolate and discretize location of the voxels in the grid
852 101 31419 311.1 0.0 vox_x = np.floor((op_flow_3D[frame][:,0] - min_x)/(max_x - min_x) * VOXEL_SIZE).astype(int)
853 101 27231 269.6 0.0 vox_y = np.floor((op_flow_3D[frame][:,1] - min_y)/(max_y - min_y) * VOXEL_SIZE).astype(int)
854 101 21219 210.1 0.0 vox_z = np.floor((op_flow_3D[frame][:,2] - min_z)/(max_z - min_z) * VOXEL_SIZE).astype(int)
855
856 # Get unique tuples of voxels, then average the flow vectors at each voxel
857 101 3845030 38069.6 0.9 filled_voxels = set([(a,b,c) for a,b,c in np.stack([vox_x,vox_y,vox_z]).T])
858
859 # Get the average displacement vector in each voxel
860 101 116088 1149.4 0.0 num_disp_vecs = {tup: 0.0 for tup in filled_voxels}
861 101 246463 2440.2 0.1 sum_disp_vecs = {tup: np.zeros([3]) for tup in filled_voxels}
862 101 200634 1986.5 0.0 avg_disp_vecs = {tup: np.zeros([3]) for tup in filled_voxels}
863 4437084 6637683 1.5 1.6 for i in range(len(op_flow_3D[frame])):
864 4436983 8799706 2.0 2.1 num_disp_vecs[(vox_x[i], vox_y[i], vox_z[i])] += 1.0
865 4436983 12491019 2.8 2.9 sum_disp_vecs[(vox_x[i], vox_y[i], vox_z[i])] += op_flow_3D[frame][i,3:]
866
867 496048 763359 1.5 0.2 for tup in filled_voxels:
868 495947 1185821 2.4 0.3 avg_disp_vecs[tup] = sum_disp_vecs[tup]/num_disp_vecs[tup]
869
870 101 343 3.4 0.0 voxel_flow_dicts.append(avg_disp_vecs)
871
872 # Turn voxel flow into numpy tensor
873 1 24 24.0 0.0 voxel_flow_tensor = np.zeros([num_frames, VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE, 4])
874 102 196 1.9 0.0 for frame in range(num_frames):
875 496048 779448 1.6 0.2 for vox, disp_vec in voxel_flow_dicts[frame].items():
876 495947 1077802 2.2 0.3 voxel_flow_tensor[frame, vox[0], vox[1], vox[2], 0] = 1.0
877 495947 994696 2.0 0.2 voxel_flow_tensor[frame, vox[0], vox[1], vox[2], 1:] = disp_vec
878
879 1 2 2.0 0.0 return voxel_flow_tensor
def get_voxel_flow_full(self, vid_id):
    ''' Create a voxel grid with displacement vectors '''

    ##############################################################
    # Create a map from rgb pixels to the depth camera xyz coordinate at
    # that pixel.
    #
    # rgb_xyz : ndarray, shape [#frames, 1080, 1920, 3]
    ##############################################################

    # Get metadata - for rotation and translation matrices
    for metadatum in self.metadata:
        if metadatum['video_index'] == vid_id:
            m = metadatum
            break

    # Get depth images
    depth_ims = self.get_depth_images(vid_id)
    depth_ims = depth_ims.astype(np.float32)/1000.0

    # Make background negative so we can discriminate between background
    # and empty values
    depth_ims[depth_ims == 0] = -1000

    # Constants - image size
    frames, H_depth, W_depth = depth_ims.shape
    W_rgb, H_rgb = 1920, 1080

    # Depth --> Depth-camera coordinates
    Y, X = np.mgrid[0:H_depth, 0:W_depth]
    x_3D = (X - cx_d) * depth_ims / fx_d
    y_3D = (Y - cy_d) * depth_ims / fy_d

    # Apply rotation and translation
    xyz_d = np.stack([x_3D, y_3D, depth_ims], axis=3)
    xyz_rgb = m['T']*m['scale'] + m['R'] @ xyz_d[:,:,:,:,np.newaxis]

    # RGB-camera coordinates --> RGB pixel coordinates
    x_rgb = (xyz_rgb[:,:,:,0] * rgb_mat[0,0] / xyz_rgb[:,:,:,2]) + rgb_mat[0,2]
    y_rgb = (xyz_rgb[:,:,:,1] * rgb_mat[1,1] / xyz_rgb[:,:,:,2]) + rgb_mat[1,2]
    x_rgb[x_rgb >= W_rgb] = 0
    y_rgb[y_rgb >= H_rgb] = 0

    # Fill in sparse array
    rgb_xyz_sparse = np.zeros([frames, H_rgb, W_rgb, 3])
    for frame in range(frames):
        for y in range(H_depth):
            for x in range(W_depth):
                rgb_xyz_sparse[frame, int(y_rgb[frame,y,x]), int(x_rgb[frame,y,x])] = xyz_d[frame, y, x]

    # Fill in the rest of the sparse matrix
    invalid = (rgb_xyz_sparse == 0)
    ind = scipy.ndimage.distance_transform_edt(invalid, return_distances=False, return_indices=True)
    rgb_xyz = rgb_xyz_sparse[tuple(ind)]

    # Remove background values by zeroing them out
    rgb_xyz[rgb_xyz[:,:,:,2] < 0] = 0

    ##############################################################
    ##############################################################

    ##############################################################
    # Create 2D optical flow vectors for a video in an ndarray of shape:
    # [video_frames - 1, 2, vid_height, vid_width]
    ##############################################################

    vid = self.get_rgb_vid_images(vid_id, True)
    prev_frame = vid[0]
    flow = None
    op_flow_2D = np.zeros([len(vid) - 1, 2, vid.shape[1], vid.shape[2]])
    for kk in tqdm(range(1, len(vid)-1), "Building 2D optical flow tensor"):
        flow = cv2.calcOpticalFlowFarneback(vid[kk-1], vid[kk], flow, 0.4,
                                            1, 15, 3, 8, 1.2, cv2.OPTFLOW_FARNEBACK_GAUSSIAN)
        op_flow_2D[kk-1,0,:,:] = flow[:,:,0].copy()
        op_flow_2D[kk-1,1,:,:] = flow[:,:,1].copy()

    ##############################################################
    ##############################################################

    ##############################################################
    # Create 3D optical flow
    #
    # op_flow_3D : list (length = #frames in video - 1) of ndarrays
    #   (shape: optical_flow_arrows x 6) (6 --> x,y,z,dx,dy,dz)
    ##############################################################

    # Build list of framewise 3D optical flow vectors
    op_flow_3D = []

    # Note: starting at frame 2 (flow maps start at previous frame)
    for frame in tqdm(range(1, op_flow_2D.shape[0]), "Building 3D optical flow tensor"):
        # Only look at non-zero rgb points
        rgb_nonzero = np.nonzero(rgb_xyz[frame,:,:,2])
        flow_vectors = []

        for u, v in zip(rgb_nonzero[1], rgb_nonzero[0]):
            # Get optical flow vector
            du, dv = op_flow_2D[frame, :, v, u]

            # Get start and end position in 3D using the flow map vector
            p0 = rgb_xyz[frame - 1, int(v - dv), int(u - du)]
            if p0[2] == 0: continue  # Only want vectors that started at a non-zero point
            p1 = rgb_xyz[frame, v, u]

            # Get displacement vector (if norm is larger than threshold)
            dp = np.array([0,0,0]) if np.linalg.norm([du,dv]) < 1.0 else p1 - p0

            flow_vectors.append(np.concatenate([p0, dp]))

        # Stack list of flow vectors into one array
        op_flow_3D.append(np.stack(flow_vectors))

    # Zero mean x, y & z (the starting point)
    all_vecs = np.concatenate(op_flow_3D)
    m = np.mean(all_vecs, axis=0)
    for frame in range(len(op_flow_3D)):
        op_flow_3D[frame][:,0] -= m[0]
        op_flow_3D[frame][:,1] -= m[1]
        op_flow_3D[frame][:,2] -= m[2]

    ##############################################################
    ##############################################################

    ##############################################################
    # Map optical flow to a voxel grid
    #
    # voxel_flow : ndarray, shape [#frames, 100, 100, 100, 4]
    ##############################################################

    VOXEL_SIZE = 100

    # Pull useful stats out of optical flow
    num_frames = len(op_flow_3D)
    all_xyz = np.array([flow[0:3] for frame in op_flow_3D for flow in frame])
    max_x, max_y, max_z = np.max(all_xyz, axis=0) + 0.00001
    min_x, min_y, min_z = np.min(all_xyz, axis=0)

    voxel_flow_dicts = []
    # Fill in the voxel grid
    for frame in tqdm(range(num_frames), "Filling in Voxel Grid"):

        # Interpolate and discretize location of the voxels in the grid
        vox_x = np.floor((op_flow_3D[frame][:,0] - min_x)/(max_x - min_x) * VOXEL_SIZE).astype(int)
        vox_y = np.floor((op_flow_3D[frame][:,1] - min_y)/(max_y - min_y) * VOXEL_SIZE).astype(int)
        vox_z = np.floor((op_flow_3D[frame][:,2] - min_z)/(max_z - min_z) * VOXEL_SIZE).astype(int)

        # Get unique tuples of voxels, then average the flow vectors at each voxel
        filled_voxels = set([(a,b,c) for a,b,c in np.stack([vox_x,vox_y,vox_z]).T])

        # Get the average displacement vector in each voxel
        num_disp_vecs = {tup: 0.0 for tup in filled_voxels}
        sum_disp_vecs = {tup: np.zeros([3]) for tup in filled_voxels}
        avg_disp_vecs = {tup: np.zeros([3]) for tup in filled_voxels}
        for i in range(len(op_flow_3D[frame])):
            num_disp_vecs[(vox_x[i], vox_y[i], vox_z[i])] += 1.0
            sum_disp_vecs[(vox_x[i], vox_y[i], vox_z[i])] += op_flow_3D[frame][i,3:]

        for tup in filled_voxels:
            avg_disp_vecs[tup] = sum_disp_vecs[tup]/num_disp_vecs[tup]

        voxel_flow_dicts.append(avg_disp_vecs)

    # Turn voxel flow into numpy tensor
    voxel_flow_tensor = np.zeros([num_frames, VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE, 4])
    for frame in range(num_frames):
        for vox, disp_vec in voxel_flow_dicts[frame].items():
            voxel_flow_tensor[frame, vox[0], vox[1], vox[2], 0] = 1.0
            voxel_flow_tensor[frame, vox[0], vox[1], vox[2], 1:] = disp_vec

    return voxel_flow_tensor
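
# --- Editor's sketch (not part of the original gist) ------------------------
# The profile attributes roughly 30% of the runtime to the per-pixel Python loop
# that builds op_flow_3D (the zip(...) iteration, one np.linalg.norm per vector,
# ~4.4M list appends). The helper below sketches the same per-frame step with
# whole-array operations. It assumes rgb_xyz and op_flow_2D are shaped as in
# get_voxel_flow_full above, relies on the module's `import numpy as np`, and is
# untested against the original outputs.
def build_frame_flow_vectors(rgb_xyz, op_flow_2D, frame):
    # Candidate pixels: RGB locations with a valid (non-zero depth) 3D point
    v, u = np.nonzero(rgb_xyz[frame, :, :, 2])
    du = op_flow_2D[frame, 0, v, u]
    dv = op_flow_2D[frame, 1, v, u]

    # 3D end points at this frame and start points at the previous frame
    p1 = rgb_xyz[frame, v, u]
    p0 = rgb_xyz[frame - 1, (v - dv).astype(int), (u - du).astype(int)]

    # Keep only vectors whose start point is valid (non-zero depth)
    valid = p0[:, 2] != 0
    p0, p1, du, dv = p0[valid], p1[valid], du[valid], dv[valid]

    # Displacement, zeroed where the 2D flow magnitude is below the 1-pixel threshold
    dp = p1 - p0
    dp[np.hypot(du, dv) < 1.0] = 0

    return np.concatenate([p0, dp], axis=1)   # shape [n_vectors, 6], like np.stack(flow_vectors)

# Usage would mirror the loop above, e.g.:
#   op_flow_3D = [build_frame_flow_vectors(rgb_xyz, op_flow_2D, f)
#                 for f in range(1, op_flow_2D.shape[0])]
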
if __name__ == '__main__':
    dataset = NTU()
    # for vid in range(dataset.num_vids):
    #     dataset.get_voxel_flow(vid)
    dataset.get_voxel_flow_full(0)
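
The dictionary-based voxel binning (the num_disp_vecs / sum_disp_vecs loops plus the tensor fill, around 8% of the runtime) can also be written as array scatter-adds with np.add.at. A rough sketch under the same assumptions as above; voxelize_flow and its signature are illustrative, not from the gist.

import numpy as np

def voxelize_flow(op_flow_3D, voxel_size=100):
    # Sketch: bin 3D flow vectors into a [frames, V, V, V, 4] occupancy + mean-flow grid,
    # matching the voxel_flow_tensor layout in get_voxel_flow_full above.
    num_frames = len(op_flow_3D)
    all_xyz = np.concatenate(op_flow_3D)[:, :3]
    mins = all_xyz.min(axis=0)
    maxs = all_xyz.max(axis=0) + 1e-5

    grid = np.zeros([num_frames, voxel_size, voxel_size, voxel_size, 4])
    for frame, vecs in enumerate(op_flow_3D):
        # Discretize start points into voxel indices (same scaling as the gist)
        idx = np.floor((vecs[:, :3] - mins) / (maxs - mins) * voxel_size).astype(int)
        vx, vy, vz = idx[:, 0], idx[:, 1], idx[:, 2]

        # Scatter-add displacement sums and counts, then take the mean per filled voxel
        counts = np.zeros([voxel_size] * 3)
        sums = np.zeros([voxel_size] * 3 + [3])
        np.add.at(counts, (vx, vy, vz), 1.0)
        np.add.at(sums, (vx, vy, vz), vecs[:, 3:])

        filled = counts > 0
        grid[frame, ..., 0][filled] = 1.0
        grid[frame, ..., 1:][filled] = sums[filled] / counts[filled][:, None]
    return grid
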