|
// If you change these two values below, change in SiftMatchCL.cpp also |
|
#define USE_LOCAL_MEMORY 1 |
|
#define WORKGROUP_SIZE 32 |
|
#define VECTOR_TYPE uchar16 |
|
#define VECTOR_LEN 16 |
|
#define VECTOR_ITS (128 / VECTOR_LEN) |
|
__kernel void FindBestMatchOneWay_Kernel(__global int *result, __global VECTOR_TYPE *des1, __global VECTOR_TYPE *des2, int num1, int num2, float distmax, float ratiomax) |
|
{ |
|
#if USE_LOCAL_MEMORY |
|
__local VECTOR_TYPE desc[WORKGROUP_SIZE][VECTOR_ITS+1]; |
|
#endif |
|
const int i = get_global_id(0); |
|
if (i < num1) |
|
{ |
|
// If we're using local memory, then copy this work-item's |
|
// feature descriptor into a local memory cache. |
|
#if USE_LOCAL_MEMORY |
|
const int lid = get_local_id(0); |
|
for (int k=0; k < VECTOR_ITS; ++k) |
|
desc[lid][k] = des1[i*VECTOR_ITS + k]; |
|
#endif |
|
int best_j = -1; |
|
int best_dist = 0; |
|
int second_best_dist = 0; |
|
// Search the vector des2 for the corresponding descriptor |
|
// that has the highest dot product with descriptor "i" of des1. |
|
for (int j=0; j < num2; ++j) |
|
{ |
|
// Compute the dot product for this pair of descriptors |
|
uint dot = 0; |
|
for (int d=0; d<VECTOR_ITS; ++d) { |
|
#if USE_LOCAL_MEMORY |
|
dot += desc[lid][d].s0 * des2[j*VECTOR_ITS + d].s0; |
|
dot += desc[lid][d].s1 * des2[j*VECTOR_ITS + d].s1; |
|
dot += desc[lid][d].s2 * des2[j*VECTOR_ITS + d].s2; |
|
dot += desc[lid][d].s3 * des2[j*VECTOR_ITS + d].s3; |
|
dot += desc[lid][d].s4 * des2[j*VECTOR_ITS + d].s4; |
|
dot += desc[lid][d].s5 * des2[j*VECTOR_ITS + d].s5; |
|
dot += desc[lid][d].s6 * des2[j*VECTOR_ITS + d].s6; |
|
dot += desc[lid][d].s7 * des2[j*VECTOR_ITS + d].s7; |
|
dot += desc[lid][d].s8 * des2[j*VECTOR_ITS + d].s8; |
|
dot += desc[lid][d].s9 * des2[j*VECTOR_ITS + d].s9; |
|
dot += desc[lid][d].sA * des2[j*VECTOR_ITS + d].sA; |
|
dot += desc[lid][d].sB * des2[j*VECTOR_ITS + d].sB; |
|
dot += desc[lid][d].sC * des2[j*VECTOR_ITS + d].sC; |
|
dot += desc[lid][d].sD * des2[j*VECTOR_ITS + d].sD; |
|
dot += desc[lid][d].sE * des2[j*VECTOR_ITS + d].sE; |
|
dot += desc[lid][d].sF * des2[j*VECTOR_ITS + d].sF; |
|
#else |
|
dot += des1[i*VECTOR_ITS + d].s0 * des2[j*VECTOR_ITS + d].s0; |
|
dot += des1[i*VECTOR_ITS + d].s1 * des2[j*VECTOR_ITS + d].s1; |
|
dot += des1[i*VECTOR_ITS + d].s2 * des2[j*VECTOR_ITS + d].s2; |
|
dot += des1[i*VECTOR_ITS + d].s3 * des2[j*VECTOR_ITS + d].s3; |
|
dot += des1[i*VECTOR_ITS + d].s4 * des2[j*VECTOR_ITS + d].s4; |
|
dot += des1[i*VECTOR_ITS + d].s5 * des2[j*VECTOR_ITS + d].s5; |
|
dot += des1[i*VECTOR_ITS + d].s6 * des2[j*VECTOR_ITS + d].s6; |
|
dot += des1[i*VECTOR_ITS + d].s7 * des2[j*VECTOR_ITS + d].s7; |
|
dot += des1[i*VECTOR_ITS + d].s8 * des2[j*VECTOR_ITS + d].s8; |
|
dot += des1[i*VECTOR_ITS + d].s9 * des2[j*VECTOR_ITS + d].s9; |
|
dot += des1[i*VECTOR_ITS + d].sA * des2[j*VECTOR_ITS + d].sA; |
|
dot += des1[i*VECTOR_ITS + d].sB * des2[j*VECTOR_ITS + d].sB; |
|
dot += des1[i*VECTOR_ITS + d].sC * des2[j*VECTOR_ITS + d].sC; |
|
dot += des1[i*VECTOR_ITS + d].sD * des2[j*VECTOR_ITS + d].sD; |
|
dot += des1[i*VECTOR_ITS + d].sE * des2[j*VECTOR_ITS + d].sE; |
|
dot += des1[i*VECTOR_ITS + d].sF * des2[j*VECTOR_ITS + d].sF; |
|
#endif |
|
} |
|
if (dot > best_dist) { |
|
best_j = j; |
|
second_best_dist = best_dist; |
|
best_dist = dot; |
|
} |
|
else if (dot > second_best_dist) { |
|
second_best_dist = dot; |
|
} |
|
} |
|
// The SIFT descriptor vectors are normalized to length 512, so |
|
// we scale distance by 1/512^2. |
|
const float best_dist_normed = |
|
acos(fmin(best_dist * 0.000003814697265625f, 1.0f)); |
|
// Check if match distance fails threshold |
|
if (best_dist_normed > distmax) { |
|
result[i] = -1; |
|
return; |
|
} |
|
const float second_best_dist_normed = |
|
acos(fmin(second_best_dist * 0.000003814697265625f, 1.0f)); |
|
// Check if match fails ratio test |
|
if (best_dist_normed >= ratiomax * second_best_dist_normed) { |
|
result[i] = -1; |
|
return; |
|
} |
|
// Record the match as valid! |
|
result[i] = best_j; |
|
} |
|
} |
|
// Guided filter as implemented in the COLMAP CPU version. Parameter "m" |
|
// is a 3x3 fundamental matrix or homography matrix, depending on the |
|
// value of "guided". Parameter "errormax" is the squared maximum |
|
// residual threshold. |
|
bool Guided(float x1, float y1, float x2, float y2, int guided, __constant float *m, float errormax) |
|
{ |
|
float residual2; |
|
const float a0 = m[0]*x1 + m[1]*y1 + m[2]; |
|
const float a1 = m[3]*x1 + m[4]*y1 + m[5]; |
|
const float a2 = m[6]*x1 + m[7]*y1 + m[8]; |
|
if (!(guided & 1)) // CALIBRATED or UNCALIBRATED two-view geometry |
|
{ |
|
const float b0 = m[0]*x2 + m[3]*y2 + m[6]; |
|
const float b1 = m[1]*x2 + m[4]*y2 + m[7]; |
|
const float c = x2*a0 + y2*a1 + a2; |
|
residual2 = c * c / (a0*a0 + a1*a1 + b0*b0 + b1*b1); |
|
} |
|
else // PLANAR or PANORAMIC two-view geometry |
|
{ |
|
const float b0 = a0 / a2 - x2; |
|
const float b1 = a1 / a2 - y2; |
|
residual2 = b0*b0 + b1*b1; |
|
} |
|
return residual2 > errormax; |
|
__kernel void FindGuidedBestMatchOneWay_Kernel(__global int *result, __global VECTOR_TYPE *des1, __global VECTOR_TYPE *des2, __global float *loc1, __global float *loc2, int num1, int num2, int guided, // 0 = Fundamental matrix, first pass |
|
// 1 = Homography matrix, first pass |
|
// 2 = Fundamental matrix, second pass of cross-check |
|
// 3 = Homography matrix, second pass of cross-check |
|
__constant float *gm, // Fundamental or homography matrix for guided filter |
|
float distmax, float ratiomax, float errormax) |
|
#if USE_LOCAL_MEMORY |
|
__local VECTOR_TYPE desc[WORKGROUP_SIZE][VECTOR_ITS+1]; |
|
#endif |
|
const int i = get_global_id(0); |
|
if (i < num1) |
|
{ |
|
// If we're using local memory, then copy this work-item's |
|
// feature descriptor into a local memory cache. |
|
#if USE_LOCAL_MEMORY |
|
const int lid = get_local_id(0); |
|
for (int k=0; k < VECTOR_ITS; ++k) |
|
desc[lid][k] = des1[i*VECTOR_ITS + k]; |
|
#endif |
|
int best_j = -1; |
|
int best_dist = 0; |
|
int second_best_dist = 0; |
|
// Search the vector des2 for the corresponding descriptor |
|
// that has the highest dot product with descriptor "i" of des1. |
|
for (int j=0; j < num2; ++j) |
|
{ |
|
float x1, y1, x2, y2; |
|
uint dot = 0; |
|
|
|
// If this is the cross-check pass, we swap the order of the |
|
// pixel coordinates, since the gm matrix was computed as the |
|
// transform from the first image to the second. |
|
if (guided & 2) |
|
{ |
|
x1 = loc2[2*j]; |
|
y1 = loc2[2*j+1]; |
|
x2 = loc1[2*i]; |
|
y2 = loc1[2*i+1]; |
|
} |
|
else |
|
{ |
|
x1 = loc1[2*i]; |
|
y1 = loc1[2*i+1]; |
|
x2 = loc2[2*j]; |
|
y2 = loc2[2*j+1]; |
|
} |
|
// Now apply the guided filter, and if the feature pair is not |
|
// filtered out, then go ahead and compute the matching score. |
|
if (!Guided(x1, y1, x2, y2, guided, gm, errormax)) |
|
{ |
|
// Compute the dot product for this pair of descriptors |
|
for (int d=0; d<VECTOR_ITS; ++d) { |
|
#if USE_LOCAL_MEMORY |
|
dot += desc[lid][d].s0 * des2[j*VECTOR_ITS + d].s0; |
|
dot += desc[lid][d].s1 * des2[j*VECTOR_ITS + d].s1; |
|
dot += desc[lid][d].s2 * des2[j*VECTOR_ITS + d].s2; |
|
dot += desc[lid][d].s3 * des2[j*VECTOR_ITS + d].s3; |
|
dot += desc[lid][d].s4 * des2[j*VECTOR_ITS + d].s4; |
|
dot += desc[lid][d].s5 * des2[j*VECTOR_ITS + d].s5; |
|
dot += desc[lid][d].s6 * des2[j*VECTOR_ITS + d].s6; |
|
dot += desc[lid][d].s7 * des2[j*VECTOR_ITS + d].s7; |
|
dot += desc[lid][d].s8 * des2[j*VECTOR_ITS + d].s8; |
|
dot += desc[lid][d].s9 * des2[j*VECTOR_ITS + d].s9; |
|
dot += desc[lid][d].sA * des2[j*VECTOR_ITS + d].sA; |
|
dot += desc[lid][d].sB * des2[j*VECTOR_ITS + d].sB; |
|
dot += desc[lid][d].sC * des2[j*VECTOR_ITS + d].sC; |
|
dot += desc[lid][d].sD * des2[j*VECTOR_ITS + d].sD; |
|
dot += desc[lid][d].sE * des2[j*VECTOR_ITS + d].sE; |
|
dot += desc[lid][d].sF * des2[j*VECTOR_ITS + d].sF; |
|
#else |
|
dot += des1[i*VECTOR_ITS + d].s0 * des2[j*VECTOR_ITS + d].s0; |
|
dot += des1[i*VECTOR_ITS + d].s1 * des2[j*VECTOR_ITS + d].s1; |
|
dot += des1[i*VECTOR_ITS + d].s2 * des2[j*VECTOR_ITS + d].s2; |
|
dot += des1[i*VECTOR_ITS + d].s3 * des2[j*VECTOR_ITS + d].s3; |
|
dot += des1[i*VECTOR_ITS + d].s4 * des2[j*VECTOR_ITS + d].s4; |
|
dot += des1[i*VECTOR_ITS + d].s5 * des2[j*VECTOR_ITS + d].s5; |
|
dot += des1[i*VECTOR_ITS + d].s6 * des2[j*VECTOR_ITS + d].s6; |
|
dot += des1[i*VECTOR_ITS + d].s7 * des2[j*VECTOR_ITS + d].s7; |
|
dot += des1[i*VECTOR_ITS + d].s8 * des2[j*VECTOR_ITS + d].s8; |
|
dot += des1[i*VECTOR_ITS + d].s9 * des2[j*VECTOR_ITS + d].s9; |
|
dot += des1[i*VECTOR_ITS + d].sA * des2[j*VECTOR_ITS + d].sA; |
|
dot += des1[i*VECTOR_ITS + d].sB * des2[j*VECTOR_ITS + d].sB; |
|
dot += des1[i*VECTOR_ITS + d].sC * des2[j*VECTOR_ITS + d].sC; |
|
dot += des1[i*VECTOR_ITS + d].sD * des2[j*VECTOR_ITS + d].sD; |
|
dot += des1[i*VECTOR_ITS + d].sE * des2[j*VECTOR_ITS + d].sE; |
|
dot += des1[i*VECTOR_ITS + d].sF * des2[j*VECTOR_ITS + d].sF; |
|
#endif |
|
} |
|
if (dot > best_dist) { |
|
|
|
best_j = j; |
|
second_best_dist = best_dist; |
|
best_dist = dot; |
|
} |
|
else if (dot > second_best_dist) { |
|
second_best_dist = dot; |
|
} |
|
} |
|
// The SIFT descriptor vectors are normalized to length 512, so |
|
// we scale distance by 1/512^2. |
|
const float best_dist_normed = |
|
acos(fmin(best_dist * 0.000003814697265625f, 1.0f)); |
|
// Check if match distance fails threshold |
|
if (best_dist_normed > distmax) { |
|
result[i] = -1; |
|
return; |
|
} |
|
const float second_best_dist_normed = |
|
acos(fmin(second_best_dist * 0.000003814697265625f, 1.0f)); |
|
// Check if match fails ratio test |
|
if (best_dist_normed >= ratiomax * second_best_dist_normed) { |
|
result[i] = -1; |
|
return; |
|
} |
|
// Record the match as valid! |
|
result[i] = best_j; |
|
} |
|
} |
|
|
|
|
|
|
|
#define KWINDOWSIZE (KWINDOWRADIUS*2 + 1) |
|
static inline void Mat33DotVec3(const float mat[9], const float vec[3], float result[3]) { |
|
result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2]; |
|
result[1] = mat[3] * vec[0] + mat[4] * vec[1] + mat[5] * vec[2]; |
|
result[2] = mat[6] * vec[0] + mat[7] * vec[1] + mat[8] * vec[2]; |
|
} |
|
static inline void Mat33DotVec3Homogeneous(const float mat[9], const float vec[2], float result[2]) { |
|
const float inv_z = 1.0f / (mat[6] * vec[0] + mat[7] * vec[1] + mat[8]); |
|
result[0] = inv_z * (mat[0] * vec[0] + mat[1] * vec[1] + mat[2]); |
|
result[1] = inv_z * (mat[3] * vec[0] + mat[4] * vec[1] + mat[5]); |
|
} |
|
static inline float DotProduct3(const float vec1[3], const float vec2[3]) { |
|
return vec1[0] * vec2[0] + vec1[1] * vec2[1] + vec1[2] * vec2[2]; |
|
__constant sampler_t PosesImageSampler = CLK_NORMALIZED_COORDS_FALSE | |
|
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; |
|
} |
|
static inline void ComputeViewingAngles(const float point[3], const float normal[3], const int image_idx, float* cos_triangulation_angle, float* cos_incident_angle, image2d_array_t poses_images) { |
|
*cos_triangulation_angle = 0.0f; |
|
*cos_incident_angle = 0.0f; |
|
float C[3]; |
|
for (int i = 0; i < 3; ++i) { |
|
C[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 16, image_idx, 0, 0)).x; |
|
const float SX[3] = {C[0] - point[0], C[1] - point[1], C[2] - point[2]}; |
|
const float RX_inv_norm = rsqrt(DotProduct3(point, point)); |
|
const float SX_inv_norm = rsqrt(DotProduct3(SX, SX)); |
|
*cos_incident_angle = DotProduct3(SX, normal) * SX_inv_norm; |
|
*cos_triangulation_angle = DotProduct3(SX, point) * RX_inv_norm * SX_inv_norm; |
|
} |
|
static inline void ComposeHomography(image2d_array_t poses_images, const int image_idx, const int row, const int col, const float depth, const float normal[3], float H[9], __constant float * ref_inv_K) { |
|
float K[4]; |
|
for (int i = 0; i < 4; ++i) { |
|
K[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i, image_idx, 0, 0)).x; |
|
float R[9]; |
|
for (int i = 0; i < 9; ++i) { |
|
R[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 4, image_idx, 0, 0)).x; |
|
float T[3]; |
|
for (int i = 0; i < 3; ++i) { |
|
T[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 13, image_idx, 0, 0)).x; |
|
const float dist = |
|
depth * (normal[0] * (ref_inv_K[0] * col + ref_inv_K[1]) + |
|
normal[1] * (ref_inv_K[2] * row + ref_inv_K[3]) + normal[2]); |
|
const float inv_dist = 1.0f / dist; |
|
const float inv_dist_N0 = inv_dist * normal[0]; |
|
const float inv_dist_N1 = inv_dist * normal[1]; |
|
const float inv_dist_N2 = inv_dist * normal[2]; |
|
H[0] = ref_inv_K[0] * (K[0] * (R[0] + inv_dist_N0 * T[0]) + |
|
K[1] * (R[6] + inv_dist_N0 * T[2])); |
|
H[1] = ref_inv_K[2] * (K[0] * (R[1] + inv_dist_N1 * T[0]) + |
|
K[1] * (R[7] + inv_dist_N1 * T[2])); |
|
H[2] = K[0] * (R[2] + inv_dist_N2 * T[0]) + |
|
K[1] * (R[8] + inv_dist_N2 * T[2]) + |
|
ref_inv_K[1] * (K[0] * (R[0] + inv_dist_N0 * T[0]) + |
|
K[1] * (R[6] + inv_dist_N0 * T[2])) + |
|
ref_inv_K[3] * (K[0] * (R[1] + inv_dist_N1 * T[0]) + |
|
K[1] * (R[7] + inv_dist_N1 * T[2])); |
|
H[3] = ref_inv_K[0] * (K[2] * (R[3] + inv_dist_N0 * T[1]) + |
|
K[3] * (R[6] + inv_dist_N0 * T[2])); |
|
H[4] = ref_inv_K[2] * (K[2] * (R[4] + inv_dist_N1 * T[1]) + |
|
K[3] * (R[7] + inv_dist_N1 * T[2])); |
|
H[5] = K[2] * (R[5] + inv_dist_N2 * T[1]) + |
|
K[3] * (R[8] + inv_dist_N2 * T[2]) + |
|
ref_inv_K[1] * (K[2] * (R[3] + inv_dist_N0 * T[1]) + |
|
K[3] * (R[6] + inv_dist_N0 * T[2])) + |
|
ref_inv_K[3] * (K[2] * (R[4] + inv_dist_N1 * T[1]) + |
|
K[3] * (R[7] + inv_dist_N1 * T[2])); |
|
H[6] = ref_inv_K[0] * (R[6] + inv_dist_N0 * T[2]); |
|
H[7] = ref_inv_K[2] * (R[7] + inv_dist_N1 * T[2]); |
|
H[8] = R[8] + ref_inv_K[1] * (R[6] + inv_dist_N0 * T[2]) + |
|
ref_inv_K[3] * (R[7] + inv_dist_N1 * T[2]) + inv_dist_N2 * T[2]; |
|
__constant sampler_t ReadRefImageSampler = CLK_NORMALIZED_COORDS_FALSE | |
|
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; |
|
} |
|
static inline void ReadRefImageIntoSharedMemory(image2d_array_t ref_image, __local float* local_image, const int row, const int col, const int thread_id) { |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
if (row == 0) { |
|
int r = row - KWINDOWSIZE / 2; |
|
for (int i = 0; i < KWINDOWSIZE; ++i) { |
|
int c = col - LOCAL_MEM_WORK_GROUP_SIZE; |
|
for (int j = 0; j < 3; ++j) { |
|
local_image[thread_id + i * 3 * LOCAL_MEM_WORK_GROUP_SIZE + |
|
j * LOCAL_MEM_WORK_GROUP_SIZE] = read_imagef(ref_image, ReadRefImageSampler, (int4)(c, r, 0, 0)).x; |
|
c += LOCAL_MEM_WORK_GROUP_SIZE; |
|
} |
|
r += 1; |
|
} |
|
} else { |
|
for (int i = 1; i < KWINDOWSIZE; ++i) { |
|
for (int j = 0; j < 3; ++j) { |
|
local_image[thread_id + (i - 1) * 3 * LOCAL_MEM_WORK_GROUP_SIZE + |
|
j * LOCAL_MEM_WORK_GROUP_SIZE] = |
|
local_image[thread_id + i * 3 * LOCAL_MEM_WORK_GROUP_SIZE + |
|
j * LOCAL_MEM_WORK_GROUP_SIZE]; |
|
} |
|
} |
|
const int r = row + KWINDOWSIZE / 2; |
|
int c = col - LOCAL_MEM_WORK_GROUP_SIZE; |
|
const int i = KWINDOWSIZE - 1; |
|
for (int j = 0; j < 3; ++j) { |
|
local_image[thread_id + i * 3 * LOCAL_MEM_WORK_GROUP_SIZE + |
|
j * LOCAL_MEM_WORK_GROUP_SIZE] = read_imagef(ref_image, ReadRefImageSampler, (int4)(c, r, 0, 0)).x; |
|
c += LOCAL_MEM_WORK_GROUP_SIZE; |
|
} |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
} |
|
__constant sampler_t PccSrcImageSampler = CLK_NORMALIZED_COORDS_FALSE | |
|
CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR; |
|
static inline float pcc_compute(const float spatial_normalization_, const float color_normalization_, const float kMaxCost, int row, int col, float depth, const float* normal, const __local float* local_ref_image, float local_ref_sum, float local_ref_squared_sum, int src_image_idx, image2d_array_t src_images, image2d_array_t poses_images, __constant float * ref_inv_K) { |
|
float tform[9]; |
|
ComposeHomography(poses_images, src_image_idx, row, col, depth, normal, tform, ref_inv_K); |
|
float x_step[3], y_step[3]; |
|
for (int i=0; i<3; ++i) { |
|
x_step[i] = KWINDOWSTEP * tform[i*3]; |
|
y_step[i] = KWINDOWSTEP * tform[i*3+1]; |
|
} |
|
|
|
const int thread_id = get_local_id(0); |
|
const int row_start = row - KWINDOWRADIUS; |
|
const int col_start = col - KWINDOWRADIUS; |
|
float col_src = tform[0] * col_start + tform[1] * row_start + tform[2]; |
|
float row_src = tform[3] * col_start + tform[4] * row_start + tform[5]; |
|
float z = tform[6] * col_start + tform[7] * row_start + tform[8]; |
|
float base_col_src = col_src; |
|
float base_row_src = row_src; |
|
float base_z = z; |
|
int ref_image_idx = LOCAL_MEM_WORK_GROUP_SIZE - KWINDOWRADIUS + thread_id; |
|
int ref_image_base_idx = ref_image_idx; |
|
const float ref_center_color = |
|
local_ref_image[ref_image_idx + KWINDOWRADIUS * 3 * LOCAL_MEM_WORK_GROUP_SIZE + |
|
KWINDOWRADIUS]; |
|
const float ref_color_sum = local_ref_sum; |
|
const float ref_color_squared_sum = local_ref_squared_sum; |
|
float src_color_sum = 0.0f; |
|
float src_color_squared_sum = 0.0f; |
|
float src_ref_color_sum = 0.0f; |
|
float bilateral_weight_sum = 0.0f; |
|
for (int row = -KWINDOWRADIUS; row <= KWINDOWRADIUS; row += KWINDOWSTEP) { |
|
for (int col = -KWINDOWRADIUS; col <= KWINDOWRADIUS; col += KWINDOWSTEP) { |
|
const float inv_z = 1.0f / z; |
|
const float norm_col_src = inv_z * col_src + 0.5f; |
|
const float norm_row_src = inv_z * row_src + 0.5f; |
|
const float ref_color = local_ref_image[ref_image_idx]; |
|
const float src_color = read_imagef(src_images, PccSrcImageSampler, (float4)(norm_col_src, norm_row_src, src_image_idx, 0)).x; |
|
const float spatial_dist_squared = row * row + col * col; |
|
const float color_dist = ref_center_color - ref_color; |
|
const float bilateral_weight = exp(-spatial_dist_squared * spatial_normalization_ - |
|
color_dist * color_dist * color_normalization_); |
|
const float bilateral_weight_src = bilateral_weight * src_color; |
|
src_color_sum += bilateral_weight_src; |
|
src_color_squared_sum += bilateral_weight_src * src_color; |
|
src_ref_color_sum += bilateral_weight_src * ref_color; |
|
bilateral_weight_sum += bilateral_weight; |
|
ref_image_idx += KWINDOWSTEP; |
|
col_src += x_step[0]; |
|
row_src += x_step[1]; |
|
z += x_step[2]; |
|
} |
|
ref_image_base_idx += KWINDOWSTEP * 3 * LOCAL_MEM_WORK_GROUP_SIZE; |
|
ref_image_idx = ref_image_base_idx; |
|
base_col_src += y_step[0]; |
|
base_row_src += y_step[1]; |
|
base_z += y_step[2]; |
|
col_src = base_col_src; |
|
row_src = base_row_src; |
|
z = base_z; |
|
} |
|
const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum; |
|
src_color_sum *= inv_bilateral_weight_sum; |
|
src_color_squared_sum *= inv_bilateral_weight_sum; |
|
src_ref_color_sum *= inv_bilateral_weight_sum; |
|
const float ref_color_var = |
|
ref_color_squared_sum - ref_color_sum * ref_color_sum; |
|
const float src_color_var = |
|
src_color_squared_sum - src_color_sum * src_color_sum; |
|
const float kMinVar = 1e-5f; |
|
float firstCost = 0.0f; |
|
if (ref_color_var < kMinVar || src_color_var < kMinVar) { |
|
return kMaxCost; |
|
} else { |
|
const float src_ref_color_covar = |
|
src_ref_color_sum - ref_color_sum * src_color_sum; |
|
const float src_ref_color_var = sqrt(ref_color_var * src_color_var); |
|
return max(0.0f, min(kMaxCost, 1.0f - src_ref_color_covar / src_ref_color_var)); |
|
} |
|
} |
|
static inline void ComputePointAtDepth(const float row, const float col, const float depth, float point[3], __constant float * ref_inv_K) { |
|
point[0] = depth * (ref_inv_K[0] * col + ref_inv_K[1]); |
|
point[1] = depth * (ref_inv_K[2] * row + ref_inv_K[3]); |
|
point[2] = depth; |
|
__constant sampler_t SrcDepthMapImageSampler = CLK_NORMALIZED_COORDS_FALSE | |
|
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; |
|
} |
|
static inline float ComputeGeomConsistencyCost(const float row, const float col, const float depth, const int image_idx, const float max_cost, image2d_array_t poses_images, image2d_array_t src_depth_map_images, __constant float * ref_inv_K, __constant float * ref_K) |
|
{ |
|
float P[12]; |
|
for (int i = 0; i < 12; ++i) { |
|
P[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 19, image_idx, 0, 0)).x; |
|
float inv_P[12]; |
|
for (int i = 0; i < 12; ++i) { |
|
inv_P[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 31, image_idx, 0, 0)).x; |
|
float forward_point[3]; |
|
ComputePointAtDepth(row, col, depth, forward_point, ref_inv_K); |
|
const float inv_forward_z = |
|
1.0f / (P[8] * forward_point[0] + P[9] * forward_point[1] + |
|
P[10] * forward_point[2] + P[11]); |
|
float src_col = |
|
inv_forward_z * (P[0] * forward_point[0] + P[1] * forward_point[1] + |
|
P[2] * forward_point[2] + P[3]); |
|
float src_row = |
|
inv_forward_z * (P[4] * forward_point[0] + P[5] * forward_point[1] + |
|
P[6] * forward_point[2] + P[7]); |
|
const float src_depth = read_imagef(src_depth_map_images, SrcDepthMapImageSampler, (float4)(src_col + 0.5f, src_row + 0.5f, image_idx, 0)).x; |
|
if (src_depth == 0.0f) { |
|
return max_cost; |
|
} |
|
src_col *= src_depth; |
|
src_row *= src_depth; |
|
const float backward_point_x = |
|
inv_P[0] * src_col + inv_P[1] * src_row + inv_P[2] * src_depth + inv_P[3]; |
|
const float backward_point_y = |
|
inv_P[4] * src_col + inv_P[5] * src_row + inv_P[6] * src_depth + inv_P[7]; |
|
const float backward_point_z = inv_P[8] * src_col + inv_P[9] * src_row + |
|
inv_P[10] * src_depth + inv_P[11]; |
|
const float inv_backward_point_z = 1.0f / backward_point_z; |
|
const float backward_col = |
|
inv_backward_point_z * |
|
(ref_K[0] * backward_point_x + ref_K[1] * backward_point_z); |
|
const float backward_row = |
|
inv_backward_point_z * |
|
(ref_K[2] * backward_point_y + ref_K[3] * backward_point_z); |
|
const float diff_col = col - backward_col; |
|
const float diff_row = row - backward_row; |
|
return min(max_cost, sqrt(diff_col * diff_col + diff_row * diff_row)); |
|
} |
|
__attribute__((reqd_work_group_size(LOCAL_MEM_WORK_GROUP_SIZE, 1, 1))) |
|
__kernel void ComputeInitialCostKernel(__global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_depth, int normal_map_pitch, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, float sigma_spatial, float sigma_color, image2d_array_t ref_image, image2d_array_t src_images, image2d_array_t poses_images, __constant float *ref_inv_K, __local float *local_ref_image) { |
|
const int thread_id = get_local_id(0); |
|
const int col = get_global_id(0); |
|
const float spatial_normalization_ = (1.0f / (2.0f * sigma_spatial * sigma_spatial)); |
|
const float color_normalization_ = (1.0f / (2.0f * sigma_color * sigma_color)); |
|
const float pcc_computer_kMaxCost = 2.0f; |
|
const __local float * pcc_computer_local_ref_image = local_ref_image; |
|
int pcc_computer_row = 0; |
|
int pcc_computer_col = col; |
|
float pcc_computer_depth = 0.0f; |
|
float normal[3]; |
|
const float * pcc_computer_normal = normal; |
|
float pcc_computer_local_ref_sum = 0.0f; |
|
float pcc_computer_local_ref_squared_sum = 0.0f; |
|
int pcc_computer_src_image_idx = -1; |
|
for (int row = 0; row < cost_map_height; ++row) { |
|
ReadRefImageIntoSharedMemory(ref_image, local_ref_image, row, col, thread_id); |
|
if (col < cost_map_width) { |
|
pcc_computer_depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * row) + col); |
|
for (int slice = 0; slice < normal_map_depth; ++slice) { |
|
normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col); |
|
} |
|
pcc_computer_local_ref_sum = *((__global float*)((__global char*)ref_sum_image + ref_sum_pitch * row) + col); |
|
pcc_computer_local_ref_squared_sum = *((__global float*)((__global char*)ref_squared_sum_image + ref_squared_sum_pitch * row) + col); |
|
for (int image_idx = 0; image_idx < cost_map_depth; ++image_idx) { |
|
pcc_computer_src_image_idx = image_idx; |
|
float pcc_computer_result = pcc_compute(spatial_normalization_, color_normalization_, pcc_computer_kMaxCost, pcc_computer_row, pcc_computer_col, pcc_computer_depth, pcc_computer_normal, pcc_computer_local_ref_image, pcc_computer_local_ref_sum, pcc_computer_local_ref_squared_sum, pcc_computer_src_image_idx, src_images, poses_images, ref_inv_K); |
|
*((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col) = pcc_computer_result; |
|
} |
|
pcc_computer_row += 1; |
|
} |
|
} |
|
} |
|
typedef struct { |
|
unsigned int d, v[5]; |
|
} clrandState; |
|
__kernel void InitRandomStateKernel(__global clrandState *output, int width, int height, uint pitch) |
|
{ |
|
const uint row = get_global_id(1); |
|
const uint col = get_global_id(0); |
|
if (col < width && row < height) { |
|
const uint id = row*width + col; |
|
__global clrandState *state = |
|
(__global clrandState*)((__global char*)output + pitch*row) + col; |
|
state->v[0] = 123456789U; |
|
state->v[1] = 362436069U; |
|
state->v[2] = 521288629U; |
|
state->v[3] = 88675123U; |
|
state->v[4] = 5783321U; |
|
state->d = 6615241U; |
|
const uint s0 = id ^ 0x2c7f967fU; |
|
const uint s1 = 0 ^ 0xa03697cbU; |
|
const uint t0 = 1228688033U * s0; |
|
const uint t1 = 2073658381U * s1; |
|
state->v[0] += t0; |
|
state->v[1] ^= t0; |
|
state->v[2] += t1; |
|
state->v[3] ^= t1; |
|
state->v[4] += t0; |
|
state->d += t1 + t0; |
|
} |
|
} |
|
static inline float clrand_uniform(clrandState* state) |
|
{ |
|
const float uint_inc = 2.3283064e-10f; |
|
unsigned int t = state->v[0] ^ (state->v[0] >> 2); |
|
state->v[0] = state->v[1]; |
|
state->v[1] = state->v[2]; |
|
state->v[2] = state->v[3]; |
|
state->v[3] = state->v[4]; |
|
state->v[4] = (state->v[4] ^ (state->v[4] << 4)) ^ (t ^ (t << 1)); |
|
state->d += 362437; |
|
t = state->d + state->v[4]; |
|
return uint_inc + (t * uint_inc); |
|
} |
|
static inline void GenerateRandomNormal(const int row, const int col, clrandState* rand_state, float normal[3], __constant float * ref_inv_K) { |
|
float v1 = 0.0f; |
|
float v2 = 0.0f; |
|
float s = 2.0f; |
|
while (s >= 1.0f) { |
|
v1 = 2.0f * clrand_uniform(rand_state) - 1.0f; |
|
v2 = 2.0f * clrand_uniform(rand_state) - 1.0f; |
|
s = v1 * v1 + v2 * v2; |
|
} |
|
const float s_norm = sqrt(1.0f - s); |
|
normal[0] = 2.0f * v1 * s_norm; |
|
normal[1] = 2.0f * v2 * s_norm; |
|
normal[2] = 1.0f - 2.0f * s; |
|
const float view_ray[3] = {ref_inv_K[0] * col + ref_inv_K[1], ref_inv_K[2] * row + ref_inv_K[3], 1.0f}; |
|
if (DotProduct3(normal, view_ray) > 0) { |
|
normal[0] = -normal[0]; |
|
normal[1] = -normal[1]; |
|
normal[2] = -normal[2]; |
|
} |
|
} |
|
static inline float GenerateRandomDepth(const float depth_min, const float depth_max, clrandState* rand_state) { |
|
return clrand_uniform(rand_state) * (depth_max - depth_min) + depth_min; |
|
} |
|
static inline float PerturbDepth(const float perturbation, const float depth, clrandState* rand_state) { |
|
const float depth_min = (1.0f - perturbation) * depth; |
|
const float depth_max = (1.0f + perturbation) * depth; |
|
return GenerateRandomDepth(depth_min, depth_max, rand_state); |
|
} |
|
static inline void PerturbNormal(const int row, const int col, float perturbation, const float normal[3], clrandState* rand_state, float perturbed_normal[3], __constant float * ref_inv_K) { |
|
int num_trials = 0; |
|
do { |
|
const float a1 = (clrand_uniform(rand_state) - 0.5f) * perturbation; |
|
const float a2 = (clrand_uniform(rand_state) - 0.5f) * perturbation; |
|
const float a3 = (clrand_uniform(rand_state) - 0.5f) * perturbation; |
|
const float sin_a1 = sin(a1); |
|
const float sin_a2 = sin(a2); |
|
const float sin_a3 = sin(a3); |
|
const float cos_a1 = cos(a1); |
|
const float cos_a2 = cos(a2); |
|
const float cos_a3 = cos(a3); |
|
float R[9]; |
|
R[0] = cos_a2 * cos_a3; |
|
R[1] = -cos_a2 * sin_a3; |
|
R[2] = sin_a2; |
|
R[3] = cos_a1 * sin_a3 + cos_a3 * sin_a1 * sin_a2; |
|
R[4] = cos_a1 * cos_a3 - sin_a1 * sin_a2 * sin_a3; |
|
R[5] = -cos_a2 * sin_a1; |
|
R[6] = sin_a1 * sin_a3 - cos_a1 * cos_a3 * sin_a2; |
|
R[7] = cos_a3 * sin_a1 + cos_a1 * sin_a2 * sin_a3; |
|
R[8] = cos_a1 * cos_a2; |
|
Mat33DotVec3(R, normal, perturbed_normal); |
|
const float view_ray[3] = {ref_inv_K[0] * col + ref_inv_K[1], ref_inv_K[2] * row + ref_inv_K[3], 1.0f}; |
|
if (DotProduct3(perturbed_normal, view_ray) < 0.0f) { |
|
const float inv_norm = rsqrt(DotProduct3(perturbed_normal, perturbed_normal)); |
|
perturbed_normal[0] *= inv_norm; |
|
perturbed_normal[1] *= inv_norm; |
|
perturbed_normal[2] *= inv_norm; |
|
return; |
|
} |
|
perturbation *= 0.5f; |
|
++num_trials; |
|
} |
|
while (num_trials < 4); |
|
perturbed_normal[0] = normal[0]; |
|
perturbed_normal[1] = normal[1]; |
|
perturbed_normal[2] = normal[2]; |
|
} |
|
static inline float PropagateDepth(__constant float * ref_inv_K, const float depth1, const float normal1[3], const float row1, const float row2) { |
|
const float x1 = depth1 * (ref_inv_K[2] * row1 + ref_inv_K[3]); |
|
const float y1 = depth1; |
|
const float x2 = x1 + normal1[2]; |
|
const float y2 = y1 - normal1[1]; |
|
const float x4 = ref_inv_K[2] * row2 + ref_inv_K[3]; |
|
const float denom = x2 - x1 + x4 * (y1 - y2); |
|
const float kEps = 1e-5f; |
|
if (fabs(denom) < kEps) { |
|
return depth1; |
|
} |
|
const float nom = y1 * x2 - x1 * y2; |
|
return nom / denom; |
|
} |
|
static inline float likelihood_computer_ComputeNCCProb(const float cost, float likelihood_computer_inv_ncc_sigma_square_, float likelihood_computer_ncc_norm_factor_) { |
|
return exp(cost * cost * likelihood_computer_inv_ncc_sigma_square_) * likelihood_computer_ncc_norm_factor_; |
|
} |
|
static inline float likelihood_computer_ComputeTriProb(const float cos_triangulation_angle, float likelihood_computer_cos_min_triangulation_angle_) { |
|
const float abs_cos_triangulation_angle = fabs(cos_triangulation_angle); |
|
if (abs_cos_triangulation_angle > likelihood_computer_cos_min_triangulation_angle_) { |
|
const float scaled = 1.0f - (1.0f - abs_cos_triangulation_angle) / |
|
(1.0f - likelihood_computer_cos_min_triangulation_angle_); |
|
const float likelihood = 1.0f - scaled * scaled; |
|
return min(1.0f, max(0.0f, likelihood)); |
|
} else { |
|
return 1.0f; |
|
} |
|
} |
|
static inline float likelihood_computer_ComputeIncProb(const float cos_incident_angle, float likelihood_computer_inv_incident_angle_sigma_square_) { |
|
const float x = 1.0f - max(0.0f, cos_incident_angle); |
|
return exp(x * x * likelihood_computer_inv_incident_angle_sigma_square_); |
|
} |
|
static inline float likelihood_computer_ComputeSelProb(const float alpha, const float beta, const float prev, const float prev_weight) { |
|
const float zn0 = (1.0f - alpha) * (1.0f - beta); |
|
const float zn1 = alpha * beta; |
|
const float curr = zn1 / (zn0 + zn1); |
|
return prev_weight * prev + (1.0f - prev_weight) * curr; |
|
} |
|
static inline float likelihood_computer_ComputeResolutionProb(const float H[9], const float row, const float col) { |
|
float src1[2]; |
|
const float ref1[2] = {col - KWINDOWRADIUS, row - KWINDOWRADIUS}; |
|
Mat33DotVec3Homogeneous(H, ref1, src1); |
|
float src2[2]; |
|
const float ref2[2] = {col - KWINDOWRADIUS, row + KWINDOWRADIUS}; |
|
Mat33DotVec3Homogeneous(H, ref2, src2); |
|
float src3[2]; |
|
const float ref3[2] = {col + KWINDOWRADIUS, row + KWINDOWRADIUS}; |
|
Mat33DotVec3Homogeneous(H, ref3, src3); |
|
float src4[2]; |
|
const float ref4[2] = {col + KWINDOWRADIUS, row - KWINDOWRADIUS}; |
|
Mat33DotVec3Homogeneous(H, ref4, src4); |
|
const float ref_area = KWINDOWSIZE * KWINDOWSIZE; |
|
const float src_area = |
|
fabs(0.5f * (src1[0] * src2[1] - src2[0] * src1[1] - src1[0] * src4[1] + |
|
src2[0] * src3[1] - src3[0] * src2[1] + src4[0] * src1[1] + |
|
src3[0] * src4[1] - src4[0] * src3[1])); |
|
if (ref_area > src_area) { |
|
return src_area / ref_area; |
|
} else { |
|
return ref_area / src_area; |
|
} |
|
} |
|
static inline float likelihood_computer_ComputeMessage(bool kForward, const float cost, const float prev, float likelihood_computer_inv_ncc_sigma_square_, float likelihood_computer_ncc_norm_factor_) { |
|
const float kUniformProb = 0.5f; |
|
const float kNoChangeProb = 0.99999f; |
|
const float kChangeProb = 1.0f - kNoChangeProb; |
|
const float emission = likelihood_computer_ComputeNCCProb(cost, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_); |
|
float zn0; |
|
float zn1; |
|
if (kForward) { |
|
zn0 = (prev * kChangeProb + (1.0f - prev) * kNoChangeProb) * kUniformProb; |
|
zn1 = (prev * kNoChangeProb + (1.0f - prev) * kChangeProb) * emission; |
|
} else { |
|
zn0 = prev * emission * kChangeProb + |
|
(1.0f - prev) * kUniformProb * kNoChangeProb; |
|
zn1 = prev * emission * kNoChangeProb + |
|
(1.0f - prev) * kUniformProb * kChangeProb; |
|
} |
|
return zn1 / (zn0 + zn1); |
|
} |
|
static inline float likelihood_computer_ComputeForwardMessage(const float cost, const float prev, float likelihood_computer_inv_ncc_sigma_square_, float likelihood_computer_ncc_norm_factor_) { |
|
return likelihood_computer_ComputeMessage(true, cost, prev, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_); |
|
} |
|
static inline float likelihood_computer_ComputeBackwardMessage(const float cost, const float prev, float likelihood_computer_inv_ncc_sigma_square_, float likelihood_computer_ncc_norm_factor_) { |
|
return likelihood_computer_ComputeMessage(false, cost, prev, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_); |
|
} |
|
static inline float likelihood_computer_ComputeNCCCostNormFactor(const float ncc_sigma) { |
|
return 2.0f / (sqrt(2.0f * M_PI_F) * ncc_sigma * |
|
erf(2.0f / (ncc_sigma * 1.414213562f))); |
|
} |
|
typedef struct { |
|
float depth; |
|
float normal[3]; |
|
} ParamState; |
|
__kernel void ComputeBackwardMessagesKernel(__global float *global_workspace, int global_workspace_height, __global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, float ncc_sigma) |
|
{ |
|
const int col = get_global_id(0); |
|
const float kUniformProb = 0.5f; |
|
float likelihood_computer_inv_ncc_sigma_square_ = -0.5f / (ncc_sigma * ncc_sigma); |
|
float likelihood_computer_ncc_norm_factor_ = likelihood_computer_ComputeNCCCostNormFactor(ncc_sigma); |
|
__global float* forward_message = &global_workspace[col * global_workspace_height]; |
|
if (col < cost_map_width) { |
|
for (int image_idx = 0; image_idx < cost_map_depth; ++image_idx) { |
|
float beta = kUniformProb; |
|
for (int row = cost_map_height - 1; row >= 0; --row) { |
|
const float cost = *((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col); |
|
beta = likelihood_computer_ComputeBackwardMessage(cost, beta, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_); |
|
*((__global float*)((__global char*)sel_prob_map + sel_prob_map_pitch * (image_idx * sel_prob_map_height + row)) + col) = beta; |
|
} |
|
forward_message[image_idx] = kUniformProb; |
|
} |
|
} |
|
} |
|
__kernel void HypothesesAndProbsKernel(int row, __global float *global_workspace, int global_workspace_width, int global_workspace_height, __global float *hypotheses_workspace, int hypotheses_workspace_height, __global clrandState *rand_state_map, __global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, __global float *prev_sel_prob_map, int prev_sel_prob_map_height, int prev_sel_prob_map_pitch, float min_triangulation_angle, float incident_angle_sigma, float ncc_sigma, float perturbation, float prev_sel_prob_weight, image2d_array_t poses_images, |
|
#if HYPOTHESIS_NUMBER > 0 |
|
__constant float *ref_inv_K, __global float *scaledfilt1_depth_map, __global float *scaledfilt1_normal_map, __global float *scaledfilt2_depth_map, __global float *scaledfilt2_normal_map) |
|
#else |
|
__constant float *ref_inv_K) |
|
#endif |
|
{ |
|
const int col = get_global_id(0); |
|
if (col >= cost_map_width) |
|
return; |
|
ParamState prev_param_state; |
|
ParamState curr_param_state; |
|
ParamState rand_param_state; |
|
curr_param_state.depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * row) + col); |
|
for (int slice = 0; slice < 3; ++slice) |
|
curr_param_state.normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col); |
|
if (row == 0) { |
|
prev_param_state.depth = *(depth_map + col); |
|
for (int slice = 0; slice < 3; ++slice) |
|
prev_param_state.normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + 0)) + col); |
|
else { |
|
prev_param_state.depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * (row-1)) + col); |
|
for (int slice = 0; slice < 3; ++slice) |
|
prev_param_state.normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + (row-1))) + col); |
|
prev_param_state.depth = PropagateDepth(ref_inv_K, prev_param_state.depth, prev_param_state.normal, row - 1, row); |
|
clrandState rand_state = *(rand_state_map + col); |
|
rand_param_state.depth = PerturbDepth(perturbation, curr_param_state.depth, &rand_state); |
|
PerturbNormal(row, col, perturbation * M_PI_F, curr_param_state.normal, &rand_state, rand_param_state.normal, ref_inv_K); |
|
*(rand_state_map + col) = rand_state; |
|
#define SET_HYPOTHESIS(h, slice) (hypotheses_workspace[(slice)*global_workspace_width*hypotheses_workspace_height + (h)*global_workspace_width + col]) |
|
SET_HYPOTHESIS(0, 0) = prev_param_state.depth; |
|
SET_HYPOTHESIS(0, 1) = prev_param_state.normal[0]; |
|
SET_HYPOTHESIS(0, 2) = prev_param_state.normal[1]; |
|
SET_HYPOTHESIS(0, 3) = prev_param_state.normal[2]; |
|
SET_HYPOTHESIS(1, 0) = rand_param_state.depth; |
|
SET_HYPOTHESIS(1, 1) = curr_param_state.normal[0]; |
|
SET_HYPOTHESIS(1, 2) = curr_param_state.normal[1]; |
|
SET_HYPOTHESIS(1, 3) = curr_param_state.normal[2]; |
|
SET_HYPOTHESIS(2, 0) = curr_param_state.depth; |
|
SET_HYPOTHESIS(2, 1) = rand_param_state.normal[0]; |
|
SET_HYPOTHESIS(2, 2) = rand_param_state.normal[1]; |
|
SET_HYPOTHESIS(2, 3) = rand_param_state.normal[2]; |
|
SET_HYPOTHESIS(3, 0) = rand_param_state.depth; |
|
SET_HYPOTHESIS(3, 1) = rand_param_state.normal[0]; |
|
SET_HYPOTHESIS(3, 2) = rand_param_state.normal[1]; |
|
SET_HYPOTHESIS(3, 3) = rand_param_state.normal[2]; |
|
#if HYPOTHESIS_NUMBER > 0 |
|
ParamState scaledfilt1_param_state; |
|
ParamState scaledfilt2_param_state; |
|
scaledfilt1_param_state.depth = *((__global float*)((__global char*)scaledfilt1_depth_map + depth_map_pitch * row) + col); |
|
for (int slice = 0; slice < 3; ++slice) |
|
scaledfilt1_param_state.normal[slice] = *((__global float*)((__global char*)scaledfilt1_normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col); |
|
scaledfilt2_param_state.depth = *((__global float*)((__global char*)scaledfilt2_depth_map + depth_map_pitch * row) + col); |
|
for (int slice = 0; slice < 3; ++slice) |
|
scaledfilt2_param_state.normal[slice] = *((__global float*)((__global char*)scaledfilt2_normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col); |
|
int hyp = 4; |
|
SET_HYPOTHESIS(hyp, 0) = (curr_param_state.depth + prev_param_state.depth)/2.0f; |
|
SET_HYPOTHESIS(hyp, 1) = prev_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = prev_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = prev_param_state.normal[2]; |
|
++hyp; |
|
float prev1_depth, prev2_depth; |
|
if (row < 2) { |
|
prev1_depth = *(depth_map + col); |
|
prev2_depth = prev1_depth; |
|
else { |
|
prev1_depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * (row-1)) + col); |
|
prev2_depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * (row-2)) + col); |
|
SET_HYPOTHESIS(hyp, 0) = 2*prev1_depth - prev2_depth; |
|
SET_HYPOTHESIS(hyp, 1) = prev_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = prev_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = prev_param_state.normal[2]; |
|
++hyp; |
|
#if HYPOTHESIS_NUMBER > 1 |
|
const float fuzzy5 = GenerateRandomDepth(curr_param_state.depth - 0.005f, curr_param_state.depth + 0.005f, rand_state); |
|
SET_HYPOTHESIS(hyp, 0) = fuzzy5; |
|
SET_HYPOTHESIS(hyp, 1) = curr_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = curr_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = curr_param_state.normal[2]; |
|
++hyp; |
|
SET_HYPOTHESIS(hyp, 0) = fuzzy5; |
|
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2]; |
|
++hyp; |
|
const float fuzzy10 = GenerateRandomDepth(curr_param_state.depth - 0.01f, curr_param_state.depth + 0.01f, rand_state); |
|
SET_HYPOTHESIS(hyp, 0) = fuzzy10; |
|
SET_HYPOTHESIS(hyp, 1) = curr_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = curr_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = curr_param_state.normal[2]; |
|
++hyp; |
|
SET_HYPOTHESIS(hyp, 0) = fuzzy10; |
|
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2]; |
|
++hyp; |
|
const float fuzzy20 = GenerateRandomDepth(curr_param_state.depth - 0.02f, curr_param_state.depth + 0.02f, rand_state); |
|
SET_HYPOTHESIS(hyp, 0) = fuzzy20; |
|
SET_HYPOTHESIS(hyp, 1) = curr_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = curr_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = curr_param_state.normal[2]; |
|
++hyp; |
|
SET_HYPOTHESIS(hyp, 0) = fuzzy20; |
|
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2]; |
|
++hyp; |
|
*(rand_state_map + col) = rand_state; |
|
#endif |
|
SET_HYPOTHESIS(hyp, 0) = scaledfilt1_param_state.depth; |
|
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2]; |
|
++hyp; |
|
SET_HYPOTHESIS(hyp, 0) = curr_param_state.depth; |
|
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2]; |
|
++hyp; |
|
SET_HYPOTHESIS(hyp, 0) = scaledfilt2_param_state.depth; |
|
SET_HYPOTHESIS(hyp, 1) = scaledfilt2_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = scaledfilt2_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = scaledfilt2_param_state.normal[2]; |
|
++hyp; |
|
SET_HYPOTHESIS(hyp, 0) = curr_param_state.depth; |
|
SET_HYPOTHESIS(hyp, 1) = scaledfilt2_param_state.normal[0]; |
|
SET_HYPOTHESIS(hyp, 2) = scaledfilt2_param_state.normal[1]; |
|
SET_HYPOTHESIS(hyp, 3) = scaledfilt2_param_state.normal[2]; |
|
++hyp; |
|
SET_HYPOTHESIS(hyp, 0) = 0.0f; |
|
SET_HYPOTHESIS(hyp, 1) = 0.0f; |
|
SET_HYPOTHESIS(hyp, 2) = 0.0f; |
|
SET_HYPOTHESIS(hyp, 3) = 0.0f; |
|
++hyp; |
|
#endif |
|
#undef SET_HYPOTHESIS |
|
float likelihood_computer_cos_min_triangulation_angle_ = cos(min_triangulation_angle); |
|
float likelihood_computer_inv_incident_angle_sigma_square_ = -0.5f / (incident_angle_sigma * incident_angle_sigma) ; |
|
float likelihood_computer_inv_ncc_sigma_square_ = -0.5f / (ncc_sigma * ncc_sigma); |
|
float likelihood_computer_ncc_norm_factor_ = likelihood_computer_ComputeNCCCostNormFactor(ncc_sigma); |
|
__global float* forward_message = &global_workspace[col * global_workspace_height]; |
|
__global float* sampling_probs = &global_workspace[global_workspace_width * |
|
global_workspace_height + |
|
col * global_workspace_height]; |
|
float point[3]; |
|
ComputePointAtDepth(row, col, curr_param_state.depth, point, ref_inv_K); |
|
for (int image_idx = 0; image_idx < cost_map_depth; ++image_idx) { |
|
const float cost = *((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col); |
|
const float alpha = likelihood_computer_ComputeForwardMessage(cost, forward_message[image_idx], likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_); |
|
const float beta = *((__global float*)((__global char*)sel_prob_map + sel_prob_map_pitch * (image_idx * sel_prob_map_height + row)) + col); |
|
const float prev_prob = *((__global float*)((__global char*)prev_sel_prob_map + prev_sel_prob_map_pitch * (image_idx * prev_sel_prob_map_height + row)) + col); |
|
const float sel_prob = likelihood_computer_ComputeSelProb(alpha, beta, prev_prob, prev_sel_prob_weight); |
|
float cos_triangulation_angle; |
|
float cos_incident_angle; |
|
ComputeViewingAngles(point, curr_param_state.normal, image_idx, cos_triangulation_angle, &cos_incident_angle, poses_images); |
|
const float tri_prob = |
|
likelihood_computer_ComputeTriProb(cos_triangulation_angle, likelihood_computer_cos_min_triangulation_angle_); |
|
const float inc_prob = |
|
likelihood_computer_ComputeIncProb(cos_incident_angle, likelihood_computer_inv_incident_angle_sigma_square_); |
|
float H[9]; |
|
ComposeHomography(poses_images, image_idx, row, col, curr_param_state.depth, curr_param_state.normal, H, ref_inv_K); |
|
const float res_prob = |
|
likelihood_computer_ComputeResolutionProb(H, row, col); |
|
sampling_probs[image_idx] = sel_prob * tri_prob * inc_prob * res_prob; |
|
__constant sampler_t RefImageSampler = CLK_NORMALIZED_COORDS_FALSE | |
|
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; |
|
__constant sampler_t SrcImageSampler = CLK_NORMALIZED_COORDS_FALSE | |
|
CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR; |
|
#ifdef NO_LOCAL_MEMORY |
|
__kernel void PccComputeKernel(float spatial_normalization_, float color_normalization_, int row, int width, int workspaceWidth, __global float *hypotheses_workspace, __global float *pcc_costs, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, image2d_array_t ref_image, image2d_array_t src_images, image2d_array_t poses_images, __constant float *ref_inv_K) |
|
const int col = get_global_id(0); |
|
if (col >= width) |
|
return; |
|
const int src_image_idx = get_global_id(1); |
|
const int num_src_images = get_global_size(1); |
|
const int hypothesis = get_global_id(2); |
|
const int num_hypotheses = get_global_size(2); |
|
float depth = hypotheses_workspace[hypothesis*workspaceWidth + col]; |
|
float normal[3]; |
|
for (int i=0; i<3; ++i) |
|
normal[i] = hypotheses_workspace[(i+1)*workspaceWidth*num_hypotheses + hypothesis*workspaceWidth + col]; |
|
float tform[9]; |
|
ComposeHomography(poses_images, src_image_idx, row, col, depth, normal, tform, ref_inv_K); |
|
float x_step[3], y_step[3]; |
|
for (int i=0; i<3; ++i) { |
|
x_step[i] = KWINDOWSTEP * tform[i*3]; |
|
y_step[i] = KWINDOWSTEP * tform[i*3+1]; |
|
|
|
const int row_start = row - KWINDOWRADIUS; |
|
const int col_start = col - KWINDOWRADIUS; |
|
float col_src = tform[0] * col_start + tform[1] * row_start + tform[2]; |
|
float row_src = tform[3] * col_start + tform[4] * row_start + tform[5]; |
|
float z = tform[6] * col_start + tform[7] * row_start + tform[8]; |
|
float base_col_src = col_src; |
|
float base_row_src = row_src; |
|
float base_z = z; |
|
const float ref_center_color = |
|
read_imagef(ref_image, RefImageSampler, (int4)(col, row, 0, 0)).x; |
|
const float ref_color_sum = *((__global float*)((__global char*)ref_sum_image + ref_sum_pitch * row) + col); |
|
const float ref_color_squared_sum = *((__global float*)((__global char*)ref_squared_sum_image + ref_squared_sum_pitch * row) + col); |
|
float src_color_sum = 0.0f; |
|
float src_color_squared_sum = 0.0f; |
|
float src_ref_color_sum = 0.0f; |
|
float bilateral_weight_sum = 0.0f; |
|
for (int r = -KWINDOWRADIUS; r <= KWINDOWRADIUS; r += KWINDOWSTEP) { |
|
for (int c = -KWINDOWRADIUS; c <= KWINDOWRADIUS; c += KWINDOWSTEP) { |
|
const float inv_z = 1.0f / z; |
|
const float norm_col_src = inv_z * col_src + 0.5f; |
|
const float norm_row_src = inv_z * row_src + 0.5f; |
|
const float ref_color = read_imagef(ref_image, RefImageSampler, (int4)(col+c, row+r, 0, 0)).x; |
|
const float src_color = read_imagef(src_images, SrcImageSampler, (float4)(norm_col_src, norm_row_src, src_image_idx, 0)).x; |
|
const float spatial_dist_squared = r*r + c*c; |
|
const float color_dist = ref_center_color - ref_color; |
|
const float bilateral_weight = exp(-spatial_dist_squared * spatial_normalization_ - |
|
color_dist * color_dist * color_normalization_); |
|
const float bilateral_weight_src = bilateral_weight * src_color; |
|
src_color_sum += bilateral_weight_src; |
|
src_color_squared_sum += bilateral_weight_src * src_color; |
|
src_ref_color_sum += bilateral_weight_src * ref_color; |
|
bilateral_weight_sum += bilateral_weight; |
|
col_src += x_step[0]; |
|
row_src += x_step[1]; |
|
z += x_step[2]; |
|
} |
|
base_col_src += y_step[0]; |
|
base_row_src += y_step[1]; |
|
base_z += y_step[2]; |
|
col_src = base_col_src; |
|
row_src = base_row_src; |
|
z = base_z; |
|
const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum; |
|
src_color_sum *= inv_bilateral_weight_sum; |
|
src_color_squared_sum *= inv_bilateral_weight_sum; |
|
src_ref_color_sum *= inv_bilateral_weight_sum; |
|
const float ref_color_var = |
|
ref_color_squared_sum - ref_color_sum * ref_color_sum; |
|
const float src_color_var = |
|
src_color_squared_sum - src_color_sum * src_color_sum; |
|
const float kMinVar = 1e-5f; |
|
const float kMaxCost = 2.0f; |
|
float retVal; |
|
if (ref_color_var < kMinVar || src_color_var < kMinVar) { |
|
retVal = kMaxCost; |
|
} else { |
|
const float src_ref_color_covar = |
|
src_ref_color_sum - ref_color_sum * src_color_sum; |
|
const float src_ref_color_var = sqrt(ref_color_var * src_color_var); |
|
retVal = max(0.0f, min(kMaxCost, 1.0f - src_ref_color_covar / src_ref_color_var)); |
|
pcc_costs[workspaceWidth*num_src_images*hypothesis + workspaceWidth*src_image_idx + col] = retVal; |
|
#else |
|
#if KWINDOWRADIUS > LOCAL_MEM_WORK_GROUP_SIZE/2 |
|
#define LOCAL_MEM_WORK_GROUP_SHIFT LOCAL_MEM_WORK_GROUP_SIZE |
|
#else |
|
#define LOCAL_MEM_WORK_GROUP_SHIFT LOCAL_MEM_WORK_GROUP_SIZE/2 |
|
#endif |
|
__kernel void PccComputeKernel(float spatial_normalization_, float color_normalization_, int row, int width, int workspaceWidth, __global float *hypotheses_workspace, __global float *pcc_costs, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, image2d_array_t ref_image, image2d_array_t src_images, image2d_array_t poses_images, __constant float *ref_inv_K, __local float *local_ref_image) |
|
{ |
|
const int col = get_global_id(0); |
|
const int thread = get_local_id(0); |
|
const bool valid = col < width; |
|
const int src_image_idx = get_global_id(1); |
|
const int num_src_images = get_global_size(1); |
|
const int hypothesis = get_global_id(2); |
|
const int num_hypotheses = get_global_size(2); |
|
float col_src, row_src, z; |
|
float base_col_src, base_row_src, base_z; |
|
float x_step[3], y_step[3]; |
|
float ref_center_color, ref_color_sum, ref_color_squared_sum; |
|
float src_color_sum = 0.0f; |
|
float src_color_squared_sum = 0.0f; |
|
float src_ref_color_sum = 0.0f; |
|
float bilateral_weight_sum = 0.0f; |
|
if (valid) { |
|
float depth = hypotheses_workspace[hypothesis*workspaceWidth + col]; |
|
float normal[3]; |
|
for (int i=0; i<3; ++i) |
|
normal[i] = hypotheses_workspace[(i+1)*workspaceWidth*num_hypotheses + hypothesis*workspaceWidth + col]; |
|
float tform[9]; |
|
ComposeHomography(poses_images, src_image_idx, row, col, depth, normal, tform, ref_inv_K); |
|
for (int i=0; i<3; ++i) { |
|
x_step[i] = KWINDOWSTEP * tform[i*3]; |
|
y_step[i] = KWINDOWSTEP * tform[i*3+1]; |
|
} |
|
const int row_start = row - KWINDOWRADIUS; |
|
const int col_start = col - KWINDOWRADIUS; |
|
col_src = tform[0] * col_start + tform[1] * row_start + tform[2]; |
|
row_src = tform[3] * col_start + tform[4] * row_start + tform[5]; |
|
z = tform[6] * col_start + tform[7] * row_start + tform[8]; |
|
base_col_src = col_src; |
|
base_row_src = row_src; |
|
base_z = z; |
|
ref_center_color = read_imagef(ref_image, RefImageSampler, (int4)(col, row, 0, 0)).x; |
|
ref_color_sum = *((__global float*)((__global char*)ref_sum_image + ref_sum_pitch * row) + col); |
|
ref_color_squared_sum = *((__global float*)((__global char*)ref_squared_sum_image + ref_squared_sum_pitch * row) + col); |
|
} |
|
for (int r = -KWINDOWRADIUS; r <= KWINDOWRADIUS; r += KWINDOWSTEP) { |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
local_ref_image[thread] = |
|
read_imagef(ref_image, RefImageSampler, (int4)(col-LOCAL_MEM_WORK_GROUP_SHIFT, row+r, 0, 0)).x; |
|
local_ref_image[thread+2*LOCAL_MEM_WORK_GROUP_SHIFT] = |
|
read_imagef(ref_image, RefImageSampler, (int4)(col+LOCAL_MEM_WORK_GROUP_SHIFT, row+r, 0, 0)).x; |
|
#if KWINDOWRADIUS > LOCAL_MEM_WORK_GROUP_SIZE/2 |
|
local_ref_image[thread+LOCAL_MEM_WORK_GROUP_SIZE] = |
|
read_imagef(ref_image, RefImageSampler, (int4)(col, row+r, 0, 0)).x; |
|
#endif |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
if (valid) { |
|
for (int c = -KWINDOWRADIUS; c <= KWINDOWRADIUS; c += KWINDOWSTEP) { |
|
const float inv_z = 1.0f / z; |
|
const float norm_col_src = inv_z * col_src + 0.5f; |
|
const float norm_row_src = inv_z * row_src + 0.5f; |
|
const float ref_color = local_ref_image[thread+LOCAL_MEM_WORK_GROUP_SHIFT+c]; |
|
const float src_color = read_imagef(src_images, SrcImageSampler, (float4)(norm_col_src, norm_row_src, src_image_idx, 0)).x; |
|
const float spatial_dist_squared = r*r + c*c; |
|
const float color_dist = ref_center_color - ref_color; |
|
const float bilateral_weight = exp(-spatial_dist_squared * spatial_normalization_ - color_dist * color_dist * color_normalization_); |
|
const float bilateral_weight_src = bilateral_weight * src_color; |
|
src_color_sum += bilateral_weight_src; |
|
src_color_squared_sum += bilateral_weight_src * src_color; |
|
src_ref_color_sum += bilateral_weight_src * ref_color; |
|
bilateral_weight_sum += bilateral_weight; |
|
col_src += x_step[0]; |
|
row_src += x_step[1]; |
|
z += x_step[2]; |
|
} |
|
base_col_src += y_step[0]; |
|
base_row_src += y_step[1]; |
|
base_z += y_step[2]; |
|
col_src = base_col_src; |
|
row_src = base_row_src; |
|
z = base_z; |
|
} |
|
} |
|
if (!valid) |
|
return; |
|
const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum; |
|
src_color_sum *= inv_bilateral_weight_sum; |
|
src_color_squared_sum *= inv_bilateral_weight_sum; |
|
src_ref_color_sum *= inv_bilateral_weight_sum; |
|
const float ref_color_var = |
|
ref_color_squared_sum - ref_color_sum * ref_color_sum; |
|
const float src_color_var = |
|
src_color_squared_sum - src_color_sum * src_color_sum; |
|
const float kMinVar = 1e-5f; |
|
const float kMaxCost = 2.0f; |
|
float retVal; |
|
if (ref_color_var < kMinVar || src_color_var < kMinVar) { |
|
retVal = kMaxCost; |
|
} else { |
|
const float src_ref_color_covar = |
|
src_ref_color_sum - ref_color_sum * src_color_sum; |
|
const float src_ref_color_var = sqrt(ref_color_var * src_color_var); |
|
retVal = max(0.0f, min(kMaxCost, 1.0f - src_ref_color_covar / src_ref_color_var)); |
|
} |
|
pcc_costs[workspaceWidth*num_src_images*hypothesis + workspaceWidth*src_image_idx + col] = retVal; |
|
} |
|
#endif |
|
__kernel void MinimumCostKernel(int row, int width, int num_src_images, int num_hypotheses, int kGeomConsistencyTerm, __global float *global_workspace, int workspaceWidth, __global float *pcc_costs, __global float *hypotheses_workspace, __global float *cost_map, int cost_map_height, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, __global float *prev_sel_prob_map, int prev_sel_prob_map_height, int prev_sel_prob_map_pitch, float prev_sel_prob_weight, float ncc_sigma, float geom_consistency_regularizer, float geom_consistency_max_cost, image2d_array_t src_depth_map_images, image2d_array_t poses_images, __constant float *ref_inv_K, |
|
#ifdef HYPOTHESES_STATISTICS |
|
__constant float *ref_K, __global unsigned int *hypotheses_stats) |
|
#else |
|
__constant float *ref_K) |
|
#endif |
|
{ |
|
const int col = get_global_id(0); |
|
if (col >= width) |
|
return; |
|
float likelihood_computer_inv_ncc_sigma_square_ = -0.5f / (ncc_sigma * ncc_sigma); |
|
float likelihood_computer_ncc_norm_factor_ = likelihood_computer_ComputeNCCCostNormFactor(ncc_sigma); |
|
__global float* forward_message = &global_workspace[col * num_src_images]; |
|
__global float* sampling_probs = &global_workspace[workspaceWidth * num_src_images + col * num_src_images]; |
|
float best_cost = 0.0f; |
|
int min_cost_idx = 0; |
|
for (int a=0; a<num_src_images; ++a) { |
|
const float sampling_weight = sampling_probs[a]; |
|
best_cost += *((__global float*)((__global char*)cost_map + cost_map_pitch * (a * cost_map_height + row)) + col) * sampling_weight; |
|
if (kGeomConsistencyTerm) { |
|
float currDepth = *((__global float*)((__global char*)depth_map + depth_map_pitch * row) + col); |
|
best_cost += geom_consistency_regularizer * |
|
ComputeGeomConsistencyCost(row, col, currDepth, a, geom_consistency_max_cost, poses_images, src_depth_map_images, ref_inv_K, ref_K) * sampling_weight; |
|
} |
|
} |
|
for (int i=1; i<num_hypotheses; ++i) { |
|
float hyp_cost = 0.0f; |
|
for (int a=0; a<num_src_images; ++a) { |
|
const float sampling_weight = sampling_probs[a]; |
|
hyp_cost += pcc_costs[workspaceWidth*num_src_images*(i-1) + workspaceWidth*a + col] * sampling_weight; |
|
if (kGeomConsistencyTerm) { |
|
float hypDepth = hypotheses_workspace[(i-1)*workspaceWidth + col]; |
|
hyp_cost += geom_consistency_regularizer * |
|
ComputeGeomConsistencyCost(row, col, hypDepth, a, geom_consistency_max_cost, poses_images, src_depth_map_images, ref_inv_K, ref_K) * sampling_weight; |
|
} |
|
} |
|
if (hyp_cost <= best_cost) { |
|
best_cost = hyp_cost; |
|
min_cost_idx = i; |
|
} |
|
} |
|
#ifdef HYPOTHESES_STATISTICS |
|
hypotheses_stats[workspaceWidth*min_cost_idx + col] += 1; |
|
#endif |
|
if (min_cost_idx > 0) { |
|
*((__global float*)((__global char*)depth_map + depth_map_pitch * row) + col) = hypotheses_workspace[(min_cost_idx-1)*workspaceWidth + col]; |
|
for (size_t slice = 0; slice < 3; ++slice) { |
|
*((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col) = |
|
hypotheses_workspace[(slice+1)*workspaceWidth*(num_hypotheses-1) + (min_cost_idx-1)*workspaceWidth + col]; |
|
} |
|
} |
|
for (int image_idx = 0; image_idx < num_src_images; ++image_idx) |
|
{ |
|
float cost; |
|
if (min_cost_idx == 0) { |
|
cost = *((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col); |
|
} |
|
else { |
|
cost = pcc_costs[workspaceWidth*num_src_images*(min_cost_idx-1) + workspaceWidth*image_idx + col]; |
|
*((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col) = cost; |
|
} |
|
const float alpha = likelihood_computer_ComputeForwardMessage(cost, forward_message[image_idx], likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_); |
|
const float beta = *((__global float*)((__global char*)sel_prob_map + sel_prob_map_pitch * (image_idx * sel_prob_map_height + row)) + col); |
|
const float prev_prob = *((__global float*)((__global char*)prev_sel_prob_map + prev_sel_prob_map_pitch * (image_idx * prev_sel_prob_map_height + row)) + col); |
|
const float prob = likelihood_computer_ComputeSelProb(alpha, beta, prev_prob, prev_sel_prob_weight); |
|
forward_message[image_idx] = alpha; |
|
*((__global float*)((__global char*)sel_prob_map + sel_prob_map_pitch * (image_idx * sel_prob_map_height + row)) + col) = prob; |
|
} |
|
} |
|
__kernel void ConsistencyFilterKernel(int width, __global float *depth_map, __global float *normal_map, __global float *sel_prob_map, int map_pitch, int map_height, int num_src_images, int kFilterConsistencyMode, __global uchar *consistency_mask, int consistency_mask_pitch, image2d_array_t src_depth_map_images, image2d_array_t poses_images, __constant float *ref_inv_K, __constant float *ref_K, float ncc_sigma, float filter_min_ncc, float filter_min_triangulation_angle, float geom_consistency_max_cost, float filter_geom_consistency_max_cost, int filter_min_num_consistent) |
|
{ |
|
const int col = get_global_id(0); |
|
const int row = get_global_id(1); |
|
if (col >= width) |
|
return; |
|
int num_consistent = 0; |
|
const float best_depth = *((__global float*)((__global char*)depth_map + map_pitch * row) + col); |
|
float best_normal[3]; |
|
for (int i=0; i<3; ++i) |
|
best_normal[i] = *((__global float*)((__global char*)normal_map + map_pitch * (i * map_height + row)) + col); |
|
float best_point[3]; |
|
ComputePointAtDepth(row, col, best_depth, best_point, ref_inv_K); |
|
const float likelihood_computer_inv_ncc_sigma_square_ = -0.5f / (ncc_sigma * ncc_sigma); |
|
const float likelihood_computer_ncc_norm_factor_ = likelihood_computer_ComputeNCCCostNormFactor(ncc_sigma); |
|
const float min_ncc_prob = |
|
likelihood_computer_ComputeNCCProb(1.0f - filter_min_ncc, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_); |
|
const float cos_min_triangulation_angle = |
|
cos(filter_min_triangulation_angle); |
|
for (int image_idx = 0; image_idx < num_src_images; ++image_idx) { |
|
float cos_triangulation_angle; |
|
float cos_incident_angle; |
|
ComputeViewingAngles(best_point, best_normal, image_idx, cos_triangulation_angle, &cos_incident_angle, poses_images); |
|
if (cos_triangulation_angle > cos_min_triangulation_angle || |
|
cos_incident_angle <= 0.0f) { |
|
continue; |
|
} |
|
if (!(kFilterConsistencyMode & 0x02)) { |
|
if (*((__global float*)((__global char*)sel_prob_map + map_pitch * (image_idx * map_height + row)) + col) >= min_ncc_prob) { |
|
*(consistency_mask + consistency_mask_pitch * (image_idx * map_height + row) + col) = 1; |
|
num_consistent += 1; |
|
} |
|
} else if (!(kFilterConsistencyMode & 0x01)) { |
|
if (ComputeGeomConsistencyCost(row, col, best_depth, image_idx, geom_consistency_max_cost, poses_images, src_depth_map_images, ref_inv_K, ref_K) <= filter_geom_consistency_max_cost) { |
|
*(consistency_mask + consistency_mask_pitch * (image_idx * map_height + row) + col) = 1; |
|
num_consistent += 1; |
|
} |
|
} else { |
|
if (*((__global float*)((__global char*)sel_prob_map + map_pitch * (image_idx * map_height + row)) + col) >= min_ncc_prob && |
|
ComputeGeomConsistencyCost(row, col, best_depth, image_idx, geom_consistency_max_cost, poses_images, src_depth_map_images, ref_inv_K, ref_K) <= filter_geom_consistency_max_cost) { |
|
*(consistency_mask + consistency_mask_pitch * (image_idx * map_height + row) + col) = 1; |
|
num_consistent += 1; |
|
} |
|
} |
|
} |
|
if (num_consistent < filter_min_num_consistent) { |
|
const float kFilterValue = 0.0f; |
|
*((__global float*)((__global char*)depth_map + map_pitch * row) + col) = kFilterValue; |
|
*((__global float*)((__global char*)normal_map + map_pitch * (0 * map_height + row)) + col) = kFilterValue; |
|
*((__global float*)((__global char*)normal_map + map_pitch * (1 * map_height + row)) + col) = kFilterValue; |
|
*((__global float*)((__global char*)normal_map + map_pitch * (2 * map_height + row)) + col) = kFilterValue; |
|
for (int image_idx = 0; image_idx < num_src_images; ++image_idx) { |
|
*(consistency_mask + consistency_mask_pitch * (image_idx * map_height + row) + col) = 0; |
|
} |
|
} |
|
} |
|
#ifdef NO_LOCAL_MEMORY |
|
__kernel void RotateFloatKernel(__global float *input_data, int input_offset, __global float *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch) |
|
{ |
|
int input_x = get_global_id(0); |
|
int input_y = get_global_id(1); |
|
if (input_x >= width || input_y >= height) { |
|
return; |
|
int output_x = input_y; |
|
int output_y = width - 1 - input_x; |
|
*((__global float *)((__global uchar *)output_data + output_offset + output_y * output_pitch) + output_x) = |
|
*((__global float *)((__global uchar *)input_data + input_offset + input_y * input_pitch) + input_x); |
|
} |
|
__kernel void RotateUint8Kernel(__global uchar *input_data, int input_offset, __global uchar *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch) |
|
{ |
|
int input_x = get_global_id(0); |
|
int input_y = get_global_id(1); |
|
if (input_x < width && input_y < height) { |
|
int output_x = input_y; |
|
int output_y = width - 1 - input_x; |
|
*(output_data + output_offset + output_y * output_pitch + output_x) = |
|
*(input_data + input_offset + input_y * input_pitch + input_x); |
|
} |
|
} |
|
#else |
|
#define ROTATE_TILE_DIM 16 |
|
#define ROTATE_WORKGROUP_ROWS 4 |
|
__kernel void RotateFloatKernel(__global float * input_data, int input_offset, __global float * output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch) |
|
{ |
|
__local float tile[ROTATE_TILE_DIM][ROTATE_TILE_DIM+1]; |
|
int input_x = get_global_id(0); |
|
int x = get_local_id(0); |
|
int y = get_local_id(1); |
|
int yp = get_group_id(1) * ROTATE_TILE_DIM + y; |
|
for (int j=0; j < ROTATE_TILE_DIM; j += ROTATE_WORKGROUP_ROWS) |
|
if (input_x < width && yp+j < height) |
|
tile[y+j][x] = *((__global float *)((__global uchar *)input_data + input_offset + (yp + j) * input_pitch) + input_x); |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
int output_y = width - 1 - (input_x/ROTATE_TILE_DIM * ROTATE_TILE_DIM) - y; |
|
for (int j=0; j < ROTATE_TILE_DIM; j += ROTATE_WORKGROUP_ROWS) |
|
int output_x = ((yp+j)/ROTATE_TILE_DIM)*ROTATE_TILE_DIM + x; |
|
if (output_x < height && output_y-j >= 0) |
|
*((__global float *)((__global uchar *)output_data + output_offset + (output_y - j) * output_pitch) + output_x) = tile[x][y+j]; |
|
} |
|
__kernel void RotateUint8Kernel(__global uchar *input_data, int input_offset, __global uchar *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch) |
|
{ |
|
__local uchar tile[ROTATE_TILE_DIM][ROTATE_TILE_DIM+1]; |
|
int input_x = get_global_id(0); |
|
int x = get_local_id(0); |
|
int y = get_local_id(1); |
|
int yp = get_group_id(1) * ROTATE_TILE_DIM + y; |
|
for (int j=0; j < ROTATE_TILE_DIM; j += ROTATE_WORKGROUP_ROWS) |
|
if (input_x < width && yp+j < height) |
|
tile[y+j][x] = *(input_data + input_offset + (yp + j) * input_pitch + input_x); |
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
int output_y = width - 1 - (input_x/ROTATE_TILE_DIM * ROTATE_TILE_DIM) - y; |
|
for (int j=0; j < ROTATE_TILE_DIM; j += ROTATE_WORKGROUP_ROWS) |
|
{ |
|
int output_x = ((yp+j)/ROTATE_TILE_DIM)*ROTATE_TILE_DIM + x; |
|
if (output_x < height && output_y-j >= 0) |
|
*(output_data + output_offset + (output_y - j) * output_pitch + output_x) = tile[x][y+j]; |
|
} |
|
} |
|
#endif |
|
__kernel void FillWithRandomNumbersKernel(__global float* output, __global clrandState * rand_state_map, float min_value, float max_value, int random_state_width, int random_state_height, int random_state_pitch, int output_height, int output_depth, int output_pitch) { |
|
const uint row = get_global_id(1); |
|
const uint col = get_global_id(0); |
|
if (col < random_state_width && row < random_state_height) { |
|
clrandState rand_state = *((__global clrandState*)((__global char*)rand_state_map + random_state_pitch * row) + col); |
|
for (size_t slice = 0; slice < output_depth; ++slice) { |
|
float random_value = clrand_uniform(&rand_state) * (max_value - min_value) + min_value; |
|
*((__global float*)((__global char*)output + output_pitch * (slice * output_height + row)) + col) = random_value; |
|
} |
|
*((__global clrandState*)((__global char*)rand_state_map + random_state_pitch * row) + col) = rand_state; |
|
} |
|
} |
|
__kernel void InitNormalMapKernel(__global float* normal_map, __global clrandState * rand_state_map, int normal_map_width, int normal_map_height, int normal_map_depth, int normal_map_pitch, int rand_state_map_pitch, __constant float * ref_inv_K) { |
|
const uint row = get_global_id(1); |
|
const uint col = get_global_id(0); |
|
if (col < normal_map_width && row < normal_map_height) { |
|
clrandState rand_state = *((__global clrandState*)((__global char*)rand_state_map + rand_state_map_pitch * row) + col); |
|
float normal[3]; |
|
GenerateRandomNormal(row, col, &rand_state, normal, ref_inv_K); |
|
for (size_t slice = 0; slice < normal_map_depth; ++slice) { |
|
*((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col) = normal[slice]; |
|
} |
|
*((__global clrandState*)((__global char*)rand_state_map + rand_state_map_pitch * row) + col) = rand_state; |
|
} |
|
__constant sampler_t FilterSampler = CLK_NORMALIZED_COORDS_FALSE | |
|
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; |
|
__kernel void FilterKernel(image2d_array_t image_array, __global uchar *image, int pitch_image, __global float *sum_image, int pitch_sum_image, __global float *squared_sum_image, int pitch_squared_sum_image, int window_radius, int window_step, float sigma_spatial, float sigma_color) { |
|
const int row = get_global_id(1); |
|
const int col = get_global_id(0); |
|
const float spatial_normalization_ = (1.0f / (2.0f * sigma_spatial * sigma_spatial)); |
|
const float color_normalization_ = (1.0f / (2.0f * sigma_color * sigma_color)); |
|
const float center_color = read_imagef(image_array, FilterSampler, int4)(col, row, 0, 0)).x; |
|
float color_sum = 0.0f; |
|
float color_squared_sum = 0.0f; |
|
float bilateral_weight_sum = 0.0f; |
|
for (int window_row = -window_radius; window_row <= window_radius; |
|
window_row += window_step) { |
|
for (int window_col = -window_radius; window_col <= window_radius; |
|
window_col += window_step) { |
|
const float color = read_imagef(image_array, FilterSampler, int4)(col + window_col, row + window_row, 0, 0)).x; |
|
const float spatial_dist_squared = window_row * window_row + window_col * window_col; |
|
const float color_dist = center_color - color; |
|
const float bilateral_weight = exp(-spatial_dist_squared * spatial_normalization_ - |
|
color_dist * color_dist * color_normalization_); |
|
color_sum += bilateral_weight * color; |
|
color_squared_sum += bilateral_weight * color * color; |
|
bilateral_weight_sum += bilateral_weight; |
|
} |
|
} |
|
color_sum /= bilateral_weight_sum; |
|
color_squared_sum /= bilateral_weight_sum; |
|
*(image + pitch_image * row + col) = (uchar)(255.0f * center_color); |
|
*((__global float*)((__global char*)sum_image + pitch_sum_image * row) + col) = color_sum; |
|
*((__global float*)((__global char*)squared_sum_image + pitch_squared_sum_image * row) + col) = color_squared_sum; |
|
} |
|
__kernel void RotateNormalMapKernel(__global float * normal_map, int normal_map_width, int normal_map_height, int normal_map_depth, int normal_map_pitch) { |
|
const int row = get_global_id(1); |
|
const int col = get_global_id(0); |
|
if (col < normal_map_width && row < normal_map_height) { |
|
float normal[3]; |
|
for (size_t slice = 0; slice < normal_map_depth; ++slice) { |
|
normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col); |
|
} |
|
float rotated_normal[3]; |
|
rotated_normal[0] = normal[1]; |
|
rotated_normal[1] = -normal[0]; |
|
rotated_normal[2] = normal[2]; |
|
for (size_t slice = 0; slice < normal_map_depth; ++slice) { |
|
*((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col) = rotated_normal[slice]; |
|
} |
|
} |
|
} |
|
__kernel void CopyFromGpuMatCLKernel(__global uchar * input_array, __write_only image2d_array_t output_array, int pitch, int width, int height) { |
|
const int row = get_global_id(1); |
|
const int col = get_global_id(0); |
|
int4 pos = (int4)(col, row, 0, 0); |
|
float4 pix = (float4)((*(input_array + pitch * row + col))/255.0f); |
|
if (col < width && row < height) { |
|
write_imagef(output_array, pos, pix); |
|
} |
|
} |