Skip to content

Instantly share code, notes, and snippets.

@ochafik
Last active February 2, 2022 19:04
Show Gist options
  • Select an option

  • Save ochafik/30a0d0599d77d35fe4d8cdc6aa28ec59 to your computer and use it in GitHub Desktop.

Select an option

Save ochafik/30a0d0599d77d35fe4d8cdc6aa28ec59 to your computer and use it in GitHub Desktop.
MeshroomCL OpenCL kernels extracted from its Windows64 binaries for reverse engineering purposes

Context: openphotogrammetry/meshroomcl#33

wget https://github.com/openphotogrammetry/meshroomcl/releases/download/v0.8.4/MeshroomCL-0.8.4-win64.zip
unzip MeshroomCL-0.8.4-win64.zip

for f in $( find MeshroomCL-0.8.4 -name '*.exe' ) ; do strings $f > $( basename $f ).strings.txt ; done
// If you change these two values below, change in SiftMatchCL.cpp also
#define USE_LOCAL_MEMORY 1
#define WORKGROUP_SIZE 32
#define VECTOR_TYPE uchar16
#define VECTOR_LEN 16
#define VECTOR_ITS (128 / VECTOR_LEN)
__kernel void FindBestMatchOneWay_Kernel(__global int *result, __global VECTOR_TYPE *des1, __global VECTOR_TYPE *des2, int num1, int num2, float distmax, float ratiomax)
{
#if USE_LOCAL_MEMORY
__local VECTOR_TYPE desc[WORKGROUP_SIZE][VECTOR_ITS+1];
#endif
const int i = get_global_id(0);
if (i < num1)
{
// If we're using local memory, then copy this work-item's
// feature descriptor into a local memory cache.
#if USE_LOCAL_MEMORY
const int lid = get_local_id(0);
for (int k=0; k < VECTOR_ITS; ++k)
desc[lid][k] = des1[i*VECTOR_ITS + k];
#endif
int best_j = -1;
int best_dist = 0;
int second_best_dist = 0;
// Search the vector des2 for the corresponding descriptor
// that has the highest dot product with descriptor "i" of des1.
for (int j=0; j < num2; ++j)
{
// Compute the dot product for this pair of descriptors
uint dot = 0;
for (int d=0; d<VECTOR_ITS; ++d) {
#if USE_LOCAL_MEMORY
dot += desc[lid][d].s0 * des2[j*VECTOR_ITS + d].s0;
dot += desc[lid][d].s1 * des2[j*VECTOR_ITS + d].s1;
dot += desc[lid][d].s2 * des2[j*VECTOR_ITS + d].s2;
dot += desc[lid][d].s3 * des2[j*VECTOR_ITS + d].s3;
dot += desc[lid][d].s4 * des2[j*VECTOR_ITS + d].s4;
dot += desc[lid][d].s5 * des2[j*VECTOR_ITS + d].s5;
dot += desc[lid][d].s6 * des2[j*VECTOR_ITS + d].s6;
dot += desc[lid][d].s7 * des2[j*VECTOR_ITS + d].s7;
dot += desc[lid][d].s8 * des2[j*VECTOR_ITS + d].s8;
dot += desc[lid][d].s9 * des2[j*VECTOR_ITS + d].s9;
dot += desc[lid][d].sA * des2[j*VECTOR_ITS + d].sA;
dot += desc[lid][d].sB * des2[j*VECTOR_ITS + d].sB;
dot += desc[lid][d].sC * des2[j*VECTOR_ITS + d].sC;
dot += desc[lid][d].sD * des2[j*VECTOR_ITS + d].sD;
dot += desc[lid][d].sE * des2[j*VECTOR_ITS + d].sE;
dot += desc[lid][d].sF * des2[j*VECTOR_ITS + d].sF;
#else
dot += des1[i*VECTOR_ITS + d].s0 * des2[j*VECTOR_ITS + d].s0;
dot += des1[i*VECTOR_ITS + d].s1 * des2[j*VECTOR_ITS + d].s1;
dot += des1[i*VECTOR_ITS + d].s2 * des2[j*VECTOR_ITS + d].s2;
dot += des1[i*VECTOR_ITS + d].s3 * des2[j*VECTOR_ITS + d].s3;
dot += des1[i*VECTOR_ITS + d].s4 * des2[j*VECTOR_ITS + d].s4;
dot += des1[i*VECTOR_ITS + d].s5 * des2[j*VECTOR_ITS + d].s5;
dot += des1[i*VECTOR_ITS + d].s6 * des2[j*VECTOR_ITS + d].s6;
dot += des1[i*VECTOR_ITS + d].s7 * des2[j*VECTOR_ITS + d].s7;
dot += des1[i*VECTOR_ITS + d].s8 * des2[j*VECTOR_ITS + d].s8;
dot += des1[i*VECTOR_ITS + d].s9 * des2[j*VECTOR_ITS + d].s9;
dot += des1[i*VECTOR_ITS + d].sA * des2[j*VECTOR_ITS + d].sA;
dot += des1[i*VECTOR_ITS + d].sB * des2[j*VECTOR_ITS + d].sB;
dot += des1[i*VECTOR_ITS + d].sC * des2[j*VECTOR_ITS + d].sC;
dot += des1[i*VECTOR_ITS + d].sD * des2[j*VECTOR_ITS + d].sD;
dot += des1[i*VECTOR_ITS + d].sE * des2[j*VECTOR_ITS + d].sE;
dot += des1[i*VECTOR_ITS + d].sF * des2[j*VECTOR_ITS + d].sF;
#endif
}
if (dot > best_dist) {
best_j = j;
second_best_dist = best_dist;
best_dist = dot;
}
else if (dot > second_best_dist) {
second_best_dist = dot;
}
}
// The SIFT descriptor vectors are normalized to length 512, so
// we scale distance by 1/512^2.
const float best_dist_normed =
acos(fmin(best_dist * 0.000003814697265625f, 1.0f));
// Check if match distance fails threshold
if (best_dist_normed > distmax) {
result[i] = -1;
return;
}
const float second_best_dist_normed =
acos(fmin(second_best_dist * 0.000003814697265625f, 1.0f));
// Check if match fails ratio test
if (best_dist_normed >= ratiomax * second_best_dist_normed) {
result[i] = -1;
return;
}
// Record the match as valid!
result[i] = best_j;
}
}
// Guided filter as implemented in the COLMAP CPU version. Parameter "m"
// is a 3x3 fundamental matrix or homography matrix, depending on the
// value of "guided". Parameter "errormax" is the squared maximum
// residual threshold.
bool Guided(float x1, float y1, float x2, float y2, int guided, __constant float *m, float errormax)
{
float residual2;
const float a0 = m[0]*x1 + m[1]*y1 + m[2];
const float a1 = m[3]*x1 + m[4]*y1 + m[5];
const float a2 = m[6]*x1 + m[7]*y1 + m[8];
if (!(guided & 1)) // CALIBRATED or UNCALIBRATED two-view geometry
{
const float b0 = m[0]*x2 + m[3]*y2 + m[6];
const float b1 = m[1]*x2 + m[4]*y2 + m[7];
const float c = x2*a0 + y2*a1 + a2;
residual2 = c * c / (a0*a0 + a1*a1 + b0*b0 + b1*b1);
}
else // PLANAR or PANORAMIC two-view geometry
{
const float b0 = a0 / a2 - x2;
const float b1 = a1 / a2 - y2;
residual2 = b0*b0 + b1*b1;
}
return residual2 > errormax;
__kernel void FindGuidedBestMatchOneWay_Kernel(__global int *result, __global VECTOR_TYPE *des1, __global VECTOR_TYPE *des2, __global float *loc1, __global float *loc2, int num1, int num2, int guided, // 0 = Fundamental matrix, first pass
// 1 = Homography matrix, first pass
// 2 = Fundamental matrix, second pass of cross-check
// 3 = Homography matrix, second pass of cross-check
__constant float *gm, // Fundamental or homography matrix for guided filter
float distmax, float ratiomax, float errormax)
#if USE_LOCAL_MEMORY
__local VECTOR_TYPE desc[WORKGROUP_SIZE][VECTOR_ITS+1];
#endif
const int i = get_global_id(0);
if (i < num1)
{
// If we're using local memory, then copy this work-item's
// feature descriptor into a local memory cache.
#if USE_LOCAL_MEMORY
const int lid = get_local_id(0);
for (int k=0; k < VECTOR_ITS; ++k)
desc[lid][k] = des1[i*VECTOR_ITS + k];
#endif
int best_j = -1;
int best_dist = 0;
int second_best_dist = 0;
// Search the vector des2 for the corresponding descriptor
// that has the highest dot product with descriptor "i" of des1.
for (int j=0; j < num2; ++j)
{
float x1, y1, x2, y2;
uint dot = 0;
// If this is the cross-check pass, we swap the order of the
// pixel coordinates, since the gm matrix was computed as the
// transform from the first image to the second.
if (guided & 2)
{
x1 = loc2[2*j];
y1 = loc2[2*j+1];
x2 = loc1[2*i];
y2 = loc1[2*i+1];
}
else
{
x1 = loc1[2*i];
y1 = loc1[2*i+1];
x2 = loc2[2*j];
y2 = loc2[2*j+1];
}
// Now apply the guided filter, and if the feature pair is not
// filtered out, then go ahead and compute the matching score.
if (!Guided(x1, y1, x2, y2, guided, gm, errormax))
{
// Compute the dot product for this pair of descriptors
for (int d=0; d<VECTOR_ITS; ++d) {
#if USE_LOCAL_MEMORY
dot += desc[lid][d].s0 * des2[j*VECTOR_ITS + d].s0;
dot += desc[lid][d].s1 * des2[j*VECTOR_ITS + d].s1;
dot += desc[lid][d].s2 * des2[j*VECTOR_ITS + d].s2;
dot += desc[lid][d].s3 * des2[j*VECTOR_ITS + d].s3;
dot += desc[lid][d].s4 * des2[j*VECTOR_ITS + d].s4;
dot += desc[lid][d].s5 * des2[j*VECTOR_ITS + d].s5;
dot += desc[lid][d].s6 * des2[j*VECTOR_ITS + d].s6;
dot += desc[lid][d].s7 * des2[j*VECTOR_ITS + d].s7;
dot += desc[lid][d].s8 * des2[j*VECTOR_ITS + d].s8;
dot += desc[lid][d].s9 * des2[j*VECTOR_ITS + d].s9;
dot += desc[lid][d].sA * des2[j*VECTOR_ITS + d].sA;
dot += desc[lid][d].sB * des2[j*VECTOR_ITS + d].sB;
dot += desc[lid][d].sC * des2[j*VECTOR_ITS + d].sC;
dot += desc[lid][d].sD * des2[j*VECTOR_ITS + d].sD;
dot += desc[lid][d].sE * des2[j*VECTOR_ITS + d].sE;
dot += desc[lid][d].sF * des2[j*VECTOR_ITS + d].sF;
#else
dot += des1[i*VECTOR_ITS + d].s0 * des2[j*VECTOR_ITS + d].s0;
dot += des1[i*VECTOR_ITS + d].s1 * des2[j*VECTOR_ITS + d].s1;
dot += des1[i*VECTOR_ITS + d].s2 * des2[j*VECTOR_ITS + d].s2;
dot += des1[i*VECTOR_ITS + d].s3 * des2[j*VECTOR_ITS + d].s3;
dot += des1[i*VECTOR_ITS + d].s4 * des2[j*VECTOR_ITS + d].s4;
dot += des1[i*VECTOR_ITS + d].s5 * des2[j*VECTOR_ITS + d].s5;
dot += des1[i*VECTOR_ITS + d].s6 * des2[j*VECTOR_ITS + d].s6;
dot += des1[i*VECTOR_ITS + d].s7 * des2[j*VECTOR_ITS + d].s7;
dot += des1[i*VECTOR_ITS + d].s8 * des2[j*VECTOR_ITS + d].s8;
dot += des1[i*VECTOR_ITS + d].s9 * des2[j*VECTOR_ITS + d].s9;
dot += des1[i*VECTOR_ITS + d].sA * des2[j*VECTOR_ITS + d].sA;
dot += des1[i*VECTOR_ITS + d].sB * des2[j*VECTOR_ITS + d].sB;
dot += des1[i*VECTOR_ITS + d].sC * des2[j*VECTOR_ITS + d].sC;
dot += des1[i*VECTOR_ITS + d].sD * des2[j*VECTOR_ITS + d].sD;
dot += des1[i*VECTOR_ITS + d].sE * des2[j*VECTOR_ITS + d].sE;
dot += des1[i*VECTOR_ITS + d].sF * des2[j*VECTOR_ITS + d].sF;
#endif
}
if (dot > best_dist) {
best_j = j;
second_best_dist = best_dist;
best_dist = dot;
}
else if (dot > second_best_dist) {
second_best_dist = dot;
}
}
// The SIFT descriptor vectors are normalized to length 512, so
// we scale distance by 1/512^2.
const float best_dist_normed =
acos(fmin(best_dist * 0.000003814697265625f, 1.0f));
// Check if match distance fails threshold
if (best_dist_normed > distmax) {
result[i] = -1;
return;
}
const float second_best_dist_normed =
acos(fmin(second_best_dist * 0.000003814697265625f, 1.0f));
// Check if match fails ratio test
if (best_dist_normed >= ratiomax * second_best_dist_normed) {
result[i] = -1;
return;
}
// Record the match as valid!
result[i] = best_j;
}
}
#define KWINDOWSIZE (KWINDOWRADIUS*2 + 1)
static inline void Mat33DotVec3(const float mat[9], const float vec[3], float result[3]) {
result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2];
result[1] = mat[3] * vec[0] + mat[4] * vec[1] + mat[5] * vec[2];
result[2] = mat[6] * vec[0] + mat[7] * vec[1] + mat[8] * vec[2];
}
static inline void Mat33DotVec3Homogeneous(const float mat[9], const float vec[2], float result[2]) {
const float inv_z = 1.0f / (mat[6] * vec[0] + mat[7] * vec[1] + mat[8]);
result[0] = inv_z * (mat[0] * vec[0] + mat[1] * vec[1] + mat[2]);
result[1] = inv_z * (mat[3] * vec[0] + mat[4] * vec[1] + mat[5]);
}
static inline float DotProduct3(const float vec1[3], const float vec2[3]) {
return vec1[0] * vec2[0] + vec1[1] * vec2[1] + vec1[2] * vec2[2];
__constant sampler_t PosesImageSampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
}
static inline void ComputeViewingAngles(const float point[3], const float normal[3], const int image_idx, float* cos_triangulation_angle, float* cos_incident_angle, image2d_array_t poses_images) {
*cos_triangulation_angle = 0.0f;
*cos_incident_angle = 0.0f;
float C[3];
for (int i = 0; i < 3; ++i) {
C[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 16, image_idx, 0, 0)).x;
const float SX[3] = {C[0] - point[0], C[1] - point[1], C[2] - point[2]};
const float RX_inv_norm = rsqrt(DotProduct3(point, point));
const float SX_inv_norm = rsqrt(DotProduct3(SX, SX));
*cos_incident_angle = DotProduct3(SX, normal) * SX_inv_norm;
*cos_triangulation_angle = DotProduct3(SX, point) * RX_inv_norm * SX_inv_norm;
}
static inline void ComposeHomography(image2d_array_t poses_images, const int image_idx, const int row, const int col, const float depth, const float normal[3], float H[9], __constant float * ref_inv_K) {
float K[4];
for (int i = 0; i < 4; ++i) {
K[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i, image_idx, 0, 0)).x;
float R[9];
for (int i = 0; i < 9; ++i) {
R[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 4, image_idx, 0, 0)).x;
float T[3];
for (int i = 0; i < 3; ++i) {
T[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 13, image_idx, 0, 0)).x;
const float dist =
depth * (normal[0] * (ref_inv_K[0] * col + ref_inv_K[1]) +
normal[1] * (ref_inv_K[2] * row + ref_inv_K[3]) + normal[2]);
const float inv_dist = 1.0f / dist;
const float inv_dist_N0 = inv_dist * normal[0];
const float inv_dist_N1 = inv_dist * normal[1];
const float inv_dist_N2 = inv_dist * normal[2];
H[0] = ref_inv_K[0] * (K[0] * (R[0] + inv_dist_N0 * T[0]) +
K[1] * (R[6] + inv_dist_N0 * T[2]));
H[1] = ref_inv_K[2] * (K[0] * (R[1] + inv_dist_N1 * T[0]) +
K[1] * (R[7] + inv_dist_N1 * T[2]));
H[2] = K[0] * (R[2] + inv_dist_N2 * T[0]) +
K[1] * (R[8] + inv_dist_N2 * T[2]) +
ref_inv_K[1] * (K[0] * (R[0] + inv_dist_N0 * T[0]) +
K[1] * (R[6] + inv_dist_N0 * T[2])) +
ref_inv_K[3] * (K[0] * (R[1] + inv_dist_N1 * T[0]) +
K[1] * (R[7] + inv_dist_N1 * T[2]));
H[3] = ref_inv_K[0] * (K[2] * (R[3] + inv_dist_N0 * T[1]) +
K[3] * (R[6] + inv_dist_N0 * T[2]));
H[4] = ref_inv_K[2] * (K[2] * (R[4] + inv_dist_N1 * T[1]) +
K[3] * (R[7] + inv_dist_N1 * T[2]));
H[5] = K[2] * (R[5] + inv_dist_N2 * T[1]) +
K[3] * (R[8] + inv_dist_N2 * T[2]) +
ref_inv_K[1] * (K[2] * (R[3] + inv_dist_N0 * T[1]) +
K[3] * (R[6] + inv_dist_N0 * T[2])) +
ref_inv_K[3] * (K[2] * (R[4] + inv_dist_N1 * T[1]) +
K[3] * (R[7] + inv_dist_N1 * T[2]));
H[6] = ref_inv_K[0] * (R[6] + inv_dist_N0 * T[2]);
H[7] = ref_inv_K[2] * (R[7] + inv_dist_N1 * T[2]);
H[8] = R[8] + ref_inv_K[1] * (R[6] + inv_dist_N0 * T[2]) +
ref_inv_K[3] * (R[7] + inv_dist_N1 * T[2]) + inv_dist_N2 * T[2];
__constant sampler_t ReadRefImageSampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
}
static inline void ReadRefImageIntoSharedMemory(image2d_array_t ref_image, __local float* local_image, const int row, const int col, const int thread_id) {
barrier(CLK_LOCAL_MEM_FENCE);
if (row == 0) {
int r = row - KWINDOWSIZE / 2;
for (int i = 0; i < KWINDOWSIZE; ++i) {
int c = col - LOCAL_MEM_WORK_GROUP_SIZE;
for (int j = 0; j < 3; ++j) {
local_image[thread_id + i * 3 * LOCAL_MEM_WORK_GROUP_SIZE +
j * LOCAL_MEM_WORK_GROUP_SIZE] = read_imagef(ref_image, ReadRefImageSampler, (int4)(c, r, 0, 0)).x;
c += LOCAL_MEM_WORK_GROUP_SIZE;
}
r += 1;
}
} else {
for (int i = 1; i < KWINDOWSIZE; ++i) {
for (int j = 0; j < 3; ++j) {
local_image[thread_id + (i - 1) * 3 * LOCAL_MEM_WORK_GROUP_SIZE +
j * LOCAL_MEM_WORK_GROUP_SIZE] =
local_image[thread_id + i * 3 * LOCAL_MEM_WORK_GROUP_SIZE +
j * LOCAL_MEM_WORK_GROUP_SIZE];
}
}
const int r = row + KWINDOWSIZE / 2;
int c = col - LOCAL_MEM_WORK_GROUP_SIZE;
const int i = KWINDOWSIZE - 1;
for (int j = 0; j < 3; ++j) {
local_image[thread_id + i * 3 * LOCAL_MEM_WORK_GROUP_SIZE +
j * LOCAL_MEM_WORK_GROUP_SIZE] = read_imagef(ref_image, ReadRefImageSampler, (int4)(c, r, 0, 0)).x;
c += LOCAL_MEM_WORK_GROUP_SIZE;
}
barrier(CLK_LOCAL_MEM_FENCE);
}
__constant sampler_t PccSrcImageSampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;
static inline float pcc_compute(const float spatial_normalization_, const float color_normalization_, const float kMaxCost, int row, int col, float depth, const float* normal, const __local float* local_ref_image, float local_ref_sum, float local_ref_squared_sum, int src_image_idx, image2d_array_t src_images, image2d_array_t poses_images, __constant float * ref_inv_K) {
float tform[9];
ComposeHomography(poses_images, src_image_idx, row, col, depth, normal, tform, ref_inv_K);
float x_step[3], y_step[3];
for (int i=0; i<3; ++i) {
x_step[i] = KWINDOWSTEP * tform[i*3];
y_step[i] = KWINDOWSTEP * tform[i*3+1];
}
const int thread_id = get_local_id(0);
const int row_start = row - KWINDOWRADIUS;
const int col_start = col - KWINDOWRADIUS;
float col_src = tform[0] * col_start + tform[1] * row_start + tform[2];
float row_src = tform[3] * col_start + tform[4] * row_start + tform[5];
float z = tform[6] * col_start + tform[7] * row_start + tform[8];
float base_col_src = col_src;
float base_row_src = row_src;
float base_z = z;
int ref_image_idx = LOCAL_MEM_WORK_GROUP_SIZE - KWINDOWRADIUS + thread_id;
int ref_image_base_idx = ref_image_idx;
const float ref_center_color =
local_ref_image[ref_image_idx + KWINDOWRADIUS * 3 * LOCAL_MEM_WORK_GROUP_SIZE +
KWINDOWRADIUS];
const float ref_color_sum = local_ref_sum;
const float ref_color_squared_sum = local_ref_squared_sum;
float src_color_sum = 0.0f;
float src_color_squared_sum = 0.0f;
float src_ref_color_sum = 0.0f;
float bilateral_weight_sum = 0.0f;
for (int row = -KWINDOWRADIUS; row <= KWINDOWRADIUS; row += KWINDOWSTEP) {
for (int col = -KWINDOWRADIUS; col <= KWINDOWRADIUS; col += KWINDOWSTEP) {
const float inv_z = 1.0f / z;
const float norm_col_src = inv_z * col_src + 0.5f;
const float norm_row_src = inv_z * row_src + 0.5f;
const float ref_color = local_ref_image[ref_image_idx];
const float src_color = read_imagef(src_images, PccSrcImageSampler, (float4)(norm_col_src, norm_row_src, src_image_idx, 0)).x;
const float spatial_dist_squared = row * row + col * col;
const float color_dist = ref_center_color - ref_color;
const float bilateral_weight = exp(-spatial_dist_squared * spatial_normalization_ -
color_dist * color_dist * color_normalization_);
const float bilateral_weight_src = bilateral_weight * src_color;
src_color_sum += bilateral_weight_src;
src_color_squared_sum += bilateral_weight_src * src_color;
src_ref_color_sum += bilateral_weight_src * ref_color;
bilateral_weight_sum += bilateral_weight;
ref_image_idx += KWINDOWSTEP;
col_src += x_step[0];
row_src += x_step[1];
z += x_step[2];
}
ref_image_base_idx += KWINDOWSTEP * 3 * LOCAL_MEM_WORK_GROUP_SIZE;
ref_image_idx = ref_image_base_idx;
base_col_src += y_step[0];
base_row_src += y_step[1];
base_z += y_step[2];
col_src = base_col_src;
row_src = base_row_src;
z = base_z;
}
const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum;
src_color_sum *= inv_bilateral_weight_sum;
src_color_squared_sum *= inv_bilateral_weight_sum;
src_ref_color_sum *= inv_bilateral_weight_sum;
const float ref_color_var =
ref_color_squared_sum - ref_color_sum * ref_color_sum;
const float src_color_var =
src_color_squared_sum - src_color_sum * src_color_sum;
const float kMinVar = 1e-5f;
float firstCost = 0.0f;
if (ref_color_var < kMinVar || src_color_var < kMinVar) {
return kMaxCost;
} else {
const float src_ref_color_covar =
src_ref_color_sum - ref_color_sum * src_color_sum;
const float src_ref_color_var = sqrt(ref_color_var * src_color_var);
return max(0.0f, min(kMaxCost, 1.0f - src_ref_color_covar / src_ref_color_var));
}
}
static inline void ComputePointAtDepth(const float row, const float col, const float depth, float point[3], __constant float * ref_inv_K) {
point[0] = depth * (ref_inv_K[0] * col + ref_inv_K[1]);
point[1] = depth * (ref_inv_K[2] * row + ref_inv_K[3]);
point[2] = depth;
__constant sampler_t SrcDepthMapImageSampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
}
static inline float ComputeGeomConsistencyCost(const float row, const float col, const float depth, const int image_idx, const float max_cost, image2d_array_t poses_images, image2d_array_t src_depth_map_images, __constant float * ref_inv_K, __constant float * ref_K)
{
float P[12];
for (int i = 0; i < 12; ++i) {
P[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 19, image_idx, 0, 0)).x;
float inv_P[12];
for (int i = 0; i < 12; ++i) {
inv_P[i] = read_imagef(poses_images, PosesImageSampler, (int4)(i + 31, image_idx, 0, 0)).x;
float forward_point[3];
ComputePointAtDepth(row, col, depth, forward_point, ref_inv_K);
const float inv_forward_z =
1.0f / (P[8] * forward_point[0] + P[9] * forward_point[1] +
P[10] * forward_point[2] + P[11]);
float src_col =
inv_forward_z * (P[0] * forward_point[0] + P[1] * forward_point[1] +
P[2] * forward_point[2] + P[3]);
float src_row =
inv_forward_z * (P[4] * forward_point[0] + P[5] * forward_point[1] +
P[6] * forward_point[2] + P[7]);
const float src_depth = read_imagef(src_depth_map_images, SrcDepthMapImageSampler, (float4)(src_col + 0.5f, src_row + 0.5f, image_idx, 0)).x;
if (src_depth == 0.0f) {
return max_cost;
}
src_col *= src_depth;
src_row *= src_depth;
const float backward_point_x =
inv_P[0] * src_col + inv_P[1] * src_row + inv_P[2] * src_depth + inv_P[3];
const float backward_point_y =
inv_P[4] * src_col + inv_P[5] * src_row + inv_P[6] * src_depth + inv_P[7];
const float backward_point_z = inv_P[8] * src_col + inv_P[9] * src_row +
inv_P[10] * src_depth + inv_P[11];
const float inv_backward_point_z = 1.0f / backward_point_z;
const float backward_col =
inv_backward_point_z *
(ref_K[0] * backward_point_x + ref_K[1] * backward_point_z);
const float backward_row =
inv_backward_point_z *
(ref_K[2] * backward_point_y + ref_K[3] * backward_point_z);
const float diff_col = col - backward_col;
const float diff_row = row - backward_row;
return min(max_cost, sqrt(diff_col * diff_col + diff_row * diff_row));
}
__attribute__((reqd_work_group_size(LOCAL_MEM_WORK_GROUP_SIZE, 1, 1)))
__kernel void ComputeInitialCostKernel(__global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_depth, int normal_map_pitch, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, float sigma_spatial, float sigma_color, image2d_array_t ref_image, image2d_array_t src_images, image2d_array_t poses_images, __constant float *ref_inv_K, __local float *local_ref_image) {
const int thread_id = get_local_id(0);
const int col = get_global_id(0);
const float spatial_normalization_ = (1.0f / (2.0f * sigma_spatial * sigma_spatial));
const float color_normalization_ = (1.0f / (2.0f * sigma_color * sigma_color));
const float pcc_computer_kMaxCost = 2.0f;
const __local float * pcc_computer_local_ref_image = local_ref_image;
int pcc_computer_row = 0;
int pcc_computer_col = col;
float pcc_computer_depth = 0.0f;
float normal[3];
const float * pcc_computer_normal = normal;
float pcc_computer_local_ref_sum = 0.0f;
float pcc_computer_local_ref_squared_sum = 0.0f;
int pcc_computer_src_image_idx = -1;
for (int row = 0; row < cost_map_height; ++row) {
ReadRefImageIntoSharedMemory(ref_image, local_ref_image, row, col, thread_id);
if (col < cost_map_width) {
pcc_computer_depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * row) + col);
for (int slice = 0; slice < normal_map_depth; ++slice) {
normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col);
}
pcc_computer_local_ref_sum = *((__global float*)((__global char*)ref_sum_image + ref_sum_pitch * row) + col);
pcc_computer_local_ref_squared_sum = *((__global float*)((__global char*)ref_squared_sum_image + ref_squared_sum_pitch * row) + col);
for (int image_idx = 0; image_idx < cost_map_depth; ++image_idx) {
pcc_computer_src_image_idx = image_idx;
float pcc_computer_result = pcc_compute(spatial_normalization_, color_normalization_, pcc_computer_kMaxCost, pcc_computer_row, pcc_computer_col, pcc_computer_depth, pcc_computer_normal, pcc_computer_local_ref_image, pcc_computer_local_ref_sum, pcc_computer_local_ref_squared_sum, pcc_computer_src_image_idx, src_images, poses_images, ref_inv_K);
*((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col) = pcc_computer_result;
}
pcc_computer_row += 1;
}
}
}
typedef struct {
unsigned int d, v[5];
} clrandState;
__kernel void InitRandomStateKernel(__global clrandState *output, int width, int height, uint pitch)
{
const uint row = get_global_id(1);
const uint col = get_global_id(0);
if (col < width && row < height) {
const uint id = row*width + col;
__global clrandState *state =
(__global clrandState*)((__global char*)output + pitch*row) + col;
state->v[0] = 123456789U;
state->v[1] = 362436069U;
state->v[2] = 521288629U;
state->v[3] = 88675123U;
state->v[4] = 5783321U;
state->d = 6615241U;
const uint s0 = id ^ 0x2c7f967fU;
const uint s1 = 0 ^ 0xa03697cbU;
const uint t0 = 1228688033U * s0;
const uint t1 = 2073658381U * s1;
state->v[0] += t0;
state->v[1] ^= t0;
state->v[2] += t1;
state->v[3] ^= t1;
state->v[4] += t0;
state->d += t1 + t0;
}
}
static inline float clrand_uniform(clrandState* state)
{
const float uint_inc = 2.3283064e-10f;
unsigned int t = state->v[0] ^ (state->v[0] >> 2);
state->v[0] = state->v[1];
state->v[1] = state->v[2];
state->v[2] = state->v[3];
state->v[3] = state->v[4];
state->v[4] = (state->v[4] ^ (state->v[4] << 4)) ^ (t ^ (t << 1));
state->d += 362437;
t = state->d + state->v[4];
return uint_inc + (t * uint_inc);
}
static inline void GenerateRandomNormal(const int row, const int col, clrandState* rand_state, float normal[3], __constant float * ref_inv_K) {
float v1 = 0.0f;
float v2 = 0.0f;
float s = 2.0f;
while (s >= 1.0f) {
v1 = 2.0f * clrand_uniform(rand_state) - 1.0f;
v2 = 2.0f * clrand_uniform(rand_state) - 1.0f;
s = v1 * v1 + v2 * v2;
}
const float s_norm = sqrt(1.0f - s);
normal[0] = 2.0f * v1 * s_norm;
normal[1] = 2.0f * v2 * s_norm;
normal[2] = 1.0f - 2.0f * s;
const float view_ray[3] = {ref_inv_K[0] * col + ref_inv_K[1], ref_inv_K[2] * row + ref_inv_K[3], 1.0f};
if (DotProduct3(normal, view_ray) > 0) {
normal[0] = -normal[0];
normal[1] = -normal[1];
normal[2] = -normal[2];
}
}
static inline float GenerateRandomDepth(const float depth_min, const float depth_max, clrandState* rand_state) {
return clrand_uniform(rand_state) * (depth_max - depth_min) + depth_min;
}
static inline float PerturbDepth(const float perturbation, const float depth, clrandState* rand_state) {
const float depth_min = (1.0f - perturbation) * depth;
const float depth_max = (1.0f + perturbation) * depth;
return GenerateRandomDepth(depth_min, depth_max, rand_state);
}
static inline void PerturbNormal(const int row, const int col, float perturbation, const float normal[3], clrandState* rand_state, float perturbed_normal[3], __constant float * ref_inv_K) {
int num_trials = 0;
do {
const float a1 = (clrand_uniform(rand_state) - 0.5f) * perturbation;
const float a2 = (clrand_uniform(rand_state) - 0.5f) * perturbation;
const float a3 = (clrand_uniform(rand_state) - 0.5f) * perturbation;
const float sin_a1 = sin(a1);
const float sin_a2 = sin(a2);
const float sin_a3 = sin(a3);
const float cos_a1 = cos(a1);
const float cos_a2 = cos(a2);
const float cos_a3 = cos(a3);
float R[9];
R[0] = cos_a2 * cos_a3;
R[1] = -cos_a2 * sin_a3;
R[2] = sin_a2;
R[3] = cos_a1 * sin_a3 + cos_a3 * sin_a1 * sin_a2;
R[4] = cos_a1 * cos_a3 - sin_a1 * sin_a2 * sin_a3;
R[5] = -cos_a2 * sin_a1;
R[6] = sin_a1 * sin_a3 - cos_a1 * cos_a3 * sin_a2;
R[7] = cos_a3 * sin_a1 + cos_a1 * sin_a2 * sin_a3;
R[8] = cos_a1 * cos_a2;
Mat33DotVec3(R, normal, perturbed_normal);
const float view_ray[3] = {ref_inv_K[0] * col + ref_inv_K[1], ref_inv_K[2] * row + ref_inv_K[3], 1.0f};
if (DotProduct3(perturbed_normal, view_ray) < 0.0f) {
const float inv_norm = rsqrt(DotProduct3(perturbed_normal, perturbed_normal));
perturbed_normal[0] *= inv_norm;
perturbed_normal[1] *= inv_norm;
perturbed_normal[2] *= inv_norm;
return;
}
perturbation *= 0.5f;
++num_trials;
}
while (num_trials < 4);
perturbed_normal[0] = normal[0];
perturbed_normal[1] = normal[1];
perturbed_normal[2] = normal[2];
}
static inline float PropagateDepth(__constant float * ref_inv_K, const float depth1, const float normal1[3], const float row1, const float row2) {
const float x1 = depth1 * (ref_inv_K[2] * row1 + ref_inv_K[3]);
const float y1 = depth1;
const float x2 = x1 + normal1[2];
const float y2 = y1 - normal1[1];
const float x4 = ref_inv_K[2] * row2 + ref_inv_K[3];
const float denom = x2 - x1 + x4 * (y1 - y2);
const float kEps = 1e-5f;
if (fabs(denom) < kEps) {
return depth1;
}
const float nom = y1 * x2 - x1 * y2;
return nom / denom;
}
static inline float likelihood_computer_ComputeNCCProb(const float cost, float likelihood_computer_inv_ncc_sigma_square_, float likelihood_computer_ncc_norm_factor_) {
return exp(cost * cost * likelihood_computer_inv_ncc_sigma_square_) * likelihood_computer_ncc_norm_factor_;
}
static inline float likelihood_computer_ComputeTriProb(const float cos_triangulation_angle, float likelihood_computer_cos_min_triangulation_angle_) {
const float abs_cos_triangulation_angle = fabs(cos_triangulation_angle);
if (abs_cos_triangulation_angle > likelihood_computer_cos_min_triangulation_angle_) {
const float scaled = 1.0f - (1.0f - abs_cos_triangulation_angle) /
(1.0f - likelihood_computer_cos_min_triangulation_angle_);
const float likelihood = 1.0f - scaled * scaled;
return min(1.0f, max(0.0f, likelihood));
} else {
return 1.0f;
}
}
static inline float likelihood_computer_ComputeIncProb(const float cos_incident_angle, float likelihood_computer_inv_incident_angle_sigma_square_) {
const float x = 1.0f - max(0.0f, cos_incident_angle);
return exp(x * x * likelihood_computer_inv_incident_angle_sigma_square_);
}
static inline float likelihood_computer_ComputeSelProb(const float alpha, const float beta, const float prev, const float prev_weight) {
const float zn0 = (1.0f - alpha) * (1.0f - beta);
const float zn1 = alpha * beta;
const float curr = zn1 / (zn0 + zn1);
return prev_weight * prev + (1.0f - prev_weight) * curr;
}
static inline float likelihood_computer_ComputeResolutionProb(const float H[9], const float row, const float col) {
float src1[2];
const float ref1[2] = {col - KWINDOWRADIUS, row - KWINDOWRADIUS};
Mat33DotVec3Homogeneous(H, ref1, src1);
float src2[2];
const float ref2[2] = {col - KWINDOWRADIUS, row + KWINDOWRADIUS};
Mat33DotVec3Homogeneous(H, ref2, src2);
float src3[2];
const float ref3[2] = {col + KWINDOWRADIUS, row + KWINDOWRADIUS};
Mat33DotVec3Homogeneous(H, ref3, src3);
float src4[2];
const float ref4[2] = {col + KWINDOWRADIUS, row - KWINDOWRADIUS};
Mat33DotVec3Homogeneous(H, ref4, src4);
const float ref_area = KWINDOWSIZE * KWINDOWSIZE;
const float src_area =
fabs(0.5f * (src1[0] * src2[1] - src2[0] * src1[1] - src1[0] * src4[1] +
src2[0] * src3[1] - src3[0] * src2[1] + src4[0] * src1[1] +
src3[0] * src4[1] - src4[0] * src3[1]));
if (ref_area > src_area) {
return src_area / ref_area;
} else {
return ref_area / src_area;
}
}
static inline float likelihood_computer_ComputeMessage(bool kForward, const float cost, const float prev, float likelihood_computer_inv_ncc_sigma_square_, float likelihood_computer_ncc_norm_factor_) {
const float kUniformProb = 0.5f;
const float kNoChangeProb = 0.99999f;
const float kChangeProb = 1.0f - kNoChangeProb;
const float emission = likelihood_computer_ComputeNCCProb(cost, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_);
float zn0;
float zn1;
if (kForward) {
zn0 = (prev * kChangeProb + (1.0f - prev) * kNoChangeProb) * kUniformProb;
zn1 = (prev * kNoChangeProb + (1.0f - prev) * kChangeProb) * emission;
} else {
zn0 = prev * emission * kChangeProb +
(1.0f - prev) * kUniformProb * kNoChangeProb;
zn1 = prev * emission * kNoChangeProb +
(1.0f - prev) * kUniformProb * kChangeProb;
}
return zn1 / (zn0 + zn1);
}
static inline float likelihood_computer_ComputeForwardMessage(const float cost, const float prev, float likelihood_computer_inv_ncc_sigma_square_, float likelihood_computer_ncc_norm_factor_) {
return likelihood_computer_ComputeMessage(true, cost, prev, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_);
}
static inline float likelihood_computer_ComputeBackwardMessage(const float cost, const float prev, float likelihood_computer_inv_ncc_sigma_square_, float likelihood_computer_ncc_norm_factor_) {
return likelihood_computer_ComputeMessage(false, cost, prev, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_);
}
static inline float likelihood_computer_ComputeNCCCostNormFactor(const float ncc_sigma) {
return 2.0f / (sqrt(2.0f * M_PI_F) * ncc_sigma *
erf(2.0f / (ncc_sigma * 1.414213562f)));
}
typedef struct {
float depth;
float normal[3];
} ParamState;
__kernel void ComputeBackwardMessagesKernel(__global float *global_workspace, int global_workspace_height, __global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, float ncc_sigma)
{
const int col = get_global_id(0);
const float kUniformProb = 0.5f;
float likelihood_computer_inv_ncc_sigma_square_ = -0.5f / (ncc_sigma * ncc_sigma);
float likelihood_computer_ncc_norm_factor_ = likelihood_computer_ComputeNCCCostNormFactor(ncc_sigma);
__global float* forward_message = &global_workspace[col * global_workspace_height];
if (col < cost_map_width) {
for (int image_idx = 0; image_idx < cost_map_depth; ++image_idx) {
float beta = kUniformProb;
for (int row = cost_map_height - 1; row >= 0; --row) {
const float cost = *((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col);
beta = likelihood_computer_ComputeBackwardMessage(cost, beta, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_);
*((__global float*)((__global char*)sel_prob_map + sel_prob_map_pitch * (image_idx * sel_prob_map_height + row)) + col) = beta;
}
forward_message[image_idx] = kUniformProb;
}
}
}
__kernel void HypothesesAndProbsKernel(int row, __global float *global_workspace, int global_workspace_width, int global_workspace_height, __global float *hypotheses_workspace, int hypotheses_workspace_height, __global clrandState *rand_state_map, __global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, __global float *prev_sel_prob_map, int prev_sel_prob_map_height, int prev_sel_prob_map_pitch, float min_triangulation_angle, float incident_angle_sigma, float ncc_sigma, float perturbation, float prev_sel_prob_weight, image2d_array_t poses_images,
#if HYPOTHESIS_NUMBER > 0
__constant float *ref_inv_K, __global float *scaledfilt1_depth_map, __global float *scaledfilt1_normal_map, __global float *scaledfilt2_depth_map, __global float *scaledfilt2_normal_map)
#else
__constant float *ref_inv_K)
#endif
{
const int col = get_global_id(0);
if (col >= cost_map_width)
return;
ParamState prev_param_state;
ParamState curr_param_state;
ParamState rand_param_state;
curr_param_state.depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * row) + col);
for (int slice = 0; slice < 3; ++slice)
curr_param_state.normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col);
if (row == 0) {
prev_param_state.depth = *(depth_map + col);
for (int slice = 0; slice < 3; ++slice)
prev_param_state.normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + 0)) + col);
else {
prev_param_state.depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * (row-1)) + col);
for (int slice = 0; slice < 3; ++slice)
prev_param_state.normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + (row-1))) + col);
prev_param_state.depth = PropagateDepth(ref_inv_K, prev_param_state.depth, prev_param_state.normal, row - 1, row);
clrandState rand_state = *(rand_state_map + col);
rand_param_state.depth = PerturbDepth(perturbation, curr_param_state.depth, &rand_state);
PerturbNormal(row, col, perturbation * M_PI_F, curr_param_state.normal, &rand_state, rand_param_state.normal, ref_inv_K);
*(rand_state_map + col) = rand_state;
#define SET_HYPOTHESIS(h, slice) (hypotheses_workspace[(slice)*global_workspace_width*hypotheses_workspace_height + (h)*global_workspace_width + col])
SET_HYPOTHESIS(0, 0) = prev_param_state.depth;
SET_HYPOTHESIS(0, 1) = prev_param_state.normal[0];
SET_HYPOTHESIS(0, 2) = prev_param_state.normal[1];
SET_HYPOTHESIS(0, 3) = prev_param_state.normal[2];
SET_HYPOTHESIS(1, 0) = rand_param_state.depth;
SET_HYPOTHESIS(1, 1) = curr_param_state.normal[0];
SET_HYPOTHESIS(1, 2) = curr_param_state.normal[1];
SET_HYPOTHESIS(1, 3) = curr_param_state.normal[2];
SET_HYPOTHESIS(2, 0) = curr_param_state.depth;
SET_HYPOTHESIS(2, 1) = rand_param_state.normal[0];
SET_HYPOTHESIS(2, 2) = rand_param_state.normal[1];
SET_HYPOTHESIS(2, 3) = rand_param_state.normal[2];
SET_HYPOTHESIS(3, 0) = rand_param_state.depth;
SET_HYPOTHESIS(3, 1) = rand_param_state.normal[0];
SET_HYPOTHESIS(3, 2) = rand_param_state.normal[1];
SET_HYPOTHESIS(3, 3) = rand_param_state.normal[2];
#if HYPOTHESIS_NUMBER > 0
ParamState scaledfilt1_param_state;
ParamState scaledfilt2_param_state;
scaledfilt1_param_state.depth = *((__global float*)((__global char*)scaledfilt1_depth_map + depth_map_pitch * row) + col);
for (int slice = 0; slice < 3; ++slice)
scaledfilt1_param_state.normal[slice] = *((__global float*)((__global char*)scaledfilt1_normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col);
scaledfilt2_param_state.depth = *((__global float*)((__global char*)scaledfilt2_depth_map + depth_map_pitch * row) + col);
for (int slice = 0; slice < 3; ++slice)
scaledfilt2_param_state.normal[slice] = *((__global float*)((__global char*)scaledfilt2_normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col);
int hyp = 4;
SET_HYPOTHESIS(hyp, 0) = (curr_param_state.depth + prev_param_state.depth)/2.0f;
SET_HYPOTHESIS(hyp, 1) = prev_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = prev_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = prev_param_state.normal[2];
++hyp;
float prev1_depth, prev2_depth;
if (row < 2) {
prev1_depth = *(depth_map + col);
prev2_depth = prev1_depth;
else {
prev1_depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * (row-1)) + col);
prev2_depth = *((__global float*)((__global char*)depth_map + depth_map_pitch * (row-2)) + col);
SET_HYPOTHESIS(hyp, 0) = 2*prev1_depth - prev2_depth;
SET_HYPOTHESIS(hyp, 1) = prev_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = prev_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = prev_param_state.normal[2];
++hyp;
#if HYPOTHESIS_NUMBER > 1
const float fuzzy5 = GenerateRandomDepth(curr_param_state.depth - 0.005f, curr_param_state.depth + 0.005f, rand_state);
SET_HYPOTHESIS(hyp, 0) = fuzzy5;
SET_HYPOTHESIS(hyp, 1) = curr_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = curr_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = curr_param_state.normal[2];
++hyp;
SET_HYPOTHESIS(hyp, 0) = fuzzy5;
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2];
++hyp;
const float fuzzy10 = GenerateRandomDepth(curr_param_state.depth - 0.01f, curr_param_state.depth + 0.01f, rand_state);
SET_HYPOTHESIS(hyp, 0) = fuzzy10;
SET_HYPOTHESIS(hyp, 1) = curr_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = curr_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = curr_param_state.normal[2];
++hyp;
SET_HYPOTHESIS(hyp, 0) = fuzzy10;
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2];
++hyp;
const float fuzzy20 = GenerateRandomDepth(curr_param_state.depth - 0.02f, curr_param_state.depth + 0.02f, rand_state);
SET_HYPOTHESIS(hyp, 0) = fuzzy20;
SET_HYPOTHESIS(hyp, 1) = curr_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = curr_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = curr_param_state.normal[2];
++hyp;
SET_HYPOTHESIS(hyp, 0) = fuzzy20;
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2];
++hyp;
*(rand_state_map + col) = rand_state;
#endif
SET_HYPOTHESIS(hyp, 0) = scaledfilt1_param_state.depth;
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2];
++hyp;
SET_HYPOTHESIS(hyp, 0) = curr_param_state.depth;
SET_HYPOTHESIS(hyp, 1) = scaledfilt1_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = scaledfilt1_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = scaledfilt1_param_state.normal[2];
++hyp;
SET_HYPOTHESIS(hyp, 0) = scaledfilt2_param_state.depth;
SET_HYPOTHESIS(hyp, 1) = scaledfilt2_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = scaledfilt2_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = scaledfilt2_param_state.normal[2];
++hyp;
SET_HYPOTHESIS(hyp, 0) = curr_param_state.depth;
SET_HYPOTHESIS(hyp, 1) = scaledfilt2_param_state.normal[0];
SET_HYPOTHESIS(hyp, 2) = scaledfilt2_param_state.normal[1];
SET_HYPOTHESIS(hyp, 3) = scaledfilt2_param_state.normal[2];
++hyp;
SET_HYPOTHESIS(hyp, 0) = 0.0f;
SET_HYPOTHESIS(hyp, 1) = 0.0f;
SET_HYPOTHESIS(hyp, 2) = 0.0f;
SET_HYPOTHESIS(hyp, 3) = 0.0f;
++hyp;
#endif
#undef SET_HYPOTHESIS
float likelihood_computer_cos_min_triangulation_angle_ = cos(min_triangulation_angle);
float likelihood_computer_inv_incident_angle_sigma_square_ = -0.5f / (incident_angle_sigma * incident_angle_sigma) ;
float likelihood_computer_inv_ncc_sigma_square_ = -0.5f / (ncc_sigma * ncc_sigma);
float likelihood_computer_ncc_norm_factor_ = likelihood_computer_ComputeNCCCostNormFactor(ncc_sigma);
__global float* forward_message = &global_workspace[col * global_workspace_height];
__global float* sampling_probs = &global_workspace[global_workspace_width *
global_workspace_height +
col * global_workspace_height];
float point[3];
ComputePointAtDepth(row, col, curr_param_state.depth, point, ref_inv_K);
for (int image_idx = 0; image_idx < cost_map_depth; ++image_idx) {
const float cost = *((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col);
const float alpha = likelihood_computer_ComputeForwardMessage(cost, forward_message[image_idx], likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_);
const float beta = *((__global float*)((__global char*)sel_prob_map + sel_prob_map_pitch * (image_idx * sel_prob_map_height + row)) + col);
const float prev_prob = *((__global float*)((__global char*)prev_sel_prob_map + prev_sel_prob_map_pitch * (image_idx * prev_sel_prob_map_height + row)) + col);
const float sel_prob = likelihood_computer_ComputeSelProb(alpha, beta, prev_prob, prev_sel_prob_weight);
float cos_triangulation_angle;
float cos_incident_angle;
ComputeViewingAngles(point, curr_param_state.normal, image_idx, cos_triangulation_angle, &cos_incident_angle, poses_images);
const float tri_prob =
likelihood_computer_ComputeTriProb(cos_triangulation_angle, likelihood_computer_cos_min_triangulation_angle_);
const float inc_prob =
likelihood_computer_ComputeIncProb(cos_incident_angle, likelihood_computer_inv_incident_angle_sigma_square_);
float H[9];
ComposeHomography(poses_images, image_idx, row, col, curr_param_state.depth, curr_param_state.normal, H, ref_inv_K);
const float res_prob =
likelihood_computer_ComputeResolutionProb(H, row, col);
sampling_probs[image_idx] = sel_prob * tri_prob * inc_prob * res_prob;
__constant sampler_t RefImageSampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__constant sampler_t SrcImageSampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;
#ifdef NO_LOCAL_MEMORY
__kernel void PccComputeKernel(float spatial_normalization_, float color_normalization_, int row, int width, int workspaceWidth, __global float *hypotheses_workspace, __global float *pcc_costs, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, image2d_array_t ref_image, image2d_array_t src_images, image2d_array_t poses_images, __constant float *ref_inv_K)
const int col = get_global_id(0);
if (col >= width)
return;
const int src_image_idx = get_global_id(1);
const int num_src_images = get_global_size(1);
const int hypothesis = get_global_id(2);
const int num_hypotheses = get_global_size(2);
float depth = hypotheses_workspace[hypothesis*workspaceWidth + col];
float normal[3];
for (int i=0; i<3; ++i)
normal[i] = hypotheses_workspace[(i+1)*workspaceWidth*num_hypotheses + hypothesis*workspaceWidth + col];
float tform[9];
ComposeHomography(poses_images, src_image_idx, row, col, depth, normal, tform, ref_inv_K);
float x_step[3], y_step[3];
for (int i=0; i<3; ++i) {
x_step[i] = KWINDOWSTEP * tform[i*3];
y_step[i] = KWINDOWSTEP * tform[i*3+1];
const int row_start = row - KWINDOWRADIUS;
const int col_start = col - KWINDOWRADIUS;
float col_src = tform[0] * col_start + tform[1] * row_start + tform[2];
float row_src = tform[3] * col_start + tform[4] * row_start + tform[5];
float z = tform[6] * col_start + tform[7] * row_start + tform[8];
float base_col_src = col_src;
float base_row_src = row_src;
float base_z = z;
const float ref_center_color =
read_imagef(ref_image, RefImageSampler, (int4)(col, row, 0, 0)).x;
const float ref_color_sum = *((__global float*)((__global char*)ref_sum_image + ref_sum_pitch * row) + col);
const float ref_color_squared_sum = *((__global float*)((__global char*)ref_squared_sum_image + ref_squared_sum_pitch * row) + col);
float src_color_sum = 0.0f;
float src_color_squared_sum = 0.0f;
float src_ref_color_sum = 0.0f;
float bilateral_weight_sum = 0.0f;
for (int r = -KWINDOWRADIUS; r <= KWINDOWRADIUS; r += KWINDOWSTEP) {
for (int c = -KWINDOWRADIUS; c <= KWINDOWRADIUS; c += KWINDOWSTEP) {
const float inv_z = 1.0f / z;
const float norm_col_src = inv_z * col_src + 0.5f;
const float norm_row_src = inv_z * row_src + 0.5f;
const float ref_color = read_imagef(ref_image, RefImageSampler, (int4)(col+c, row+r, 0, 0)).x;
const float src_color = read_imagef(src_images, SrcImageSampler, (float4)(norm_col_src, norm_row_src, src_image_idx, 0)).x;
const float spatial_dist_squared = r*r + c*c;
const float color_dist = ref_center_color - ref_color;
const float bilateral_weight = exp(-spatial_dist_squared * spatial_normalization_ -
color_dist * color_dist * color_normalization_);
const float bilateral_weight_src = bilateral_weight * src_color;
src_color_sum += bilateral_weight_src;
src_color_squared_sum += bilateral_weight_src * src_color;
src_ref_color_sum += bilateral_weight_src * ref_color;
bilateral_weight_sum += bilateral_weight;
col_src += x_step[0];
row_src += x_step[1];
z += x_step[2];
}
base_col_src += y_step[0];
base_row_src += y_step[1];
base_z += y_step[2];
col_src = base_col_src;
row_src = base_row_src;
z = base_z;
const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum;
src_color_sum *= inv_bilateral_weight_sum;
src_color_squared_sum *= inv_bilateral_weight_sum;
src_ref_color_sum *= inv_bilateral_weight_sum;
const float ref_color_var =
ref_color_squared_sum - ref_color_sum * ref_color_sum;
const float src_color_var =
src_color_squared_sum - src_color_sum * src_color_sum;
const float kMinVar = 1e-5f;
const float kMaxCost = 2.0f;
float retVal;
if (ref_color_var < kMinVar || src_color_var < kMinVar) {
retVal = kMaxCost;
} else {
const float src_ref_color_covar =
src_ref_color_sum - ref_color_sum * src_color_sum;
const float src_ref_color_var = sqrt(ref_color_var * src_color_var);
retVal = max(0.0f, min(kMaxCost, 1.0f - src_ref_color_covar / src_ref_color_var));
pcc_costs[workspaceWidth*num_src_images*hypothesis + workspaceWidth*src_image_idx + col] = retVal;
#else
#if KWINDOWRADIUS > LOCAL_MEM_WORK_GROUP_SIZE/2
#define LOCAL_MEM_WORK_GROUP_SHIFT LOCAL_MEM_WORK_GROUP_SIZE
#else
#define LOCAL_MEM_WORK_GROUP_SHIFT LOCAL_MEM_WORK_GROUP_SIZE/2
#endif
__kernel void PccComputeKernel(float spatial_normalization_, float color_normalization_, int row, int width, int workspaceWidth, __global float *hypotheses_workspace, __global float *pcc_costs, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, image2d_array_t ref_image, image2d_array_t src_images, image2d_array_t poses_images, __constant float *ref_inv_K, __local float *local_ref_image)
{
const int col = get_global_id(0);
const int thread = get_local_id(0);
const bool valid = col < width;
const int src_image_idx = get_global_id(1);
const int num_src_images = get_global_size(1);
const int hypothesis = get_global_id(2);
const int num_hypotheses = get_global_size(2);
float col_src, row_src, z;
float base_col_src, base_row_src, base_z;
float x_step[3], y_step[3];
float ref_center_color, ref_color_sum, ref_color_squared_sum;
float src_color_sum = 0.0f;
float src_color_squared_sum = 0.0f;
float src_ref_color_sum = 0.0f;
float bilateral_weight_sum = 0.0f;
if (valid) {
float depth = hypotheses_workspace[hypothesis*workspaceWidth + col];
float normal[3];
for (int i=0; i<3; ++i)
normal[i] = hypotheses_workspace[(i+1)*workspaceWidth*num_hypotheses + hypothesis*workspaceWidth + col];
float tform[9];
ComposeHomography(poses_images, src_image_idx, row, col, depth, normal, tform, ref_inv_K);
for (int i=0; i<3; ++i) {
x_step[i] = KWINDOWSTEP * tform[i*3];
y_step[i] = KWINDOWSTEP * tform[i*3+1];
}
const int row_start = row - KWINDOWRADIUS;
const int col_start = col - KWINDOWRADIUS;
col_src = tform[0] * col_start + tform[1] * row_start + tform[2];
row_src = tform[3] * col_start + tform[4] * row_start + tform[5];
z = tform[6] * col_start + tform[7] * row_start + tform[8];
base_col_src = col_src;
base_row_src = row_src;
base_z = z;
ref_center_color = read_imagef(ref_image, RefImageSampler, (int4)(col, row, 0, 0)).x;
ref_color_sum = *((__global float*)((__global char*)ref_sum_image + ref_sum_pitch * row) + col);
ref_color_squared_sum = *((__global float*)((__global char*)ref_squared_sum_image + ref_squared_sum_pitch * row) + col);
}
for (int r = -KWINDOWRADIUS; r <= KWINDOWRADIUS; r += KWINDOWSTEP) {
barrier(CLK_LOCAL_MEM_FENCE);
local_ref_image[thread] =
read_imagef(ref_image, RefImageSampler, (int4)(col-LOCAL_MEM_WORK_GROUP_SHIFT, row+r, 0, 0)).x;
local_ref_image[thread+2*LOCAL_MEM_WORK_GROUP_SHIFT] =
read_imagef(ref_image, RefImageSampler, (int4)(col+LOCAL_MEM_WORK_GROUP_SHIFT, row+r, 0, 0)).x;
#if KWINDOWRADIUS > LOCAL_MEM_WORK_GROUP_SIZE/2
local_ref_image[thread+LOCAL_MEM_WORK_GROUP_SIZE] =
read_imagef(ref_image, RefImageSampler, (int4)(col, row+r, 0, 0)).x;
#endif
barrier(CLK_LOCAL_MEM_FENCE);
if (valid) {
for (int c = -KWINDOWRADIUS; c <= KWINDOWRADIUS; c += KWINDOWSTEP) {
const float inv_z = 1.0f / z;
const float norm_col_src = inv_z * col_src + 0.5f;
const float norm_row_src = inv_z * row_src + 0.5f;
const float ref_color = local_ref_image[thread+LOCAL_MEM_WORK_GROUP_SHIFT+c];
const float src_color = read_imagef(src_images, SrcImageSampler, (float4)(norm_col_src, norm_row_src, src_image_idx, 0)).x;
const float spatial_dist_squared = r*r + c*c;
const float color_dist = ref_center_color - ref_color;
const float bilateral_weight = exp(-spatial_dist_squared * spatial_normalization_ - color_dist * color_dist * color_normalization_);
const float bilateral_weight_src = bilateral_weight * src_color;
src_color_sum += bilateral_weight_src;
src_color_squared_sum += bilateral_weight_src * src_color;
src_ref_color_sum += bilateral_weight_src * ref_color;
bilateral_weight_sum += bilateral_weight;
col_src += x_step[0];
row_src += x_step[1];
z += x_step[2];
}
base_col_src += y_step[0];
base_row_src += y_step[1];
base_z += y_step[2];
col_src = base_col_src;
row_src = base_row_src;
z = base_z;
}
}
if (!valid)
return;
const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum;
src_color_sum *= inv_bilateral_weight_sum;
src_color_squared_sum *= inv_bilateral_weight_sum;
src_ref_color_sum *= inv_bilateral_weight_sum;
const float ref_color_var =
ref_color_squared_sum - ref_color_sum * ref_color_sum;
const float src_color_var =
src_color_squared_sum - src_color_sum * src_color_sum;
const float kMinVar = 1e-5f;
const float kMaxCost = 2.0f;
float retVal;
if (ref_color_var < kMinVar || src_color_var < kMinVar) {
retVal = kMaxCost;
} else {
const float src_ref_color_covar =
src_ref_color_sum - ref_color_sum * src_color_sum;
const float src_ref_color_var = sqrt(ref_color_var * src_color_var);
retVal = max(0.0f, min(kMaxCost, 1.0f - src_ref_color_covar / src_ref_color_var));
}
pcc_costs[workspaceWidth*num_src_images*hypothesis + workspaceWidth*src_image_idx + col] = retVal;
}
#endif
__kernel void MinimumCostKernel(int row, int width, int num_src_images, int num_hypotheses, int kGeomConsistencyTerm, __global float *global_workspace, int workspaceWidth, __global float *pcc_costs, __global float *hypotheses_workspace, __global float *cost_map, int cost_map_height, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, __global float *prev_sel_prob_map, int prev_sel_prob_map_height, int prev_sel_prob_map_pitch, float prev_sel_prob_weight, float ncc_sigma, float geom_consistency_regularizer, float geom_consistency_max_cost, image2d_array_t src_depth_map_images, image2d_array_t poses_images, __constant float *ref_inv_K,
#ifdef HYPOTHESES_STATISTICS
__constant float *ref_K, __global unsigned int *hypotheses_stats)
#else
__constant float *ref_K)
#endif
{
const int col = get_global_id(0);
if (col >= width)
return;
float likelihood_computer_inv_ncc_sigma_square_ = -0.5f / (ncc_sigma * ncc_sigma);
float likelihood_computer_ncc_norm_factor_ = likelihood_computer_ComputeNCCCostNormFactor(ncc_sigma);
__global float* forward_message = &global_workspace[col * num_src_images];
__global float* sampling_probs = &global_workspace[workspaceWidth * num_src_images + col * num_src_images];
float best_cost = 0.0f;
int min_cost_idx = 0;
for (int a=0; a<num_src_images; ++a) {
const float sampling_weight = sampling_probs[a];
best_cost += *((__global float*)((__global char*)cost_map + cost_map_pitch * (a * cost_map_height + row)) + col) * sampling_weight;
if (kGeomConsistencyTerm) {
float currDepth = *((__global float*)((__global char*)depth_map + depth_map_pitch * row) + col);
best_cost += geom_consistency_regularizer *
ComputeGeomConsistencyCost(row, col, currDepth, a, geom_consistency_max_cost, poses_images, src_depth_map_images, ref_inv_K, ref_K) * sampling_weight;
}
}
for (int i=1; i<num_hypotheses; ++i) {
float hyp_cost = 0.0f;
for (int a=0; a<num_src_images; ++a) {
const float sampling_weight = sampling_probs[a];
hyp_cost += pcc_costs[workspaceWidth*num_src_images*(i-1) + workspaceWidth*a + col] * sampling_weight;
if (kGeomConsistencyTerm) {
float hypDepth = hypotheses_workspace[(i-1)*workspaceWidth + col];
hyp_cost += geom_consistency_regularizer *
ComputeGeomConsistencyCost(row, col, hypDepth, a, geom_consistency_max_cost, poses_images, src_depth_map_images, ref_inv_K, ref_K) * sampling_weight;
}
}
if (hyp_cost <= best_cost) {
best_cost = hyp_cost;
min_cost_idx = i;
}
}
#ifdef HYPOTHESES_STATISTICS
hypotheses_stats[workspaceWidth*min_cost_idx + col] += 1;
#endif
if (min_cost_idx > 0) {
*((__global float*)((__global char*)depth_map + depth_map_pitch * row) + col) = hypotheses_workspace[(min_cost_idx-1)*workspaceWidth + col];
for (size_t slice = 0; slice < 3; ++slice) {
*((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col) =
hypotheses_workspace[(slice+1)*workspaceWidth*(num_hypotheses-1) + (min_cost_idx-1)*workspaceWidth + col];
}
}
for (int image_idx = 0; image_idx < num_src_images; ++image_idx)
{
float cost;
if (min_cost_idx == 0) {
cost = *((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col);
}
else {
cost = pcc_costs[workspaceWidth*num_src_images*(min_cost_idx-1) + workspaceWidth*image_idx + col];
*((__global float*)((__global char*)cost_map + cost_map_pitch * (image_idx * cost_map_height + row)) + col) = cost;
}
const float alpha = likelihood_computer_ComputeForwardMessage(cost, forward_message[image_idx], likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_);
const float beta = *((__global float*)((__global char*)sel_prob_map + sel_prob_map_pitch * (image_idx * sel_prob_map_height + row)) + col);
const float prev_prob = *((__global float*)((__global char*)prev_sel_prob_map + prev_sel_prob_map_pitch * (image_idx * prev_sel_prob_map_height + row)) + col);
const float prob = likelihood_computer_ComputeSelProb(alpha, beta, prev_prob, prev_sel_prob_weight);
forward_message[image_idx] = alpha;
*((__global float*)((__global char*)sel_prob_map + sel_prob_map_pitch * (image_idx * sel_prob_map_height + row)) + col) = prob;
}
}
__kernel void ConsistencyFilterKernel(int width, __global float *depth_map, __global float *normal_map, __global float *sel_prob_map, int map_pitch, int map_height, int num_src_images, int kFilterConsistencyMode, __global uchar *consistency_mask, int consistency_mask_pitch, image2d_array_t src_depth_map_images, image2d_array_t poses_images, __constant float *ref_inv_K, __constant float *ref_K, float ncc_sigma, float filter_min_ncc, float filter_min_triangulation_angle, float geom_consistency_max_cost, float filter_geom_consistency_max_cost, int filter_min_num_consistent)
{
const int col = get_global_id(0);
const int row = get_global_id(1);
if (col >= width)
return;
int num_consistent = 0;
const float best_depth = *((__global float*)((__global char*)depth_map + map_pitch * row) + col);
float best_normal[3];
for (int i=0; i<3; ++i)
best_normal[i] = *((__global float*)((__global char*)normal_map + map_pitch * (i * map_height + row)) + col);
float best_point[3];
ComputePointAtDepth(row, col, best_depth, best_point, ref_inv_K);
const float likelihood_computer_inv_ncc_sigma_square_ = -0.5f / (ncc_sigma * ncc_sigma);
const float likelihood_computer_ncc_norm_factor_ = likelihood_computer_ComputeNCCCostNormFactor(ncc_sigma);
const float min_ncc_prob =
likelihood_computer_ComputeNCCProb(1.0f - filter_min_ncc, likelihood_computer_inv_ncc_sigma_square_, likelihood_computer_ncc_norm_factor_);
const float cos_min_triangulation_angle =
cos(filter_min_triangulation_angle);
for (int image_idx = 0; image_idx < num_src_images; ++image_idx) {
float cos_triangulation_angle;
float cos_incident_angle;
ComputeViewingAngles(best_point, best_normal, image_idx, cos_triangulation_angle, &cos_incident_angle, poses_images);
if (cos_triangulation_angle > cos_min_triangulation_angle ||
cos_incident_angle <= 0.0f) {
continue;
}
if (!(kFilterConsistencyMode & 0x02)) {
if (*((__global float*)((__global char*)sel_prob_map + map_pitch * (image_idx * map_height + row)) + col) >= min_ncc_prob) {
*(consistency_mask + consistency_mask_pitch * (image_idx * map_height + row) + col) = 1;
num_consistent += 1;
}
} else if (!(kFilterConsistencyMode & 0x01)) {
if (ComputeGeomConsistencyCost(row, col, best_depth, image_idx, geom_consistency_max_cost, poses_images, src_depth_map_images, ref_inv_K, ref_K) <= filter_geom_consistency_max_cost) {
*(consistency_mask + consistency_mask_pitch * (image_idx * map_height + row) + col) = 1;
num_consistent += 1;
}
} else {
if (*((__global float*)((__global char*)sel_prob_map + map_pitch * (image_idx * map_height + row)) + col) >= min_ncc_prob &&
ComputeGeomConsistencyCost(row, col, best_depth, image_idx, geom_consistency_max_cost, poses_images, src_depth_map_images, ref_inv_K, ref_K) <= filter_geom_consistency_max_cost) {
*(consistency_mask + consistency_mask_pitch * (image_idx * map_height + row) + col) = 1;
num_consistent += 1;
}
}
}
if (num_consistent < filter_min_num_consistent) {
const float kFilterValue = 0.0f;
*((__global float*)((__global char*)depth_map + map_pitch * row) + col) = kFilterValue;
*((__global float*)((__global char*)normal_map + map_pitch * (0 * map_height + row)) + col) = kFilterValue;
*((__global float*)((__global char*)normal_map + map_pitch * (1 * map_height + row)) + col) = kFilterValue;
*((__global float*)((__global char*)normal_map + map_pitch * (2 * map_height + row)) + col) = kFilterValue;
for (int image_idx = 0; image_idx < num_src_images; ++image_idx) {
*(consistency_mask + consistency_mask_pitch * (image_idx * map_height + row) + col) = 0;
}
}
}
#ifdef NO_LOCAL_MEMORY
__kernel void RotateFloatKernel(__global float *input_data, int input_offset, __global float *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch)
{
int input_x = get_global_id(0);
int input_y = get_global_id(1);
if (input_x >= width || input_y >= height) {
return;
int output_x = input_y;
int output_y = width - 1 - input_x;
*((__global float *)((__global uchar *)output_data + output_offset + output_y * output_pitch) + output_x) =
*((__global float *)((__global uchar *)input_data + input_offset + input_y * input_pitch) + input_x);
}
__kernel void RotateUint8Kernel(__global uchar *input_data, int input_offset, __global uchar *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch)
{
int input_x = get_global_id(0);
int input_y = get_global_id(1);
if (input_x < width && input_y < height) {
int output_x = input_y;
int output_y = width - 1 - input_x;
*(output_data + output_offset + output_y * output_pitch + output_x) =
*(input_data + input_offset + input_y * input_pitch + input_x);
}
}
#else
#define ROTATE_TILE_DIM 16
#define ROTATE_WORKGROUP_ROWS 4
__kernel void RotateFloatKernel(__global float * input_data, int input_offset, __global float * output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch)
{
__local float tile[ROTATE_TILE_DIM][ROTATE_TILE_DIM+1];
int input_x = get_global_id(0);
int x = get_local_id(0);
int y = get_local_id(1);
int yp = get_group_id(1) * ROTATE_TILE_DIM + y;
for (int j=0; j < ROTATE_TILE_DIM; j += ROTATE_WORKGROUP_ROWS)
if (input_x < width && yp+j < height)
tile[y+j][x] = *((__global float *)((__global uchar *)input_data + input_offset + (yp + j) * input_pitch) + input_x);
barrier(CLK_LOCAL_MEM_FENCE);
int output_y = width - 1 - (input_x/ROTATE_TILE_DIM * ROTATE_TILE_DIM) - y;
for (int j=0; j < ROTATE_TILE_DIM; j += ROTATE_WORKGROUP_ROWS)
int output_x = ((yp+j)/ROTATE_TILE_DIM)*ROTATE_TILE_DIM + x;
if (output_x < height && output_y-j >= 0)
*((__global float *)((__global uchar *)output_data + output_offset + (output_y - j) * output_pitch) + output_x) = tile[x][y+j];
}
__kernel void RotateUint8Kernel(__global uchar *input_data, int input_offset, __global uchar *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch)
{
__local uchar tile[ROTATE_TILE_DIM][ROTATE_TILE_DIM+1];
int input_x = get_global_id(0);
int x = get_local_id(0);
int y = get_local_id(1);
int yp = get_group_id(1) * ROTATE_TILE_DIM + y;
for (int j=0; j < ROTATE_TILE_DIM; j += ROTATE_WORKGROUP_ROWS)
if (input_x < width && yp+j < height)
tile[y+j][x] = *(input_data + input_offset + (yp + j) * input_pitch + input_x);
barrier(CLK_LOCAL_MEM_FENCE);
int output_y = width - 1 - (input_x/ROTATE_TILE_DIM * ROTATE_TILE_DIM) - y;
for (int j=0; j < ROTATE_TILE_DIM; j += ROTATE_WORKGROUP_ROWS)
{
int output_x = ((yp+j)/ROTATE_TILE_DIM)*ROTATE_TILE_DIM + x;
if (output_x < height && output_y-j >= 0)
*(output_data + output_offset + (output_y - j) * output_pitch + output_x) = tile[x][y+j];
}
}
#endif
__kernel void FillWithRandomNumbersKernel(__global float* output, __global clrandState * rand_state_map, float min_value, float max_value, int random_state_width, int random_state_height, int random_state_pitch, int output_height, int output_depth, int output_pitch) {
const uint row = get_global_id(1);
const uint col = get_global_id(0);
if (col < random_state_width && row < random_state_height) {
clrandState rand_state = *((__global clrandState*)((__global char*)rand_state_map + random_state_pitch * row) + col);
for (size_t slice = 0; slice < output_depth; ++slice) {
float random_value = clrand_uniform(&rand_state) * (max_value - min_value) + min_value;
*((__global float*)((__global char*)output + output_pitch * (slice * output_height + row)) + col) = random_value;
}
*((__global clrandState*)((__global char*)rand_state_map + random_state_pitch * row) + col) = rand_state;
}
}
__kernel void InitNormalMapKernel(__global float* normal_map, __global clrandState * rand_state_map, int normal_map_width, int normal_map_height, int normal_map_depth, int normal_map_pitch, int rand_state_map_pitch, __constant float * ref_inv_K) {
const uint row = get_global_id(1);
const uint col = get_global_id(0);
if (col < normal_map_width && row < normal_map_height) {
clrandState rand_state = *((__global clrandState*)((__global char*)rand_state_map + rand_state_map_pitch * row) + col);
float normal[3];
GenerateRandomNormal(row, col, &rand_state, normal, ref_inv_K);
for (size_t slice = 0; slice < normal_map_depth; ++slice) {
*((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col) = normal[slice];
}
*((__global clrandState*)((__global char*)rand_state_map + rand_state_map_pitch * row) + col) = rand_state;
}
__constant sampler_t FilterSampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__kernel void FilterKernel(image2d_array_t image_array, __global uchar *image, int pitch_image, __global float *sum_image, int pitch_sum_image, __global float *squared_sum_image, int pitch_squared_sum_image, int window_radius, int window_step, float sigma_spatial, float sigma_color) {
const int row = get_global_id(1);
const int col = get_global_id(0);
const float spatial_normalization_ = (1.0f / (2.0f * sigma_spatial * sigma_spatial));
const float color_normalization_ = (1.0f / (2.0f * sigma_color * sigma_color));
const float center_color = read_imagef(image_array, FilterSampler, int4)(col, row, 0, 0)).x;
float color_sum = 0.0f;
float color_squared_sum = 0.0f;
float bilateral_weight_sum = 0.0f;
for (int window_row = -window_radius; window_row <= window_radius;
window_row += window_step) {
for (int window_col = -window_radius; window_col <= window_radius;
window_col += window_step) {
const float color = read_imagef(image_array, FilterSampler, int4)(col + window_col, row + window_row, 0, 0)).x;
const float spatial_dist_squared = window_row * window_row + window_col * window_col;
const float color_dist = center_color - color;
const float bilateral_weight = exp(-spatial_dist_squared * spatial_normalization_ -
color_dist * color_dist * color_normalization_);
color_sum += bilateral_weight * color;
color_squared_sum += bilateral_weight * color * color;
bilateral_weight_sum += bilateral_weight;
}
}
color_sum /= bilateral_weight_sum;
color_squared_sum /= bilateral_weight_sum;
*(image + pitch_image * row + col) = (uchar)(255.0f * center_color);
*((__global float*)((__global char*)sum_image + pitch_sum_image * row) + col) = color_sum;
*((__global float*)((__global char*)squared_sum_image + pitch_squared_sum_image * row) + col) = color_squared_sum;
}
__kernel void RotateNormalMapKernel(__global float * normal_map, int normal_map_width, int normal_map_height, int normal_map_depth, int normal_map_pitch) {
const int row = get_global_id(1);
const int col = get_global_id(0);
if (col < normal_map_width && row < normal_map_height) {
float normal[3];
for (size_t slice = 0; slice < normal_map_depth; ++slice) {
normal[slice] = *((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col);
}
float rotated_normal[3];
rotated_normal[0] = normal[1];
rotated_normal[1] = -normal[0];
rotated_normal[2] = normal[2];
for (size_t slice = 0; slice < normal_map_depth; ++slice) {
*((__global float*)((__global char*)normal_map + normal_map_pitch * (slice * normal_map_height + row)) + col) = rotated_normal[slice];
}
}
}
__kernel void CopyFromGpuMatCLKernel(__global uchar * input_array, __write_only image2d_array_t output_array, int pitch, int width, int height) {
const int row = get_global_id(1);
const int col = get_global_id(0);
int4 pos = (int4)(col, row, 0, 0);
float4 pix = (float4)((*(input_array + pitch * row + col))/255.0f);
if (col < width && row < height) {
write_imagef(output_array, pos, pix);
}
}
__kernel void FindBestMatchOneWay_Kernel(__global int *result, __global VECTOR_TYPE *des1, __global VECTOR_TYPE *des2, int num1, int num2, float distmax, float ratiomax);
__kernel void FindGuidedBestMatchOneWay_Kernel(__global int *result, __global VECTOR_TYPE *des1, __global VECTOR_TYPE *des2, __global float *loc1, __global float *loc2, int num1, int num2, int guided, // 0 = Fundamental matrix, first pass;
__kernel void ComputeInitialCostKernel(__global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_depth, int normal_map_pitch, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, float sigma_spatial, float sigma_color, image2d_array_t ref_image, ;
__kernel void InitRandomStateKernel(__global clrandState *output, int width, int height, uint pitch);
__kernel void ComputeBackwardMessagesKernel(__global float *global_workspace, int global_workspace_height, __global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, float ncc_sigma);
__kernel void HypothesesAndProbsKernel(int row, __global float *global_workspace, int global_workspace_width, int global_workspace_height, __global float *hypotheses_workspace, int hypotheses_workspace_height, __global clrandState *rand_state_map, __global float *cost_map, int cost_map_width, int cost_map_height, int cost_map_depth, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, __global float *prev_sel_prob_map, int prev_sel_prob_map_height, int prev_sel_prob_map_pitch, float min_triangulation_angle, float incident_angle_sigma, float ncc_sigma, float perturbation, float prev_sel_prob_weight, image2d_array_t poses_images, ;
__kernel void PccComputeKernel(float spatial_normalization_, float color_normalization_, int row, int width, int workspaceWidth, __global float *hypotheses_workspace, __global float *pcc_costs, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, image2d_array_t ref_image, image2d_array_t src_images, ;
__kernel void PccComputeKernel(float spatial_normalization_, float color_normalization_, int row, int width, int workspaceWidth, __global float *hypotheses_workspace, __global float *pcc_costs, __global float *ref_sum_image, int ref_sum_pitch, __global float *ref_squared_sum_image, int ref_squared_sum_pitch, image2d_array_t ref_image, image2d_array_t src_images, ;
__kernel void MinimumCostKernel(int row, int width, int num_src_images, int num_hypotheses, int kGeomConsistencyTerm, __global float *global_workspace, int workspaceWidth, __global float *pcc_costs, __global float *hypotheses_workspace, __global float *cost_map, int cost_map_height, int cost_map_pitch, __global float *depth_map, int depth_map_pitch, __global float *normal_map, int normal_map_height, int normal_map_pitch, __global float *sel_prob_map, int sel_prob_map_height, int sel_prob_map_pitch, __global float *prev_sel_prob_map, int prev_sel_prob_map_height, int prev_sel_prob_map_pitch, float prev_sel_prob_weight, float ncc_sigma, float geom_consistency_regularizer, float geom_consistency_max_cost, image2d_array_t src_depth_map_images, image2d_array_t poses_images, __constant float *ref_inv_K, ifdef HYPOTHESES_STATISTICS;
__kernel void ConsistencyFilterKernel(int width, __global float *depth_map, __global float *normal_map, __global float *sel_prob_map, int map_pitch, int map_height, int num_src_images, int kFilterConsistencyMode, __global uchar *consistency_mask, int consistency_mask_pitch, image2d_array_t src_depth_map_images, image2d_array_t poses_images, __constant float *ref_inv_K, __constant float *ref_K, float ncc_sigma, float filter_min_ncc, float filter_min_triangulation_angle, float geom_consistency_max_cost, float filter_geom_consistency_max_cost, int filter_min_num_consistent);
__kernel void RotateFloatKernel(__global float *input_data, int input_offset, __global float *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch);
__kernel void RotateUint8Kernel(__global uchar *input_data, int input_offset, __global uchar *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch);
__kernel void RotateFloatKernel(__global float * input_data, int input_offset, __global float * output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch);
__kernel void RotateUint8Kernel(__global uchar *input_data, int input_offset, __global uchar *output_data, int output_offset, const int width, const int height, const int input_pitch, const int output_pitch);
__kernel void FillWithRandomNumbersKernel(__global float* output, __global clrandState * rand_state_map, float min_value, float max_value, int random_state_width, int random_state_height, int random_state_pitch, int output_height, int output_depth, int output_pitch) {;
__kernel void InitNormalMapKernel(__global float* normal_map, __global clrandState * rand_state_map, int normal_map_width, int normal_map_height, ;
__kernel void FilterKernel(image2d_array_t image_array, __global uchar *image, int pitch_image, __global float *sum_image, int pitch_sum_image, __global float *squared_sum_image, int pitch_squared_sum_image, int window_radius, int window_step, float sigma_spatial, float sigma_color) {;
__kernel void RotateNormalMapKernel(__global float * normal_map, int normal_map_width, int normal_map_height, int normal_map_depth, int normal_map_pitch) {;
__kernel void CopyFromGpuMatCLKernel(__global uchar * input_array, __write_only image2d_array_t output_array, int pitch, int width, int height) {;
#version 150
uniform mat4 u_pmv_matrix;
in vec3 a_pos;
in vec4 a_color;
out vec4 v_pos;
out vec4 v_color;
void main(void) {
v_pos = u_pmv_matrix * vec4(a_pos, 1);
v_color = a_color;
[#version 150
in vec4 v_color;
out vec4 f_color;
void main(void) {
f_color = v_color;
[#version 150
in vec4 v_color;
out vec4 f_color;
void main(void) {
f_color = v_color;
#version 150
layout(lines) in;
layout(triangle_strip, max_vertices = 4) out;
uniform vec2 u_inv_viewport;
uniform float u_line_width;
in vec4 v_pos[2];
in vec4 v_color[2];
out vec4 g_color;
void main() {
vec2 dir = normalize(v_pos[1].xy / v_pos[1].w - v_pos[0].xy / v_pos[0].w);
vec2 normal_dir = vec2(-dir.y, dir.x);
vec2 offset = (vec2(u_line_width) * u_inv_viewport) * normal_dir;
gl_Position = vec4(v_pos[0].xy + offset * v_pos[0].w, v_pos[0].z, v_pos[0].w);
g_color = v_color[0];
EmitVertex();
gl_Position = vec4(v_pos[1].xy + offset * v_pos[1].w, v_pos[1].z, v_pos[1].w);
g_color = v_color[1];
EmitVertex();
gl_Position = vec4(v_pos[0].xy - offset * v_pos[0].w, v_pos[0].z, v_pos[0].w);
g_color = v_color[0];
EmitVertex();
gl_Position = vec4(v_pos[1].xy - offset * v_pos[1].w, v_pos[1].z, v_pos[1].w);
g_color = v_color[1];
EmitVertex();
EndPrimitive();
#version 150
uniform float u_point_size;
uniform mat4 u_pmv_matrix;
in vec3 a_position;
in vec4 a_color;
out vec4 v_color;
void main(void) {
gl_Position = u_pmv_matrix * vec4(a_position, 1);
gl_PointSize = u_point_size;
v_color = a_color;
#version 150
uniform mat4 u_pmv_matrix;
in vec3 a_position;
in vec4 a_color;
out vec4 v_color;
void main(void) {
gl_Position = u_pmv_matrix * vec4(a_position, 1);
v_color = a_color;
[#version 150
in vec4 g_color;
out vec4 f_color;
void main(void) {
f_color = g_color;
#pragma optionNV(ifcvt none)
#pragma optionNV(unroll all)
vec4 val = vec4(0.0, 0.0, 0.0, 0.0), data1, buf;
vec2 index = gl_FragCoord.yx;
vec2 stripe_size = size.xy * SIFT_PER_STRIPE;
vec2 temp_div1 = index / stripe_size;
vec2 stripe_index = floor(temp_div1);
index = floor(stripe_size * (temp_div1 - stripe_index));
vec2 temp_div2 = index * vec2(1.0 / float(SIFT_PER_STRIPE));
vec2 temp_floor2 = floor(temp_div2);
vec2 index_v = temp_floor2 + vec2(0.5);
vec2 index_h = vec2(SIFT_PER_STRIPE)* (temp_div2 - temp_floor2);
vec2 tx = (index_h + stripe_index * vec2(SIFT_PER_STRIPE))* vec2(PIXEL_PER_SIFT) + 0.5;
vec2 tpos1, tpos2;
vec4 tpos = vec4(tx, index_v);
for(int i = 0; i < PIXEL_PER_SIFT; ++i){
buf = texture2DRect(tex2, tpos.yw);
data1 = texture2DRect(tex1, tpos.xz);
val += (data1 * buf);
tpos.xy = tpos.xy + vec2(1.0, 1.0);
const float factor = 0.248050689697265625;
gl_FragColor =vec4(dot(val, vec4(factor)), index, 0);
uniform sampler2DRect tex1, tex2; uniform vec2
size;
void main()
#define PIXEL_PER_SIFT
#define SIFT_PER_STRIPE
tex1
tex2
vec4 val = vec4(0.0, 0.0, 0.0, 0.0), data1, buf;
vec2 index = gl_FragCoord.yx;
vec2 stripe_size = size.xy * SIFT_PER_STRIPE;
vec2 temp_div1 = index / stripe_size;
vec2 stripe_index = floor(temp_div1);
index = floor(stripe_size * (temp_div1 - stripe_index));
vec2 temp_div2 = index * vec2(1.0/ float(SIFT_PER_STRIPE));
vec2 temp_floor2 = floor(temp_div2);
vec2 index_v = temp_floor2 + vec2(0.5);
vec2 index_h = vec2(SIFT_PER_STRIPE)* (temp_div2 - temp_floor2);
vec4 tlpos = vec4((index_h + stripe_index * vec2(SIFT_PER_STRIPE)) + 0.5, index_v);
vec3 loc1 = vec3(texture2DRect(texL1, tlpos.xz).xw, 1.0);
vec3 loc2 = vec3(texture2DRect(texL2, tlpos.yw).xw, 1.0);
vec3 hxloc1 = H* loc1;
vec2 diff = loc2.xy- (hxloc1.xy/hxloc1.z);
float disth = diff.x * diff.x + diff.y * diff.y;
if(disth > size.z ) {gl_FragColor = vec4(0.0, index, 0.0); return;}
vec3 fx1 = (F * loc1), ftx2 = (loc2 * F);
float x2tfx1 = dot(loc2, fx1);
vec4 temp = vec4(fx1.xy, ftx2.xy);
float sampson_error = (x2tfx1 * x2tfx1) / dot(temp, temp);
if(sampson_error > size.w) {gl_FragColor = vec4(0.0, index, 0.0); return;}
vec2 tx = (index_h + stripe_index * SIFT_PER_STRIPE)* vec2(PIXEL_PER_SIFT) + 0.5;
vec2 tpos1, tpos2;
vec4 tpos = vec4(tx, index_v);
for(int i = 0; i < PIXEL_PER_SIFT; ++i){
buf = texture2DRect(tex2, tpos.yw);
data1 = texture2DRect(tex1, tpos.xz);
val += data1 * buf;
tpos.xy = tpos.xy + vec2(1.0, 1.0);
const float factor = 0.248050689697265625;
gl_FragColor =vec4(dot(val, vec4(factor)), index, 0.0);
uniform sampler2DRect tex1, tex2;
uniform sampler2DRect texL1;
uniform sampler2DRect texL2;
uniform mat3 H;
uniform mat3 F;
uniform vec4
size;
void main()
texL1
texL2
#define BLOCK_WIDTH 16.0
uniform sampler2DRect tex;
uniform vec3 param;
void main ()
float index = gl_FragCoord.x + floor(gl_FragCoord.y) * BLOCK_WIDTH;
vec2 bestv = vec2(-1.0); float imax = -1.0;
for(float i = 0.0; i < param.x; i ++){
float v = texture2DRect(tex, vec2(i + 0.5, index)).r;
imax = v > bestv.r ? i : imax;
bestv = v > bestv.r? vec2(v, bestv.r) : max(bestv, vec2(v));
bestv = acos(min(bestv, 1.0));
if(bestv.x >= param.y || bestv.x >= param.z * bestv.y) imax = -1.0;
gl_FragColor = vec4(imax, bestv, index);
param
#define BLOCK_WIDTH 16.0
uniform sampler2DRect tex; uniform vec3 param;
void main ()
float index = gl_FragCoord.x + floor(gl_FragCoord.y) * BLOCK_WIDTH;
vec2 bestv = vec2(-1.0); float imax = -1.0;
for(float i = 0.0; i < param.x; i ++){
float v = texture2DRect(tex, vec2(index, i + 0.5)).r;
imax = (v > bestv.r)? i : imax;
bestv = v > bestv.r? vec2(v, bestv.r) : max(bestv, vec2(v));
bestv = acos(min(bestv, 1.0));
if(bestv.x >= param.y || bestv.x >= param.z * bestv.y) imax = -1.0;
gl_FragColor = vec4(imax, bestv, index);
Project for VS2005+ or set siftgpu_enable_cuda to 1 in makefile
uniform sampler2DRect tex;
void main(void){ float intensity = 0.0 ; vec2 pos;
float or = texture2DRect(tex, gl_TexCoord[0].st).r;
intensity+= or *
) , 0);
pos = gl_TexCoord[0].st + vec2(float(texture2DRect(tex, pos).r;
intensity+=
gl_FragColor.r = or;
gl_FragColor.b = intensity;}
void main(void){ float intensity = 0.0;vec2 pos;
vec2 orb = texture2DRect(tex, gl_TexCoord[0].st).rb;
intensity+= orb.y *
) );
pos = gl_TexCoord[0].st + vec2(0, float(intensity+= texture2DRect(tex, pos).b *
gl_FragColor.b = orb.y;
gl_FragColor.g = intensity - orb.x;
gl_FragColor.r = intensity;}
void main(void){ vec4 result = vec4(0, 0, 0, 0);
vec4 pc; vec2 coord;
),0);
coord = gl_TexCoord[0].xy + vec2(float(pc=texture2DRect(tex, coord);
if(coord.x < 0.0) pc = pc.rrbb;
)*pc.grab;
result += vec4()*pc.rrbb;
)*pc.ggaa;
gl_FragColor = result;}
vec4 pc; vec2 coord;
coord = gl_TexCoord[0].xy + vec2(0, float(if(coord.y < 0.0) pc = pc.rgrg;
)*pc.barg;
)*pc.rgrg;
)*pc.baba;
uniform sampler2DRect tex; void main(void){
float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex, gl_TexCoord[0].st ).rgb);
gl_FragColor = vec4(intensity, intensity, intensity, 1.0);}
void main(void){gl_FragColor.rg = gl_TexCoord[0].st;}
uniform sampler2DRect tex; void main(void){gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg;}
uniform sampler2DRect tex; void main ()
vec4 v1, v2, gg;
vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);
gg.x = texture2DRect(tex, gl_TexCoord[1].xy).r;
gg.y = texture2DRect(tex, gl_TexCoord[2].xy).r;
gg.z = texture2DRect(tex, gl_TexCoord[3].xy).r;
gg.w = texture2DRect(tex, gl_TexCoord[4].xy).r;
vec2 dxdy = (gg.yw - gg.xz);
float grad = 0.5*length(dxdy);
float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);
gl_FragData[0] = vec4(cc.rg, grad, theta);
uniform sampler2DRect tex; uniform vec2 truncate;
void main(){ gl_FragColor = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate)); }
uniform sampler2DRect tex; uniform sampler2DRect oTex;
uniform float size; void main(){
vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);
vec4 oo = texture2DRect(oTex, cc.rg);
gl_FragColor.rg = cc.rg;
gl_FragColor.b = oo.a;
gl_FragColor.a = size;}
oTex
Orientation simplified on this hardware
Descriptor ignored on this hardware
void main(){gl_FragColor = vec4(0.0);}
uniform sampler2DRect tex; void main(){
gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg; gl_FragColor.ba = vec2(0.0,1.0);
uniform vec4 sizes; uniform sampler2DRect tex;
void main(void){
float fwidth = sizes.y; float twidth = sizes.z; float rwidth = sizes.w;
float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);
float px = mod(index, twidth);
vec2 tpos= floor(vec2(px, index*rwidth))+0.5;
vec4 cc = texture2DRect(tex, tpos );
float size = 3.0 * cc.a; //sizes.x;//
gl_FragColor.zw = vec2(0.0, 1.0);
if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy; }
else {float type = fract(px);
vec2 dxy = vec2(0);
dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);
dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size);
float s = sin(cc.b); float c = cos(cc.b);
gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;
gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}
sizes
uniform sampler2DRect tex; void main(void){float r = texture2DRect(tex, gl_TexCoord[0].st).r;
gl_FragColor = vec4(r, r, r, 1);}
uniform sampler2DRect tex; void main(void){float g = 0.5+(20.0*texture2DRect(tex, gl_TexCoord[0].st).g);
gl_FragColor = vec4(g, g, g, 0.0);}
uniform sampler2DRect tex; void main(void){
vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);gl_FragColor = vec4(5.0* cc.bbb, 1.0);}
uniform sampler2DRect tex; void main(void){
vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);
if(cc.r ==0.0) discard; gl_FragColor = (cc.r==1.0? vec4(1.0, 0.0, 0,1.0):vec4(0.0,1.0,0.0,1.0));}
* min(2.0 * cc.r + 0.1, 1.0))
#define THRESHOLD2
* min(2.0 * cc.r + 0.1, 1.0))
#define THRESHOLD1 (define THRESHOLD0 (define THRESHOLD2
#define THRESHOLD1
#define THRESHOLD0
float dog = 0.0;
gl_FragData[1] = vec4(0, 0, 0, 0);
dog = cc.g > float(THRESHOLD0) && all(greaterThan(cc.gggg, max(v1, v2)))?1.0: 0.0;
dog = cc.g < float(-THRESHOLD0) && all(lessThan(cc.gggg, min(v1, v2)))?0.5: dog;
if(dog == 0.0) return;
uniform sampler2DRect tex, texU, texD; void main ()
vec4 v1, v2, gg, temp;
vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y);
vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);
temp = texture2DRect(tex, gl_TexCoord[1].xy);
v1.x = temp.g;
gg.x = temp.r;
temp = texture2DRect(tex, gl_TexCoord[2].xy) ;
v1.y = temp.g;
gg.y = temp.r;
temp = texture2DRect(tex, gl_TexCoord[3].xy) ;
v1.z = temp.g;
gg.z = temp.r;
temp = texture2DRect(tex, gl_TexCoord[4].xy) ;
v1.w = temp.g;
gg.w = temp.r;
v2.x = texture2DRect(tex, gl_TexCoord[5].xy).g;
v2.y = texture2DRect(tex, gl_TexCoord[6].xy).g;
v2.z = texture2DRect(tex, gl_TexCoord[7].xy).g;
v2.w = texture2DRect(tex, TexRU.xy).g;
vec2 dxdy = (gg.yw - gg.xz);
float grad = 0.5*length(dxdy);
float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);
gl_FragData[0] = vec4(cc.rg, grad, theta);
float fxx, fyy, fxy;
vec4 D2 = v1.xyzw - cc.gggg;
vec2 D4 = v2.xw - v2.yz;
fxx = D2.x + D2.y;
fyy = D2.z + D2.w;
fxy = 0.25*(D4.x + D4.y);
float fxx_plus_fyy = fxx + fyy;
float score_up = fxx_plus_fyy*fxx_plus_fyy;
float score_down = (fxx*fyy - fxy*fxy);
if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)return;
else{
if(cc.g > v3.x || any(greaterThan(cc.gggg, v4)) ||any(greaterThan(cc.gggg, v6)))return;
v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;
v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;
v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;
v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;
v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;
v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;
v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;
v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;
v6.w = texture2DRect(texD, TexRU.xy).g;
if(cc.g > v3.y || any(greaterThan(cc.gggg, v5)) ||any(greaterThan(cc.gggg, v6)))return;
if(dog == 1.0)
if(cc.g < v3.x || any(lessThan(cc.gggg, v4)) ||any(lessThan(cc.gggg, v6)))return;
v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;
v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;
v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;
v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;
v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;
v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;
v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;
v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;
v6.w = texture2DRect(texD, TexRU.xy).g;
if(cc.g < v3.y || any(lessThan(cc.gggg, v5)) ||any(lessThan(cc.gggg, v6)))return;
v3.x = texture2DRect(texU, gl_TexCoord[0].xy).g;
v4.x = texture2DRect(texU, gl_TexCoord[1].xy).g;
v4.y = texture2DRect(texU, gl_TexCoord[2].xy).g;
v4.z = texture2DRect(texU, gl_TexCoord[3].xy).g;
v4.w = texture2DRect(texU, gl_TexCoord[4].xy).g;
v6.x = texture2DRect(texU, gl_TexCoord[5].xy).g;
v6.y = texture2DRect(texU, gl_TexCoord[6].xy).g;
v6.z = texture2DRect(texU, gl_TexCoord[7].xy).g;
v6.w = texture2DRect(texU, TexRU.xy).g;
vec2 D5 = 0.5*(v1.yw-v1.xz);
float fx = D5.x, fy = D5.y ;
float fs, fss , fxs, fys ;
vec2 v3; vec4 v4, v5, v6;
gl_FragData[1] = vec4( dog, dxys);
bool dog_test = (abs(cc.g + 0.5*dot(vec3(fx, fy, fs), dxys ))<= float(THRESHOLD1)) ;
if(dog_test || any(greaterThan(abs(dxys), vec3(1.0)))) dog = 0.0;
dxys.z = A2.w /A2.z;
dxys.y = A1.w - dxys.z*A1.z;
dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y;
vec3 dxys = vec3(0.0);
vec4 A0, A1, A2 ;
A0 = vec4(fxx, fxy, fxs, -fx);
A1 = vec4(fxy, fyy, fys, -fy);
A2 = vec4(fxs, fys, fss, -fs);
vec3 x3 = abs(vec3(fxx, fxy, fxs));
float maxa = max(max(x3.x, x3.y), x3.z);
if(maxa >= 1e-10 ) {
if(x3.y ==maxa )
vec4 TEMP = A1; A1 = A0; A0 = TEMP;
}else if( x3.z == maxa )
vec4 TEMP = A2; A2 = A0; A0 = TEMP;
A0 /= A0.x;
A1 -= A1.x * A0;
A2 -= A2.x * A0;
vec2 x2 = abs(vec2(A1.y, A2.y));
if( x2.y > x2.x )
vec3 TEMP = A2.yzw;
A2.yzw = A1.yzw;
A1.yzw = TEMP;
x2.x = x2.y;
if(x2.x >= 1e-10) {
A1.yzw /= A1.y;
A2.yzw -= A2.y * A1.yzw;
if(abs(A2.z) >= 1e-10) {
fs = 0.5*( v3.x - v3.y );
fss = v3.x + v3.y - cc.g - cc.g;
fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);
fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);
gl_FragData[1] = vec4( dog, 0.0, 0.0, 0.0) ;
gl_FragData[1] = vec4(dog, 0.0, 0.0, 0.0) ;
Detection simplified on this hardware
texU
texD
uniform sampler2DRect tex; void main (void){
vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r, texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);
gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));
uniform sampler2DRect tex;uniform vec2 bbox;
void main (void ){
vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r, texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);
bvec4 helper2 = bvec4(
all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0.0, all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0.0, all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0.0, all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0.0);
gl_FragColor = vec4(helper2);
bbox
uniform sampler2DRect tex; void main (void){
vec4 helper; vec4 helper2;
helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw;
helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw;
gl_FragColor.rg = helper2.xz + helper2.yw;
helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw;
helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw;
gl_FragColor.ba= helper2.xz+helper2.yw;
tex0
uniform sampler2DRect tex
uniform float width;
void main(void){
float index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);
vec2 pos = vec2(0.5, 0.5);
uniform sampler2DRect tex;
vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);
vec2 pos = tc.rg; float index = tc.b;
vec2 sum;
vec4 cc;
vec2 cpos = vec2(-0.5, 0.5);
vec2 opos;
, pos);
cc = texture2DRect(tex
sum.x = cc.r + cc.g; sum.y = sum.x + cc.b;
if (index <cc.r){ opos = cpos.xx;}
else if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}
else if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}
else {opos = cpos.yy; index -= sum.y;}
pos = (pos + pos + opos);
gl_FragColor = vec4(pos, index, 1.0);
void main()
vec4 bins[10];
bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);
bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);
bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);
vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy);
vec2 pos = loc.xy;
bool orientation_mode = (size.z != 0.0);
float sigma = orientation_mode? abs(size.z) : loc.w;
uniform sampler2DRect texS;
uniform sampler2DRect tex;
uniform sampler2DRect gradTex;
uniform vec4 size;
#define ORIENTATION_THRESHOLD
#define SAMPLE_WF float(define GAUSSIAN_WF float(if(offset.x < 0.6) sigma = -sigma;
#endif
if(orientation_mode){
vec4 offset = texture2DRect(texS, pos);
pos.xy = pos.xy + offset.yz;
sigma = sigma * pow(size.w, offset.w);
#if
//bool fixed_orientation = (size.z < 0.0);
if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}
float gsigma = sigma * GAUSSIAN_WF;
vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF))) ;
vec2 dim = size.xy;
float dist_threshold = win.x*win.x+0.5;
float factor = -0.5/(gsigma*gsigma);
vec4 sz;
vec2 spos;
//if(any(pos.xy <= 1)) discard;
sz.xy = max( pos - win, vec2(1,1));
sz.zw = min( pos + win, dim-vec2(2, 2));
sz = floor(sz)+0.5;
for(spos.y = sz.y; spos.y <= sz.w;
spos.y+=1.0)
for(spos.x = sz.x; spos.x <= sz.z;
spos.x+=1.0)
vec2 offset = spos - pos;
float sq_dist = dot(offset,offset);
if( sq_dist < dist_threshold){
vec4 cc = texture2DRect(gradTex, spos);
float grad = cc.b;
float theta = cc.a;
float idx = floor(degrees(theta)*0.1);
if(idx < 0.0 ) idx += 36.0;
float weight = grad*exp(sq_dist * factor);
float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ;
vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));
int iidx = int((idx*0.25));
bins[iidx]+=inc;
if(idx < 16.0)
if(idx < 8.0)
if(idx < 4.0)
bins[0]+=inc;}
else
bins[1]+=inc;}
}else
if(idx < 12.0){
bins[2]+=inc;}
else
bins[3]+=inc;}
}else if(idx < 32.0)
if(idx < 24.0)
if(idx <20.0)
bins[4]+=inc;}
else
bins[5]+=inc;}
}else
if(idx < 28.0){
bins[6]+=inc;}
else
bins[7]+=inc;}
}else
bins[8]+=inc;
gradTex
texS
//mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;
mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0;
mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;
#define FILTER_CODE(i) {
vec4 newb
(bins[i]* m2);
newb.xyz
( prev.yzw * m1);
prev = bins[i];
newb.wzy
( bins[i+1].zyx *m1);
bins[i] = newb;}
for (int j=0; j<2; j++)
vec4 prev = bins[8];
bins[9]
= bins[0];
for (int i=0; i<9; i++)
FILTER_CODE(i);
FILTER_CODE(0);
FILTER_CODE(1);
FILTER_CODE(2);
FILTER_CODE(3);
FILTER_CODE(4);
FILTER_CODE(5);
FILTER_CODE(6);
FILTER_CODE(7);
FILTER_CODE(8);
vec4 maxh; vec2 maxh2;
vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);
maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));
vec4 Orientations = vec4(0.0, 0.0, 0.0, 0.0);
vec4 weights = vec4(0.0,0.0,0.0,0.0);
{test = greaterThan(bins[i], hh);
if(weight <=weights.g){}\
else if(weight >weights.r)\
{weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\
else {weights.g = weight; Orientations.g = th;}
if(weight <=weights.b){}\
else if(weight >weights.r)\
{weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\
else if(weight >weights.g)\
{weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\
else {weights.b = weight; Orientations.b = th;}
if(weight <=weights.a){}\
else if(weight >weights.r)\
{weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\
else if(weight >weights.g)\
{weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\
else if(weight >weights.b)\
{weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\
else {weights.a = weight; Orientations.a = th;}
float Orientation;
if(npeaks<=0.0){\
test = equal(bins[i], maxh)
npeaks++;
Orientation = th;
prevb = bins[i].w;
else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x )
float
di = -0.5 * (bins[i+1].x-bins[i].z) / (bins[i+1].x+bins[i].z-bins[i].w - bins[i].w) ; \
float
th = (k+di+3.5);
float weight = bins[i].w;
if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) )
float
di = -0.5 * (bins[i].w-bins[i].y) / (bins[i].w+bins[i].y-bins[i].z- bins[i].z) ; \
float
th = (k+di+2.5);
float weight = bins[i].z;
else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) )
float
di = -0.5 * (bins[i].z-bins[i].x) / (bins[i].z+bins[i].x-bins[i].y- bins[i].y) ; \
float
th = (k+di+1.5);
float weight = bins[i].y;
if( any ( test) )
if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y )
float
di = -0.5 * (bins[i].y-prevb) / (bins[i].y+prevb-bins[i].x - bins[i].x) ; \
float
th = (k+di+0.5);
float weight = bins[i].x;
#define FINDPEAK(i, k)
vec4 hh = maxh * ORIENTATION_THRESHOLD;
bvec4 test;
bins[9] = bins[0];
float npeaks = 0.0, k = 0.0;
float prevb
= bins[8].w;
for (int i = 0; i < 9; i++)
FINDPEAK(i, k);
k = k + 4.0;
vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;
bins[9] = bins[0];
float npeaks = 0.0;
float prevb
= bins[8].w;
FINDPEAK(0, 0.0);
FINDPEAK(1, 4.0);
FINDPEAK(2, 8.0);
FINDPEAK(3, 12.0);
FINDPEAK(4, 16.0);
FINDPEAK(5, 20.0);
FINDPEAK(6, 24.0);
FINDPEAK(7, 28.0);
FINDPEAK(8, 32.0);
), vec4(greaterThan(weights, hh)));
gl_FragData[0] = vec4(pos, npeaks, sigma);
gl_FragData[1] = radians((Orientations )*10.0);
}else{
gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);
if(orientation_mode){
npeaks = dot(vec4(1,1, gl_FragData[0] = vec4(pos, radians((Orientation)*10.0), sigma);
#define M_PI 3.14159265358979323846
#define TWO_PI (2.0*M_PI)
#define RPI 1.2732395447351626861510701069801
#define WF size.z
uniform sampler2DRect tex;
uniform sampler2DRect gradTex;
uniform vec4 dsize;
uniform vec3 size;
void main()
vec2 dim
= size.xy;
//image size
float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;
float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5));
index = floor(index*0.125) + 0.49;
vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;
vec2 pos = texture2DRect(tex, coord).xy;
if(any(lessThanEqual(pos.xy, vec2(1.0))) || any(greaterThanEqual(pos.xy, dim-1.0)))// discard;
{ gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }
float anglef = texture2DRect(tex, coord).z;
if(anglef > M_PI) anglef -= TWO_PI;
float sigma = texture2DRect(tex, coord).w;
float spt = abs(sigma * WF);
//default to be 3*sigma
vec4 cscs, rots;
cscs.y = sin(anglef);
cscs.x = cos(anglef);
cscs.zw = - cscs.xy;
rots = cscs /spt;
cscs *= spt;
vec4 temp; vec2 pt, offsetpt;
/*the fraction part of idx is .5*/
offsetpt.x = 4.0* fract(idx*0.25) - 2.0;
offsetpt.y = floor(idx*0.25) - 1.5;
temp = cscs.xwyx*offsetpt.xyxy;
pt = pos + temp.xz + temp.yw;
vec2 bwin = abs(cscs.xy);
float bsz = bwin.x + bwin.y;
vec4 sz;
sz.xy = max(pt - vec2(bsz), vec2(1,1));
sz.zw = min(pt + vec2(bsz), dim - vec2(2, 2));
sz = floor(sz)+0.5;
vec4 DA, DB; vec2 spos;
DA = DB = vec4(0.0, 0.0, 0.0, 0.0);
for(spos.y = sz.y; spos.y <= sz.w;
spos.y+=1.0)
for(spos.x = sz.x; spos.x <= sz.z;
spos.x+=1.0)
vec2 diff = spos - pt;
temp = rots.xywx * diff.xyxy;
vec2 nxy = (temp.xz + temp.yw);
vec2 nxyn = abs(nxy);
if(all( lessThan(nxyn, vec2(1.0)) ))
vec4 cc = texture2DRect(gradTex, spos);
float mod = cc.b;
float angle = cc.a;
float theta0 = RPI * (anglef - angle);
float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;
diff = nxy + offsetpt.xy;
float ww = exp(-0.125*dot(diff, diff));
vec2 weights = vec2(1) - nxyn;
float weight = weights.x * weights.y *mod*ww;
float theta1 = floor(theta);
float weight2 = (theta - theta1) * weight;
float weight1 = weight - weight2;
DA += vec4(equal(vec4(theta1), vec4(0, 1, 2, 3)))*weight1;
DA += vec4(equal(vec4(theta1), vec4(7, 0, 1, 2)))*weight2;
DB += vec4(equal(vec4(theta1), vec4(4, 5, 6, 7)))*weight1;
DB += vec4(equal(vec4(theta1), vec4(3, 4, 5, 6)))*weight2;
gl_FragData[0] = DA; gl_FragData[1] = DB;
dsize
uniform sampler2DRect tex; void main(){
float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);
gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}
uniform sampler2DRect tex; void main(){
gl_FragColor= vec4(texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r, texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}
uniform sampler2DRect tex; uniform vec4 truncate; void main(){
vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy));
bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);
if(ob.y) { gl_FragColor = (truncate.z ==0.0 ? cc.rrbb : cc.ggaa); }
else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);}
else {
vec4 weights = vec4(vec4(0.0, 1.0, 2.0, 3.0) == truncate.wwww);
float v = dot(weights, cc); gl_FragColor = vec4(v);}}
uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()
vec4 v1, v2, gg;
vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);
vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);
gl_FragData[0] = cc - cp;
vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);
vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);
vec4 dx = (vec4(cr.rb, cc.ga) - vec4(cc.rb, cl.ga)).zxwy;
vec4 dy = (vec4(cu.rg, cc.ba) - vec4(cc.rg, cd.ba)).zwxy;
vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);
gl_FragData[1] = grad;
vec4 invalid = vec4(equal(grad, vec4(0.0)));
vec4 ov = atan(dy, dx + invalid);
gl_FragData[2] = ov;
texp
uniform sampler2DRect tex; uniform sampler2DRect oTex; uniform vec2 size; void main(){
vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);
vec2 co = cc.xy * 0.5;
vec4 oo = texture2DRect(oTex, co);
bvec2 bo = lessThan(fract(co), vec2(0.5));
float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a);
gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}
uniform sampler2DRect tex;void main(){
gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}
uniform sampler2DRect tex; uniform vec4 sizes; void main(){
float fwidth = sizes.y;
float twidth = sizes.z;
float rwidth = sizes.w;
float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);
float px = mod(index, twidth);
vec2 tpos= floor(vec2(px, index*rwidth))+0.5;
vec4 cc = texture2DRect(tex, tpos );
float size = 3.0 * cc.a;
gl_FragColor.zw = vec2(0.0, 1.0);
if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else
float type = fract(px);
vec2 dxy; float s, c;
dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);
dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size);
s = sin(cc.b); c = cos(cc.b);
gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;
gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}
uniform sampler2DRect tex; void main(){
vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy);
bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));
float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}
uniform sampler2DRect tex; void main(){
vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));
float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);
gl_FragColor = vec4(g, g, g, 1.0);}
uniform sampler2DRect tex; void main(){
vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));
float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); gl_FragColor = vec4(5.0 *vec3(v), 1.0); }
uniform sampler2DRect tex; void main(){
vec4 oc = texture2DRect(tex, gl_TexCoord[0].xy);
vec4 cc = vec4(equal(abs(oc.rrrr), vec4(1.0, 2.0, 3.0, 4.0)));
bvec2 ff = lessThan(fract(gl_TexCoord[0].xy) , vec2(0.5));
float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);
if(v == 0.0) discard;
else if(oc.r > 0.0) gl_FragColor = vec4(1.0, 0.0, 0,1.0);
else gl_FragColor = vec4(0.0,1.0,0.0,1.0);
uniform sampler2DRect tex;
uniform sampler2DRect gtex;
uniform sampler2DRect otex; uniform vec4 size;
void main()
vec4 bins[10];
bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);
bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);
bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);
vec4 sift = texture2DRect(tex, gl_TexCoord[0].xy);
vec2 pos = sift.xy;
bool orientation_mode = (size.z != 0.0);
float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w);
//bool fixed_orientation = (size.z < 0.0);
if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}
float gsigma = sigma * GAUSSIAN_WF;
vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF)));
vec2 dim = size.xy;
vec4 dist_threshold = vec4(win.x*win.x+0.5);
float factor = -0.5/(gsigma*gsigma);
vec4 sz;
vec2 spos;
//if(any(pos.xy <= float(1))) discard;
sz.xy = max( pos - win, vec2(2.0,2.0));
sz.zw = min( pos + win, dim-vec2(3.0));
sz = floor(sz*0.5) + 0.5;
for(spos.y = sz.y; spos.y <= sz.w;
spos.y+=1.0)
for(spos.x = sz.x; spos.x <= sz.z;
spos.x+=1.0)
vec2 offset = 2.0 * spos - pos - vec2(0.5);
vec4 off = vec4(offset, offset + vec2(1));
vec4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww;
bvec4 inside = lessThan(distsq, dist_threshold);
if(any(inside))
vec4 gg = texture2DRect(gtex, spos);
vec4 oo = texture2DRect(otex, spos);
vec4 weight = gg * exp(distsq * factor);
vec4 idxv = floor(degrees(oo)*0.1);
idxv+= (vec4(lessThan(idxv, vec4(0.0)))*36.0);
vec4 vidx = fract(idxv * 0.25) * 4.0;//mod(idxv, 4.0);
for(int i = 0 ; i < 4; i++)
if(inside[i])
float idx = idxv[i];
vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0.0,1.0,2.0,3.0)));
int iidx = int(floor(idx*0.25));
bins[iidx]+=inc;
for(int i = 0 ; i < 4; i++)
if(inside[i])
float idx = idxv[i];
vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3)));
if(idx < 16.0)
if(idx < 8.0)
if(idx < 4.0)
bins[0]+=inc;}
else
bins[1]+=inc;}
}else
if(idx < 12.0){
bins[2]+=inc;}
else
bins[3]+=inc;}
}else if(idx < 32.0)
if(idx < 24.0)
if(idx <20.0)
bins[4]+=inc;}
else
bins[5]+=inc;}
}else
if(idx < 28.0){
bins[6]+=inc;}
else
bins[7]+=inc;}
}else
bins[8]+=inc;
gtex
otex
uniform sampler2DRect tex; void main ()
vec4 key = vec4(texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r, texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);
gl_FragColor = vec4(notEqual(key, vec4(0.0)));
uniform sampler2DRect tex; uniform vec4 bbox; void main ()
vec4 helper1 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[0].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));
vec4 helper2 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[1].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));
vec4 helper3 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[2].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));
vec4 helper4 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[3].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));
vec4 bx1 = vec4(lessThan(gl_TexCoord[0].xxyy, bbox));
vec4 bx4 = vec4(lessThan(gl_TexCoord[3].xxyy, bbox));
vec4 bx2 = vec4(bx4.xy, bx1.zw);
vec4 bx3 = vec4(bx1.xy, bx4.zw);
helper1 = min(min(bx1.xyxy, bx1.zzww), helper1);
helper2 = min(min(bx2.xyxy, bx2.zzww), helper2);
helper3 = min(min(bx3.xyxy, bx3.zzww), helper3);
helper4 = min(min(bx4.xyxy, bx4.zzww), helper4);
gl_FragColor.r = float(any(greaterThan(max(helper1.xy, helper1.zw), vec2(0.0))));
gl_FragColor.g = float(any(greaterThan(max(helper2.xy, helper2.zw), vec2(0.0))));
gl_FragColor.b = float(any(greaterThan(max(helper3.xy, helper3.zw), vec2(0.0))));
gl_FragColor.a = float(any(greaterThan(max(helper4.xy, helper4.zw), vec2(0.0))));
uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()
vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);
vec2 pos = tc.rg; float index = tc.b;
vec4 tk = texture2DRect( ktex, pos);
vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0)));
vec2 opos;
opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));
opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));
gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, 1.0, tk.w);
uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()
vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);
vec2 pos = tc.rg; float index = tc.b;
vec4 tk = texture2DRect( ktex, pos);
vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0)))
vec2 opos;
opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));
opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));
gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, sign(tk.r), tk.w);
ktex
uniform sampler2DRect tex; void main ()
vec4 helper; vec4 helper2;
helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw;
helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw;
gl_FragColor.rg = helper2.xz + helper2.yw;
helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw;
helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw;
gl_FragColor.ba= helper2.xz+helper2.yw;
#define REPEAT4(FUNCTION)\
for(int i = 0; i < 4; ++i)\
FUNCTION(i);\
#define REPEAT4(FUNCTION)\
FUNCTION(0);\
FUNCTION(1);\
FUNCTION(2);\
FUNCTION(3);
#define DEFINE_EXTRA() vec4 ii = texture2DRect(texI, gl_TexCoord[0].xy); ii = min(2.0 * ii + 0.1, 1.0)
#define MOVE_EXTRA(idx)
ii[0] = ii[idx]
* ii[0])
#define THRESHOLD2
* ii[i])
#define THRESHOLD1 (define THRESHOLD0(i) (uniform sampler2DRect texI;
#define DEFINE_EXTRA()
#define MOVE_EXTRA(idx)
#define THRESHOLD0(i)
uniform sampler2DRect tex; uniform sampler2DRect texU;
uniform sampler2DRect texD; void main ()
vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y);
vec4 ccc = texture2DRect(tex, gl_TexCoord[0].xy);
vec4 clc = texture2DRect(tex, gl_TexCoord[1].xy);
vec4 crc = texture2DRect(tex, gl_TexCoord[2].xy);
vec4 ccd = texture2DRect(tex, gl_TexCoord[3].xy);
vec4 ccu = texture2DRect(tex, gl_TexCoord[4].xy);
vec4 cld = texture2DRect(tex, gl_TexCoord[5].xy);
vec4 clu = texture2DRect(tex, gl_TexCoord[6].xy);
vec4 crd = texture2DRect(tex, gl_TexCoord[7].xy);
vec4 cru = texture2DRect(tex, TexRU.xy);
vec4 cc = ccc;
vec4 v1[4], v2[4];
v1[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);
v1[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);
v1[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);
v1[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);
v2[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);
v2[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);
v2[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);
v2[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);
DEFINE_EXTRA();
vec4 key = vec4(0.0);
#define KEYTEST_STEP0(i) \
bvec4 test1 = greaterThan(vec4(cc[i]), max(v1[i], v2[i])), test2 = lessThan(vec4(cc[i]), min(v1[i], v2[i]));\
key[i] = cc[i] > float(THRESHOLD0(i)) && all(test1)?1.0: 0.0;\
key[i] = cc[i] < float(-THRESHOLD0(i)) && all(test2)? -1.0: key[i];\
REPEAT4(KEYTEST_STEP0);
if(gl_TexCoord[0].x < 1.0) {key.rb = vec2(0.0);}
if(gl_TexCoord[0].y < 1.0) {key.rg = vec2(0.0);}
gl_FragColor = vec4(0.0);
if(any(notEqual(key, vec4(0.0)))) {
float fxx[4], fyy[4], fxy[4], fx[4], fy[4];
#define EDGE_SUPPRESION(i) \
if(key[i] != 0.0)\
vec4 D2 = v1[i].xyzw - cc[i];\
vec2 D4 = v2[i].xw - v2[i].yz;\
vec2 D5 = 0.5*(v1[i].yw-v1[i].xz); \
fx[i] = D5.x;
fy[i] = D5.y ;\
fxx[i] = D2.x + D2.y;\
fyy[i] = D2.z + D2.w;\
fxy[i] = 0.25*(D4.x + D4.y);\
float fxx_plus_fyy = fxx[i] + fyy[i];\
float score_up = fxx_plus_fyy*fxx_plus_fyy; \
float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\
if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)key[i] = 0.0;\
REPEAT4(EDGE_SUPPRESION);
if(any(notEqual(key, vec4(0.0)))) {
#define KEYTEST_STEP2(i)\
if(key[i] == 1.0)\
bvec4 test = lessThan(vec4(cc[i]), max(v5[i], v6[i]));\
if(cc[i] < cd[i] || any(test))key[i] = 0.0; \
}else if(key[i] == -1.0)\
bvec4 test = greaterThan(vec4(cc[i]), min(v5[i], v6[i]));\
if(cc[i] > cd[i] || any(test))key[i] = 0.0; \
REPEAT4(KEYTEST_STEP2);
float keysum = dot(abs(key), vec4(1, 1, 1, 1)) ;
//assume there is only one keypoint in the four.
if(keysum==1.0) {
ccc = texture2DRect(texD, gl_TexCoord[0].xy);
clc = texture2DRect(texD, gl_TexCoord[1].xy);
crc = texture2DRect(texD, gl_TexCoord[2].xy);
ccd = texture2DRect(texD, gl_TexCoord[3].xy);
ccu = texture2DRect(texD, gl_TexCoord[4].xy);
cld = texture2DRect(texD, gl_TexCoord[5].xy);
clu = texture2DRect(texD, gl_TexCoord[6].xy);
crd = texture2DRect(texD, gl_TexCoord[7].xy);
cru = texture2DRect(texD, TexRU.xy);
vec4 cd = ccc;
v5[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);
v5[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);
v5[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);
v5[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);
v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);
v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);
v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);
v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);
#define KEYTEST_STEP1(i)\
if(key[i] == 1.0)\
bvec4 test = lessThan(vec4(cc[i]), max(v4[i], v6[i])); \
if(cc[i] < cu[i] || any(test))key[i] = 0.0; \
}else if(key[i] == -1.0)\
bvec4 test = greaterThan(vec4(cc[i]), min(v4[i], v6[i])); \
if(cc[i] > cu[i] || any(test) )key[i] = 0.0; \
REPEAT4(KEYTEST_STEP1);
if(any(notEqual(key, vec4(0.0)))) {
vec4 v4[4], v5[4], v6[4];
ccc = texture2DRect(texU, gl_TexCoord[0].xy);
clc = texture2DRect(texU, gl_TexCoord[1].xy);
crc = texture2DRect(texU, gl_TexCoord[2].xy);
ccd = texture2DRect(texU, gl_TexCoord[3].xy);
ccu = texture2DRect(texU, gl_TexCoord[4].xy);
cld = texture2DRect(texU, gl_TexCoord[5].xy);
clu = texture2DRect(texU, gl_TexCoord[6].xy);
crd = texture2DRect(texU, gl_TexCoord[7].xy);
cru = texture2DRect(texU, TexRU.xy);
vec4 cu = ccc;
v4[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);
v4[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);
v4[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);
v4[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);
v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);
v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);
v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);
v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);
float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));
gl_FragColor = vec4(keyv, offset);
}}}}
float fs = 0.5*( cu[0] - cd[0] );
float fss = cu[0] + cd[0] - cc[0] - cc[0];
float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);
float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);
vec4 A0, A1, A2 ;
A0 = vec4(fxx[0], fxy[0], fxs, -fx[0]);
A1 = vec4(fxy[0], fyy[0], fys, -fy[0]);
A2 = vec4(fxs, fys, fss, -fs);
vec3 x3 = abs(vec3(fxx[0], fxy[0], fxs));
float maxa = max(max(x3.x, x3.y), x3.z);
if(maxa >= 1e-10 )
if(x3.y ==maxa )
vec4 TEMP = A1; A1 = A0; A0 = TEMP;
}else if( x3.z == maxa )
vec4 TEMP = A2; A2 = A0; A0 = TEMP;
A0 /= A0.x;
A1 -= A1.x * A0;
A2 -= A2.x * A0;
vec2 x2 = abs(vec2(A1.y, A2.y));
if( x2.y > x2.x )
vec3 TEMP = A2.yzw;
A2.yzw = A1.yzw;
A1.yzw = TEMP;
x2.x = x2.y;
if(x2.x >= 1e-10) {
A1.yzw /= A1.y;
A2.yzw -= A2.y * A1.yzw;
if(abs(A2.z) >= 1e-10) {
offset.z = A2.w /A2.z;
offset.y = A1.w - offset.z*A1.z;
offset.x = A0.w - offset.z*A0.z - offset.y*A0.y;
bool test = (abs(cc[0] + 0.5*dot(vec3(fx[0], fy[0], fs), offset ))>float(THRESHOLD1)) ;
if(!test || any( greaterThan(abs(offset), vec3(1.0)))) key = vec4(0.0);
vec3 offset = vec3(0.0, 0.0, 0.0);
#define TESTMOVE_KEYPOINT(idx) \
if(key[idx] != 0.0) \
cu[0] = cu[idx];
cd[0] = cd[idx];
cc[0] = cc[idx];
v4[0] = v4[idx];
v5[0] = v5[idx];
fxy[0] = fxy[idx];
fxx[0] = fxx[idx];
fyy[0] = fyy[idx];
fx[0] = fx[idx];
fy[0] = fy[idx];
MOVE_EXTRA(idx); \
TESTMOVE_KEYPOINT(1);
TESTMOVE_KEYPOINT(2);
TESTMOVE_KEYPOINT(3);
float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));
gl_FragColor = vec4(keyv, 0.0, 0.0, 0.0);
}}}}
texI
#define M_PI 3.14159265358979323846
#define TWO_PI (2.0*M_PI)
#define RPI 1.2732395447351626861510701069801
#define WF size.z
uniform sampler2DRect tex;
uniform sampler2DRect gtex;
uniform sampler2DRect otex;
uniform vec4
dsize;
uniform vec3
size;
void main()
vec2 dim
= size.xy;
//image size
float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;
float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5));
index = floor(index*0.125)+ 0.49;
vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;
vec2 pos = texture2DRect(tex, coord).xy;
vec2 wsz = texture2DRect(tex, coord).zw;
float aspect_ratio = wsz.y / wsz.x;
float aspect_sq = aspect_ratio * aspect_ratio;
vec2 spt = wsz * 0.25; vec2 ispt = 1.0 / spt;
vec4 temp; vec2 pt;
pt.x = pos.x + fract(idx*0.25) * wsz.x;
pt.y = pos.y + (floor(idx*0.25) + 0.5) * spt.y;
vec4 sz;
sz.xy = max(pt - spt, vec2(2,2));
sz.zw = min(pt + spt, dim - vec2(3));
sz = floor(sz * 0.5)+0.5;
vec4 DA, DB; vec2 spos;
DA = DB = vec4(0.0, 0.0, 0.0, 0.0);
vec4 nox = vec4(0.0, 1.0, 0.0, 1.0);
vec4 noy = vec4(0.0, 0.0, 1.0, 1.0);
for(spos.y = sz.y; spos.y <= sz.w;
spos.y+=1.0)
for(spos.x = sz.x; spos.x <= sz.z;
spos.x+=1.0)
vec2 tpt = spos * 2.0 - pt - 0.5;
vec4 nx = (tpt.x + nox) * ispt.x;
vec4 ny = (tpt.y + noy) * ispt.y;
vec4 nxn = abs(nx), nyn = abs(ny);
bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0));
if(any(inside))
vec4 gg = texture2DRect(gtex, spos);
vec4 oo = texture2DRect(otex, spos);
vec4 theta0 = (- oo)*RPI;
vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0);
vec4 theta1 = floor(theta);
vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg;
vec4 weight2 = (theta - theta1) * weight;
vec4 weight1 = weight - weight2;
#define ADD_DESCRIPTOR(i) \
if(inside[i])\
DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \
DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \
DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \
DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \
REPEAT4(ADD_DESCRIPTOR);
#define M_PI 3.14159265358979323846
#define TWO_PI (2.0*M_PI)
#define RPI 1.2732395447351626861510701069801
#define WF size.z
uniform sampler2DRect tex;
uniform sampler2DRect gtex;
uniform sampler2DRect otex;
uniform vec4
dsize;
uniform vec3
size;
void main()
vec2 dim
= size.xy;
//image size
float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;
float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5));
index = floor(index*0.125)+ 0.49;
vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;
vec2 pos = texture2DRect(tex, coord).xy;
if(any(lessThan(pos.xy, vec2(1.0))) || any(greaterThan(pos.xy, dim-1.0)))
//discard;
{ gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }
float anglef = texture2DRect(tex, coord).z;
if(anglef > M_PI) anglef -= TWO_PI;
float sigma = texture2DRect(tex, coord).w;
float spt = abs(sigma * WF);
//default to be 3*sigma
vec4 cscs, rots;
cscs.x = cos(anglef); cscs.y = sin(anglef);
cscs.zw = - cscs.xy;
rots = cscs /spt;
cscs *= spt;
vec4 temp; vec2 pt, offsetpt;
/*the fraction part of idx is .5*/
offsetpt.x = 4.0* fract(idx*0.25) - 2.0;
offsetpt.y = floor(idx*0.25) - 1.5;
temp = cscs.xwyx*offsetpt.xyxy;
pt = pos + temp.xz + temp.yw;
vec2 bwin = abs(cscs.xy);
float bsz = bwin.x + bwin.y;
vec4 sz;
sz.xy = max(pt - vec2(bsz), vec2(2,2));
sz.zw = min(pt + vec2(bsz), dim - vec2(3));
sz = floor(sz * 0.5)+0.5;
vec4 DA, DB; vec2 spos;
DA = DB = vec4(0.0, 0.0, 0.0, 0.0);
vec4 nox = vec4(0.0, rots.xy, rots.x + rots.y);
vec4 noy = vec4(0.0, rots.wx, rots.w + rots.x);
for(spos.y = sz.y; spos.y <= sz.w;
spos.y+=1.0)
for(spos.x = sz.x; spos.x <= sz.z;
spos.x+=1.0)
vec2 tpt = spos * 2.0 - pt - 0.5;
vec4 temp = rots.xywx * tpt.xyxy;
vec2 temp2 = temp.xz + temp.yw;
vec4 nx = temp2.x + nox;
vec4 ny = temp2.y + noy;
vec4 nxn = abs(nx), nyn = abs(ny);
bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0));
if(any(inside))
vec4 gg = texture2DRect(gtex, spos);
vec4 oo = texture2DRect(otex, spos);
vec4 theta0 = (anglef - oo)*RPI;
vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0);
vec4 theta1 = floor(theta);
vec4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y;
vec4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy ));
vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg * ww;
vec4 weight2 = (theta - theta1) * weight;
vec4 weight1 = weight - weight2;
#define ADD_DESCRIPTOR(i) \
if(inside[i])\
DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \
DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \
DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \
DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \
REPEAT4(ADD_DESCRIPTOR);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment